interface_cuda.F90 17.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
12
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.rzg.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.

!This is a module contains all CUDA C Calls
! it was provided by NVIDIA with their ELPA GPU port and
! adapted for an ELPA release by A.Marek, RZG
44 45
!
! The file was written by A. Marek, MPCDF
46 47 48
#include "config-f90.h"

module cuda_routines
49

50 51 52 53
  implicit none

  public

54 55 56 57 58 59
  ! TODO: take these values from the definition header files of CUDA !!
  integer(kind=ik), parameter :: cudaMemcpyHostToDevice  = 1
  integer(kind=ik), parameter :: cudaMemcpyDeviceToHost  = 2
  integer(kind=ik), parameter :: cudaHostRegisterPortable  = 3
  integer(kind=ik), parameter :: cudaHostRegisterMapped  = 4
  integer(kind=ik), parameter :: cudaMemcpyDeviceToDevice = 5
60 61

  interface
62 63
    function cuda_setdevice_c(n) result(istat) &
             bind(C, name="cudaSetDeviceFromC")
64 65 66 67 68

      use iso_c_binding
      implicit none
      integer(C_INT), value    :: n
      integer(C_INT)           :: istat
69 70
    end function cuda_setdevice_c
  end interface
71

72 73 74
  interface
    function cuda_getdevicecount_c(n) result(istat) &
             bind(C, name="cudaGetDeviceCountFromC")
75 76
      use iso_c_binding
      implicit none
77 78 79 80
      integer(C_INT), intent(out) :: n
      integer(C_INT)              :: istat
    end function cuda_getdevicecount_c
  end interface
81

82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
!    function cuda_ProfilerStart() result(istat)&
!            bind (C, name="cudaProfilerStart")
!
!      use iso_c_binding
!      implicit none
!      integer(c_int)             :: istat
!    end function cuda_ProfilerStart
!
!    function cuda_ProfilerStop() result(istat)&
!             bind (C, name="cudaProfilerStop")
!
!      use iso_c_binding
!      implicit none
!
!      integer(c_int)             :: istat
!    end function cuda_ProfilerStop
98 99 100


   !*********************Allocate 1D Memory on Device******
101 102 103
   interface
     function cuda_malloc_c(a, width_height) result(istat) &
              bind(C, name="cudaMallocFromC")
104

105 106
       use iso_c_binding
       implicit none
107

108 109
       integer(c_intptr_t)                    :: a
       integer(c_intptr_t), intent(in), value :: width_height
110
       integer(C_INT)                       :: istat
111

112 113
     end function cuda_malloc_c
   end interface
114

115
   !******************* Allocate pinned memory***********
116

117 118 119 120 121 122
!   function cuda_hostalloc(a, size) result(istat) &
!            bind(C, name="cudaHostAlloc")
!
!     use iso_c_binding
!     implicit none
!
123 124
!     integer(c_intptr_t)                    :: a
!     integer(c_intptr_t), intent(in), value :: size
125 126 127 128 129 130 131 132 133 134
!     integer(C_INT)                       :: istat
!
!   end function cuda_hostalloc
!
!   function cuda_hostregister(a, size, dir) result(istat) &
!           bind(C, name="cudaHostRegister")
!
!     use iso_c_binding
!
!     implicit none
135 136
!     integer(c_intptr_t)                    :: a
!     integer(c_intptr_t), intent(in), value :: size
137 138 139 140 141 142 143 144 145 146 147 148
!     integer(C_INT), intent(in),value     :: dir
!     integer(C_INT)                       :: istat
!   end function cuda_hostregister
!
!   !******************* Alloacte 2D Memory on Device*****
!
!   function cuda_malloc_2d(a, width_height) result( istat) &
!            bind(C, name="cudaMalloc2d")
!
!     use iso_c_binding
!
!     implicit none
149 150
!     integer(c_intptr_t)                    :: a
!     integer(c_intptr_t), intent(in), value :: width_height
151 152 153 154 155 156 157 158 159 160 161 162
!     integer(C_INT)                       :: istat
!
!   end function cuda_malloc_2d
!
!   !******************* Alloacte 2D Memory on Device for coalesed access *****
!
!   function cuda_malloc2d_pitch(a, pitch,width, height) result( istat) &
!            bind(C, name="cudaMallocPitch")
!
!     use iso_c_binding
!
!     implicit none
163 164 165 166
!     integer(c_intptr_t)         :: a
!     integer(c_intptr_t)         :: pitch
!     integer(c_intptr_t), value  :: width
!     integer(c_intptr_t), value  :: height
167 168 169 170 171 172 173 174 175 176 177 178 179
!     integer(C_INT)            :: istat
!
!   end function cuda_malloc2d_pitch
!
!  !******************* Alloacte 3D Memory on Device*****
!
!  function cuda_malloc_3d(a,width_height_depth) result( istat) &
!           bind(C, name="cudaMalloc3d")
!
!    use iso_c_binding
!
!    implicit none
!
180 181
!    integer(c_intptr_t)         :: a
!    integer(c_intptr_t), value  :: width_height_depth
182 183 184
!    integer(C_INT)            :: istat
!
!  end function cuda_malloc_3d
185 186

  !*************Deallocate Device Memory*****************
187 188 189
  interface
    function cuda_free_c(a) result(istat) &
             bind(C, name="cudaFreeFromC")
190

191
      use iso_c_binding
192

193
      implicit none
194
      integer(c_intptr_t), value  :: a
195
      integer(C_INT)            :: istat
196

197 198
    end function cuda_free_c
  end interface
199

200 201 202 203 204 205
!  function cuda_freehost(a) result(istat) &
!           bind(C, name="cudaFreeHost")
!
!    use iso_c_binding
!
!    implicit none
206
!    integer(c_intptr_t)   :: a
207 208
!    integer(C_INT)      :: istat
!  end function cuda_freehost
209 210

  !*************Copy Data from device to host / host to device**********************************
211 212 213
  interface
    function cuda_memcpy_c(dst, src, size, dir) result(istat) &
             bind(C, name="cudaMemcpyFromC")
214

215
      use iso_c_binding
216

217
      implicit none
218 219 220
      integer(c_intptr_t), value              :: dst
      integer(c_intptr_t), value              :: src
      integer(c_intptr_t), intent(in), value  :: size
221 222
      integer(C_INT), intent(in), value     :: dir
      integer(C_INT)                        :: istat
223

224 225
    end function cuda_memcpy_c
  end interface
226

227 228 229 230 231 232 233 234 235
!  function cuda_d2d(val) result(istat)&
!           bind(C, name="cuda_MemcpyDeviceToDevice")
!
!    use iso_c_binding
!
!    implicit none
!    integer(C_INT), value   :: val
!    integer(C_INT)          :: istat
!  end function cuda_d2d
236 237

  !******************Copy Data from device to host / host to device for 2D*******
238 239 240
  interface
    function cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) result(istat) &
             bind(C, name="cudaMemcpy2dFromC")
241

242
      use iso_c_binding
243

244
      implicit none
245

246 247 248 249 250 251
      integer(c_intptr_t), value              :: dst
      integer(c_intptr_t), intent(in), value  :: dpitch
      integer(c_intptr_t), value              :: src
      integer(c_intptr_t), intent(in), value  :: spitch
      integer(c_intptr_t), intent(in), value  :: width
      integer(c_intptr_t), intent(in), value  :: height
252 253 254 255 256
      integer(C_INT), intent(in), value     :: dir
      integer(C_INT)                        :: istat

    end function cuda_memcpy2d_c
  end interface
257 258
  !**************************Copy data to device memory Async*****************

259 260 261 262 263 264 265
!  function cuda_memcpy2dasync( dst, dpitch, src, spitch, width, height , dir, stream) result(istat) &
!           bind(C, name="cudaMemcpy2DAsync")
!
!    use iso_c_binding
!
!    implicit none
!
266 267 268 269 270 271
!    integer(c_intptr_t), value                :: dst
!    integer(c_intptr_t), intent(in), value       :: dpitch
!    integer(c_intptr_t), value                :: src
!    integer(c_intptr_t), intent(in), value       :: spitch
!    integer(c_intptr_t), intent(in), value    :: width
!    integer(c_intptr_t), intent(in), value    :: height
272
!    integer(C_INT), intent(in), value       :: dir
273
!    integer(c_intptr_t),value                 :: stream
274 275
!    integer(C_INT)                          :: istat
!  end function
276 277 278

  !***************Initialise memory***********************************************

279 280
  interface

281 282
 function cuda_memset_c(a, val, size) result(istat) &
          bind(C, name="cudaMemsetFromC")
283

284
   use iso_c_binding
285

286
   implicit none
287

288
   integer(c_intptr_t), value              :: a
289 290
   !integer(C_INT)                       :: val
   integer(C_INT)                        :: val
291
   integer(c_intptr_t), intent(in), value  :: size
292
   integer(C_INT)                        :: istat
293

294 295 296 297 298 299 300 301 302
 end function cuda_memset_c
 end interface
!
!  function c_memset(a, val, size) result(istat) &
!           bind(C, name="memset")
!
!    use iso_c_binding
!
!    implicit none
303
!    integer(c_intptr_t)                    :: a
304
!    integer(C_INT)                       :: val
305
!    integer(c_intptr_t), intent(in), value :: size
306 307 308 309 310 311 312 313 314 315 316 317 318
!    integer(C_INT)                       :: istat
!  end function c_memset
!
!  !***************************** CUDA LIBRARY CALLS***************************!
!  subroutine cublas_dgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)bind(C,name='cublasDgemm')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: cta, ctb
!    integer(C_INT),value                 :: m,n,k
!    integer(C_INT), intent(in), value    :: lda,ldb,ldc
!    real(C_DOUBLE),value                 :: alpha,beta
319
!    integer(c_intptr_t), value             :: a, b, c
320 321 322 323 324 325 326 327 328 329 330
!  end subroutine cublas_dgemm
!
!  subroutine cublas_dtrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasDtrmm')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: side, uplo, trans, diag
!    integer(C_INT),value                 :: m,n
!    integer(C_INT), intent(in), value    :: lda,ldb
!    real(C_DOUBLE), value                :: alpha
331
!    integer(c_intptr_t), value             :: a, b
332 333 334 335 336 337 338 339 340 341
!  end subroutine cublas_dtrmm
!
!  subroutine cublas_zgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) bind(C,name='cublasZgemm')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: cta, ctb
!    integer(C_INT),value                 :: m,n,k
!    integer(C_INT), intent(in), value    :: lda,ldb,ldc
342
!    complex(C_DOUBLE_COMPLEX),value              :: alpha,beta
343
!    integer(c_intptr_t), value             :: a, b, c
344 345 346 347 348 349 350 351 352 353 354
!
!  end subroutine cublas_zgemm
!
!  subroutine cublas_zgemv( trans , m, n, alpha, a, lda, b, ldb, beta, c, ldc) bind(C,name='cublasZgemv')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,c_char),value       :: trans
!    integer(c_int),value            :: m,n,lda,ldb,ldc
!    complex(c_double_complex),value :: alpha,beta
355
!    integer(c_intptr_t), value        :: a,b,c
356 357 358 359 360 361 362 363 364 365 366
!
!  end subroutine cublas_zgemv
!
!  subroutine cublas_zhemv( trans , m, alpha, a, lda, b, ldb, beta, c, ldc)bind(C,name='cublasZhemv')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,c_char),value       :: trans
!    integer(c_int),value            :: m,lda,ldb,ldc
!    complex(c_double_complex),value :: alpha,beta
367
!    integer(c_intptr_t), value        :: a,b,c
368 369 370 371 372 373 374 375 376 377 378
!
!  end subroutine cublas_zhemv
!
!  subroutine cublas_ztrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasZtrmm')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: side, uplo, trans, diag
!    integer(C_INT),value                 :: m,n
!    integer(C_INT), intent(in), value    :: lda,ldb
379
!    complex(C_DOUBLE_complex), value             :: alpha
380
!    integer(c_intptr_t), value             :: a, b
381 382 383 384 385 386 387 388 389 390 391
!
!  end subroutine cublas_ztrmm
!
!  subroutine cublas_zherk( uplo, trans, n, k, alpha, a, lda, beta, b, ldb) bind(C,name='cublasZherk')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: uplo, trans
!    integer(C_INT),value                 :: n, k
!    integer(C_INT), intent(in), value    :: lda,ldb
392
!    complex(C_DOUBLE_COMPLEX), value             :: alpha, beta
393
!    integer(c_intptr_t),value              :: a,b
394 395 396 397 398 399 400 401 402 403 404
!
!  end subroutine cublas_zherk
!
!  subroutine cublas_ztrmv( uplo, trans, diag, n, a, lda, b, ldb)bind(C,name='cublasZtrmv')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: uplo, trans, diag
!    integer(C_INT),value                 :: n
!    integer(C_INT), intent(in), value    :: lda,ldb
405
!    integer(c_intptr_t), value             :: a, b
406 407 408 409 410 411 412 413 414 415 416 417 418
!
!  end subroutine cublas_ztrmv
!
!
!  subroutine cublas_zher2( uplo, n, alpha, x, incx , y, incy , a, lda)bind(C,name='cublasZher2')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: uplo
!    integer(C_INT),value                 :: n
!    integer(C_INT), intent(in), value    :: lda,incx, incy
!    complex(C_DOUBLE_COMPLEX), value     :: alpha
419
!    integer(c_intptr_t),value              :: a,x,y
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
!
!  end subroutine cublas_zher2
!
!  function cuda_devicesynchronize()result(istat) &
!           bind(C,name='cudaDeviceSynchronize')
!
!    use iso_c_binding
!
!    implicit none
!    integer(C_INT)                       :: istat
!
!  end function cuda_devicesynchronize
!
!  function cuda_memcpyAsync( dst, src, d_size, dir,stream ) result(istat) &
!           bind(C, name="cudaMemcpyAsync")
!
!    use iso_c_binding
!
!    implicit none
439 440 441
!    integer(c_intptr_t), value            :: dst
!    integer(c_intptr_t), value            :: src
!    integer(c_intptr_t),intent(in),value  :: d_size
442
!    integer(C_INT),intent(in),value     :: dir
443
!    integer(c_intptr_t),value             :: stream
444 445 446 447 448 449 450 451 452 453
!    integer(C_INT)                      :: istat
!
!  end function
!
!  function cuda_StreamCreate( pstream)result (istat) &
!           bind(C, name ="cudaStreamCreate")
!
!    use iso_c_binding
!
!    implicit none
454
!    integer(c_intptr_t) :: pstream
455 456 457 458 459 460 461 462 463
!    integer(c_int)    :: istat
!  end function
!
!  function cuda_StreamDestroy( pstream)result (istat) &
!           bind(C, name ="cudaStreamDestroy")
!
!    use iso_c_binding
!
!    implicit none
464
!    integer(c_intptr_t), value  :: pstream
465 466 467 468 469 470 471 472 473 474
!    integer(c_int)            :: istat
!
!  end function
!
!  function cuda_streamsynchronize( pstream)result(istat) &
!           bind(C,name='cudaStreamSynchronize')
!
!    use iso_c_binding
!
!    implicit none
475
!    integer(c_intptr_t),value  :: pstream
476 477 478 479
!    integer(C_INT)           :: istat
!
!  end function
!end interface
480

481 482
contains
  function cuda_setdevice(n) result(success)
483
    use iso_c_binding
484
    use precision
485 486
    implicit none

487 488
    integer(kind=ik), intent(in)  :: n
    logical                       :: success
489

490 491 492 493 494 495
#ifdef WITH_GPU_VERSION
    success = cuda_setdevice_c(int(n,kind=c_int)) /= 0
#else
    success = .true.
#endif
  end function cuda_setdevice
496

497
  function cuda_getdevicecount(n) result(success)
498
    use iso_c_binding
499
    use precision
500 501
    implicit none

502 503
    integer(kind=ik), intent(out) :: n
    logical                       :: success
504

505 506 507 508 509 510 511 512 513
#ifdef WITH_GPU_VERSION
    success = cuda_getdevicecount_c(int(n, kind=c_int)) /=0
#else
    success = .true.
    n     = 0
#endif
  end function cuda_getdevicecount

  function cuda_malloc(a, width_height) result(success)
514 515 516
    use iso_c_binding
    implicit none

517 518
    integer(c_intptr_t)             :: a
    integer(c_intptr_t), intent(in) :: width_height
519

520 521 522 523 524 525
#ifdef WTIH_GPU_VERSION
    success = cuda_malloc_c(a,width_height) /= 0
#else
    success == .false.
#endif
  end function
526

527
  function cuda_memcpy(dst, src, size, dir) result(success)
528 529 530 531

    use iso_c_binding

    implicit none
532 533 534
    integer(c_intptr_t)             :: dst
    integer(c_intptr_t)             :: src
    integer(c_intptr_t), intent(in) :: size
535
    integer(C_INT), intent(in)    :: dir
536

537 538 539 540 541
#ifdef WITH_GPU_VERSION
    success = cuda_memcpy_c(dst, src, size, dir) /=0
#else
    success = .false.
#endif
542

543
  end function cuda_memcpy
544

545
  function cuda_free(a) result(success)
546 547 548 549

    use iso_c_binding

    implicit none
550
    integer(c_intptr_t) :: a
551

552 553 554 555 556
#ifdef WITH_GPU_VERSION
    success = cuda_free_c(a) /= 0
#else
    success = .false.
#endif
557

558
  end function cuda_free
559

560
  function cuda_memcpy2d(dst, dpitch, src, spitch, width, height , dir) result(success)
561 562 563 564 565

    use iso_c_binding

    implicit none

566 567 568 569 570 571
    integer(c_intptr_t)             :: dst
    integer(c_intptr_t), intent(in) :: dpitch
    integer(c_intptr_t)             :: src
    integer(c_intptr_t), intent(in) :: spitch
    integer(c_intptr_t), intent(in) :: width
    integer(c_intptr_t), intent(in) :: height
572 573
    integer(C_INT), intent(in)    :: dir
    integer(C_INT)                :: istat
574

575 576 577 578 579
#ifdef WITH_GPU_VERSION
    success = cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) /= 0
#else
    success = .false.
#endif
580

581
  end function cuda_memcpy2d
582 583 584


end module cuda_routines
585

586