interface_cuda.F90 17.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
12
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.rzg.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.

!This is a module contains all CUDA C Calls
! it was provided by NVIDIA with their ELPA GPU port and
! adapted for an ELPA release by A.Marek, RZG
44
45
!
! The file was written by A. Marek, MPCDF
46
47
48
#include "config-f90.h"

module cuda_routines
49

50
51
52
53
  implicit none

  public

54
55
56
57
58
59
  ! TODO: take these values from the definition header files of CUDA !!
  integer(kind=ik), parameter :: cudaMemcpyHostToDevice  = 1
  integer(kind=ik), parameter :: cudaMemcpyDeviceToHost  = 2
  integer(kind=ik), parameter :: cudaHostRegisterPortable  = 3
  integer(kind=ik), parameter :: cudaHostRegisterMapped  = 4
  integer(kind=ik), parameter :: cudaMemcpyDeviceToDevice = 5
60
61

  interface
62
63
    function cuda_setdevice_c(n) result(istat) &
             bind(C, name="cudaSetDeviceFromC")
64
65
66
67
68

      use iso_c_binding
      implicit none
      integer(C_INT), value    :: n
      integer(C_INT)           :: istat
69
70
    end function cuda_setdevice_c
  end interface
71

72
73
74
  interface
    function cuda_getdevicecount_c(n) result(istat) &
             bind(C, name="cudaGetDeviceCountFromC")
75
76
      use iso_c_binding
      implicit none
77
78
79
80
      integer(C_INT), intent(out) :: n
      integer(C_INT)              :: istat
    end function cuda_getdevicecount_c
  end interface
81

82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
!    function cuda_ProfilerStart() result(istat)&
!            bind (C, name="cudaProfilerStart")
!
!      use iso_c_binding
!      implicit none
!      integer(c_int)             :: istat
!    end function cuda_ProfilerStart
!
!    function cuda_ProfilerStop() result(istat)&
!             bind (C, name="cudaProfilerStop")
!
!      use iso_c_binding
!      implicit none
!
!      integer(c_int)             :: istat
!    end function cuda_ProfilerStop
98
99
100


   !*********************Allocate 1D Memory on Device******
101
102
103
   interface
     function cuda_malloc_c(a, width_height) result(istat) &
              bind(C, name="cudaMallocFromC")
104

105
106
       use iso_c_binding
       implicit none
107

108
109
       integer(c_intptr_t)                    :: a
       integer(c_intptr_t), intent(in), value :: width_height
110
       integer(C_INT)                       :: istat
111

112
113
     end function cuda_malloc_c
   end interface
114

115
   !******************* Allocate pinned memory***********
116

117
118
119
120
121
122
!   function cuda_hostalloc(a, size) result(istat) &
!            bind(C, name="cudaHostAlloc")
!
!     use iso_c_binding
!     implicit none
!
123
124
!     integer(c_intptr_t)                    :: a
!     integer(c_intptr_t), intent(in), value :: size
125
126
127
128
129
130
131
132
133
134
!     integer(C_INT)                       :: istat
!
!   end function cuda_hostalloc
!
!   function cuda_hostregister(a, size, dir) result(istat) &
!           bind(C, name="cudaHostRegister")
!
!     use iso_c_binding
!
!     implicit none
135
136
!     integer(c_intptr_t)                    :: a
!     integer(c_intptr_t), intent(in), value :: size
137
138
139
140
141
142
143
144
145
146
147
148
!     integer(C_INT), intent(in),value     :: dir
!     integer(C_INT)                       :: istat
!   end function cuda_hostregister
!
!   !******************* Alloacte 2D Memory on Device*****
!
!   function cuda_malloc_2d(a, width_height) result( istat) &
!            bind(C, name="cudaMalloc2d")
!
!     use iso_c_binding
!
!     implicit none
149
150
!     integer(c_intptr_t)                    :: a
!     integer(c_intptr_t), intent(in), value :: width_height
151
152
153
154
155
156
157
158
159
160
161
162
!     integer(C_INT)                       :: istat
!
!   end function cuda_malloc_2d
!
!   !******************* Alloacte 2D Memory on Device for coalesed access *****
!
!   function cuda_malloc2d_pitch(a, pitch,width, height) result( istat) &
!            bind(C, name="cudaMallocPitch")
!
!     use iso_c_binding
!
!     implicit none
163
164
165
166
!     integer(c_intptr_t)         :: a
!     integer(c_intptr_t)         :: pitch
!     integer(c_intptr_t), value  :: width
!     integer(c_intptr_t), value  :: height
167
168
169
170
171
172
173
174
175
176
177
178
179
!     integer(C_INT)            :: istat
!
!   end function cuda_malloc2d_pitch
!
!  !******************* Alloacte 3D Memory on Device*****
!
!  function cuda_malloc_3d(a,width_height_depth) result( istat) &
!           bind(C, name="cudaMalloc3d")
!
!    use iso_c_binding
!
!    implicit none
!
180
181
!    integer(c_intptr_t)         :: a
!    integer(c_intptr_t), value  :: width_height_depth
182
183
184
!    integer(C_INT)            :: istat
!
!  end function cuda_malloc_3d
185
186

  !*************Deallocate Device Memory*****************
187
188
189
  interface
    function cuda_free_c(a) result(istat) &
             bind(C, name="cudaFreeFromC")
190

191
      use iso_c_binding
192

193
      implicit none
194
      integer(c_intptr_t), value  :: a
195
      integer(C_INT)            :: istat
196

197
198
    end function cuda_free_c
  end interface
199

200
201
202
203
204
205
!  function cuda_freehost(a) result(istat) &
!           bind(C, name="cudaFreeHost")
!
!    use iso_c_binding
!
!    implicit none
206
!    integer(c_intptr_t)   :: a
207
208
!    integer(C_INT)      :: istat
!  end function cuda_freehost
209
210

  !*************Copy Data from device to host / host to device**********************************
211
212
213
  interface
    function cuda_memcpy_c(dst, src, size, dir) result(istat) &
             bind(C, name="cudaMemcpyFromC")
214

215
      use iso_c_binding
216

217
      implicit none
218
219
220
      integer(c_intptr_t), value              :: dst
      integer(c_intptr_t), value              :: src
      integer(c_intptr_t), intent(in), value  :: size
221
222
      integer(C_INT), intent(in), value     :: dir
      integer(C_INT)                        :: istat
223

224
225
    end function cuda_memcpy_c
  end interface
226

227
228
229
230
231
232
233
234
235
!  function cuda_d2d(val) result(istat)&
!           bind(C, name="cuda_MemcpyDeviceToDevice")
!
!    use iso_c_binding
!
!    implicit none
!    integer(C_INT), value   :: val
!    integer(C_INT)          :: istat
!  end function cuda_d2d
236
237

  !******************Copy Data from device to host / host to device for 2D*******
238
239
240
  interface
    function cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) result(istat) &
             bind(C, name="cudaMemcpy2dFromC")
241

242
      use iso_c_binding
243

244
      implicit none
245

246
247
248
249
250
251
      integer(c_intptr_t), value              :: dst
      integer(c_intptr_t), intent(in), value  :: dpitch
      integer(c_intptr_t), value              :: src
      integer(c_intptr_t), intent(in), value  :: spitch
      integer(c_intptr_t), intent(in), value  :: width
      integer(c_intptr_t), intent(in), value  :: height
252
253
254
255
256
      integer(C_INT), intent(in), value     :: dir
      integer(C_INT)                        :: istat

    end function cuda_memcpy2d_c
  end interface
257
258
  !**************************Copy data to device memory Async*****************

259
260
261
262
263
264
265
!  function cuda_memcpy2dasync( dst, dpitch, src, spitch, width, height , dir, stream) result(istat) &
!           bind(C, name="cudaMemcpy2DAsync")
!
!    use iso_c_binding
!
!    implicit none
!
266
267
268
269
270
271
!    integer(c_intptr_t), value                :: dst
!    integer(c_intptr_t), intent(in), value       :: dpitch
!    integer(c_intptr_t), value                :: src
!    integer(c_intptr_t), intent(in), value       :: spitch
!    integer(c_intptr_t), intent(in), value    :: width
!    integer(c_intptr_t), intent(in), value    :: height
272
!    integer(C_INT), intent(in), value       :: dir
273
!    integer(c_intptr_t),value                 :: stream
274
275
!    integer(C_INT)                          :: istat
!  end function
276
277
278

  !***************Initialise memory***********************************************

279
280
  interface

281
282
 function cuda_memset_c(a, val, size) result(istat) &
          bind(C, name="cudaMemsetFromC")
283

284
   use iso_c_binding
285

286
   implicit none
287

288
   integer(c_intptr_t), value              :: a
289
290
   !integer(C_INT)                       :: val
   integer(C_INT)                        :: val
291
   integer(c_intptr_t), intent(in), value  :: size
292
   integer(C_INT)                        :: istat
293

294
295
296
297
298
299
300
301
302
 end function cuda_memset_c
 end interface
!
!  function c_memset(a, val, size) result(istat) &
!           bind(C, name="memset")
!
!    use iso_c_binding
!
!    implicit none
303
!    integer(c_intptr_t)                    :: a
304
!    integer(C_INT)                       :: val
305
!    integer(c_intptr_t), intent(in), value :: size
306
307
308
309
310
311
312
313
314
315
316
317
318
!    integer(C_INT)                       :: istat
!  end function c_memset
!
!  !***************************** CUDA LIBRARY CALLS***************************!
!  subroutine cublas_dgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)bind(C,name='cublasDgemm')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: cta, ctb
!    integer(C_INT),value                 :: m,n,k
!    integer(C_INT), intent(in), value    :: lda,ldb,ldc
!    real(C_DOUBLE),value                 :: alpha,beta
319
!    integer(c_intptr_t), value             :: a, b, c
320
321
322
323
324
325
326
327
328
329
330
!  end subroutine cublas_dgemm
!
!  subroutine cublas_dtrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasDtrmm')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: side, uplo, trans, diag
!    integer(C_INT),value                 :: m,n
!    integer(C_INT), intent(in), value    :: lda,ldb
!    real(C_DOUBLE), value                :: alpha
331
!    integer(c_intptr_t), value             :: a, b
332
333
334
335
336
337
338
339
340
341
!  end subroutine cublas_dtrmm
!
!  subroutine cublas_zgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) bind(C,name='cublasZgemm')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: cta, ctb
!    integer(C_INT),value                 :: m,n,k
!    integer(C_INT), intent(in), value    :: lda,ldb,ldc
342
!    complex(C_DOUBLE_COMPLEX),value              :: alpha,beta
343
!    integer(c_intptr_t), value             :: a, b, c
344
345
346
347
348
349
350
351
352
353
354
!
!  end subroutine cublas_zgemm
!
!  subroutine cublas_zgemv( trans , m, n, alpha, a, lda, b, ldb, beta, c, ldc) bind(C,name='cublasZgemv')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,c_char),value       :: trans
!    integer(c_int),value            :: m,n,lda,ldb,ldc
!    complex(c_double_complex),value :: alpha,beta
355
!    integer(c_intptr_t), value        :: a,b,c
356
357
358
359
360
361
362
363
364
365
366
!
!  end subroutine cublas_zgemv
!
!  subroutine cublas_zhemv( trans , m, alpha, a, lda, b, ldb, beta, c, ldc)bind(C,name='cublasZhemv')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,c_char),value       :: trans
!    integer(c_int),value            :: m,lda,ldb,ldc
!    complex(c_double_complex),value :: alpha,beta
367
!    integer(c_intptr_t), value        :: a,b,c
368
369
370
371
372
373
374
375
376
377
378
!
!  end subroutine cublas_zhemv
!
!  subroutine cublas_ztrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasZtrmm')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: side, uplo, trans, diag
!    integer(C_INT),value                 :: m,n
!    integer(C_INT), intent(in), value    :: lda,ldb
379
!    complex(C_DOUBLE_complex), value             :: alpha
380
!    integer(c_intptr_t), value             :: a, b
381
382
383
384
385
386
387
388
389
390
391
!
!  end subroutine cublas_ztrmm
!
!  subroutine cublas_zherk( uplo, trans, n, k, alpha, a, lda, beta, b, ldb) bind(C,name='cublasZherk')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: uplo, trans
!    integer(C_INT),value                 :: n, k
!    integer(C_INT), intent(in), value    :: lda,ldb
392
!    complex(C_DOUBLE_COMPLEX), value             :: alpha, beta
393
!    integer(c_intptr_t),value              :: a,b
394
395
396
397
398
399
400
401
402
403
404
!
!  end subroutine cublas_zherk
!
!  subroutine cublas_ztrmv( uplo, trans, diag, n, a, lda, b, ldb)bind(C,name='cublasZtrmv')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: uplo, trans, diag
!    integer(C_INT),value                 :: n
!    integer(C_INT), intent(in), value    :: lda,ldb
405
!    integer(c_intptr_t), value             :: a, b
406
407
408
409
410
411
412
413
414
415
416
417
418
!
!  end subroutine cublas_ztrmv
!
!
!  subroutine cublas_zher2( uplo, n, alpha, x, incx , y, incy , a, lda)bind(C,name='cublasZher2')
!
!    use iso_c_binding
!
!    implicit none
!    character(1,C_CHAR),value            :: uplo
!    integer(C_INT),value                 :: n
!    integer(C_INT), intent(in), value    :: lda,incx, incy
!    complex(C_DOUBLE_COMPLEX), value     :: alpha
419
!    integer(c_intptr_t),value              :: a,x,y
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
!
!  end subroutine cublas_zher2
!
!  function cuda_devicesynchronize()result(istat) &
!           bind(C,name='cudaDeviceSynchronize')
!
!    use iso_c_binding
!
!    implicit none
!    integer(C_INT)                       :: istat
!
!  end function cuda_devicesynchronize
!
!  function cuda_memcpyAsync( dst, src, d_size, dir,stream ) result(istat) &
!           bind(C, name="cudaMemcpyAsync")
!
!    use iso_c_binding
!
!    implicit none
439
440
441
!    integer(c_intptr_t), value            :: dst
!    integer(c_intptr_t), value            :: src
!    integer(c_intptr_t),intent(in),value  :: d_size
442
!    integer(C_INT),intent(in),value     :: dir
443
!    integer(c_intptr_t),value             :: stream
444
445
446
447
448
449
450
451
452
453
!    integer(C_INT)                      :: istat
!
!  end function
!
!  function cuda_StreamCreate( pstream)result (istat) &
!           bind(C, name ="cudaStreamCreate")
!
!    use iso_c_binding
!
!    implicit none
454
!    integer(c_intptr_t) :: pstream
455
456
457
458
459
460
461
462
463
!    integer(c_int)    :: istat
!  end function
!
!  function cuda_StreamDestroy( pstream)result (istat) &
!           bind(C, name ="cudaStreamDestroy")
!
!    use iso_c_binding
!
!    implicit none
464
!    integer(c_intptr_t), value  :: pstream
465
466
467
468
469
470
471
472
473
474
!    integer(c_int)            :: istat
!
!  end function
!
!  function cuda_streamsynchronize( pstream)result(istat) &
!           bind(C,name='cudaStreamSynchronize')
!
!    use iso_c_binding
!
!    implicit none
475
!    integer(c_intptr_t),value  :: pstream
476
477
478
479
!    integer(C_INT)           :: istat
!
!  end function
!end interface
480

481
482
contains
  function cuda_setdevice(n) result(success)
483
    use iso_c_binding
484
    use precision
485
486
    implicit none

487
488
    integer(kind=ik), intent(in)  :: n
    logical                       :: success
489

490
491
492
493
494
495
#ifdef WITH_GPU_VERSION
    success = cuda_setdevice_c(int(n,kind=c_int)) /= 0
#else
    success = .true.
#endif
  end function cuda_setdevice
496

497
  function cuda_getdevicecount(n) result(success)
498
    use iso_c_binding
499
    use precision
500
501
    implicit none

502
503
    integer(kind=ik), intent(out) :: n
    logical                       :: success
504

505
506
507
508
509
510
511
512
513
#ifdef WITH_GPU_VERSION
    success = cuda_getdevicecount_c(int(n, kind=c_int)) /=0
#else
    success = .true.
    n     = 0
#endif
  end function cuda_getdevicecount

  function cuda_malloc(a, width_height) result(success)
514
515
516
    use iso_c_binding
    implicit none

517
518
    integer(c_intptr_t)             :: a
    integer(c_intptr_t), intent(in) :: width_height
519

520
521
522
523
524
525
#ifdef WTIH_GPU_VERSION
    success = cuda_malloc_c(a,width_height) /= 0
#else
    success == .false.
#endif
  end function
526

527
  function cuda_memcpy(dst, src, size, dir) result(success)
528
529
530
531

    use iso_c_binding

    implicit none
532
533
534
    integer(c_intptr_t)             :: dst
    integer(c_intptr_t)             :: src
    integer(c_intptr_t), intent(in) :: size
535
    integer(C_INT), intent(in)    :: dir
536

537
538
539
540
541
#ifdef WITH_GPU_VERSION
    success = cuda_memcpy_c(dst, src, size, dir) /=0
#else
    success = .false.
#endif
542

543
  end function cuda_memcpy
544

545
  function cuda_free(a) result(success)
546
547
548
549

    use iso_c_binding

    implicit none
550
    integer(c_intptr_t) :: a
551

552
553
554
555
556
#ifdef WITH_GPU_VERSION
    success = cuda_free_c(a) /= 0
#else
    success = .false.
#endif
557

558
  end function cuda_free
559

560
  function cuda_memcpy2d(dst, dpitch, src, spitch, width, height , dir) result(success)
561
562
563
564
565

    use iso_c_binding

    implicit none

566
567
568
569
570
571
    integer(c_intptr_t)             :: dst
    integer(c_intptr_t), intent(in) :: dpitch
    integer(c_intptr_t)             :: src
    integer(c_intptr_t), intent(in) :: spitch
    integer(c_intptr_t), intent(in) :: width
    integer(c_intptr_t), intent(in) :: height
572
573
    integer(C_INT), intent(in)    :: dir
    integer(C_INT)                :: istat
574

575
576
577
578
579
#ifdef WITH_GPU_VERSION
    success = cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) /= 0
#else
    success = .false.
#endif
580

581
  end function cuda_memcpy2d
582
583
584


end module cuda_routines
585

586