interface_cuda.F90 13.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaftrn,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.rzg.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.

!This is a module contains all CUDA C Calls
! it was provided by NVIDIA with their ELPA GPU port and
! adapted for an ELPA release by A.Marek, RZG

#include "config-f90.h"

#ifdef WITH_GPU_VERSION
module cuda_routines
  implicit none

  public

  integer, parameter :: cudaMemcpyHostToDevice  = 1
  integer, parameter :: cudaMemcpyDeviceToHost  = 2
  integer, parameter :: cudaHostRegisterPortable  = 3
  integer, parameter :: cudaHostRegisterMapped  = 4
  integer, parameter :: cudaMemcpyDeviceToDevice = 5

  interface
    !************Device Select***************************
    function cuda_setdevice(n) result(istat) &
             bind(C, name="cudaSetDevice")

      use iso_c_binding
      implicit none
      integer(C_INT), value    :: n
      integer(C_INT)           :: istat
    end function cuda_setdevice

    function cuda_ProfilerStart() result(istat)&
            bind (C, name="cudaProfilerStart")

      use iso_c_binding
      implicit none
      integer(c_int)             :: istat
    end function cuda_ProfilerStart

    function cuda_ProfilerStop() result(istat)&
             bind (C, name="cudaProfilerStop")

      use iso_c_binding
      implicit none

      integer(c_int)             :: istat
    end function cuda_ProfilerStop


   !*********************Allocate 1D Memory on Device******

   function cuda_malloc(a, width_height) result(istat) &
            bind(C, name="cudaMalloc")
        
     use iso_c_binding
     implicit none

     integer(C_SIZE_T)                    :: a
     integer(C_SIZE_T), intent(in), value :: width_height
     integer(C_INT)                       :: istat
     
   end function cuda_malloc

   !******************* Allocate pinned memory***********

   function cuda_hostalloc(a, size) result(istat) &
            bind(C, name="cudaHostAlloc")

     use iso_c_binding
     implicit none

     integer(C_SIZE_T)                    :: a
     integer(C_SIZE_T), intent(in), value :: size
     integer(C_INT)                       :: istat

   end function cuda_hostalloc

   function cuda_hostregister(a, size, dir) result(istat) &
           bind(C, name="cudaHostRegister")

     use iso_c_binding

     implicit none
     integer(C_SIZE_T)                    :: a
     integer(C_SIZE_T), intent(in), value :: size
     integer(C_INT), intent(in),value     :: dir
     integer(C_INT)                       :: istat
   end function cuda_hostregister

   !******************* Alloacte 2D Memory on Device*****

   function cuda_malloc_2d(a, width_height) result( istat) &
            bind(C, name="cudaMalloc2d")

     use iso_c_binding

     implicit none
     integer(C_SIZE_T)                    :: a
     integer(C_SIZE_T), intent(in), value :: width_height
     integer(C_INT)                       :: istat

   end function cuda_malloc_2d

   !******************* Alloacte 2D Memory on Device for coalesed access *****

   function cuda_malloc2d_pitch(a, pitch,width, height) result( istat) &
            bind(C, name="cudaMallocPitch")

     use iso_c_binding

     implicit none
     integer(C_SIZE_T)         :: a
     integer(C_SIZE_T)         :: pitch
     integer(C_SIZE_T), value  :: width
     integer(C_SIZE_T), value  :: height
     integer(C_INT)            :: istat

   end function cuda_malloc2d_pitch

  !******************* Alloacte 3D Memory on Device*****

  function cuda_malloc_3d(a,width_height_depth) result( istat) &
           bind(C, name="cudaMalloc3d")

    use iso_c_binding

    implicit none

    integer(C_SIZE_T)         :: a
    integer(C_SIZE_T), value  :: width_height_depth
    integer(C_INT)            :: istat

  end function cuda_malloc_3d

  !*************Deallocate Device Memory*****************

  function cuda_free(a) result(istat) &
           bind(C, name="cudaFree")

    use iso_c_binding

    implicit none
    integer(C_SIZE_T), value  :: a
    integer(C_INT)            :: istat

  end function cuda_free

  function cuda_freehost(a) result(istat) &
           bind(C, name="cudaFreeHost")

    use iso_c_binding

    implicit none
    integer(C_SIZE_T)   :: a
    integer(C_INT)      :: istat
  end function cuda_freehost

  !*************Copy Data from device to host / host to device**********************************

  function cuda_memcpy(dst, src, size, dir) result(istat) &
           bind(C, name="cudaMemcpy")

    use iso_c_binding

    implicit none
    integer(C_SIZE_T), value              :: dst
    integer(C_SIZE_T), value              :: src
    integer(C_SIZE_T), intent(in), value  :: size
    integer(C_INT), intent(in), value     :: dir
    integer(C_INT)                        :: istat

  end function cuda_memcpy

  function cuda_d2d(val) result(istat)&
           bind(C, name="cuda_MemcpyDeviceToDevice")

    use iso_c_binding

    implicit none
    integer(C_INT), value   :: val
    integer(C_INT)          :: istat
  end function cuda_d2d

  !******************Copy Data from device to host / host to device for 2D*******

  function cuda_memcpy2d(dst, dpitch, src, spitch, width, height , dir) result(istat) &
           bind(C, name="cudaMemcpy2D")

    use iso_c_binding

    implicit none

    integer(C_SIZE_T), value              :: dst
    integer(C_SIZE_T), intent(in), value  :: dpitch
    integer(C_SIZE_T), value              :: src
    integer(C_SIZE_T), intent(in), value  :: spitch
    integer(C_SIZE_T), intent(in), value  :: width
    integer(C_SIZE_T), intent(in), value  :: height
    integer(C_INT), intent(in), value     :: dir
    integer(C_INT)                        :: istat

  end function cuda_memcpy2d

  !**************************Copy data to device memory Async*****************

  function cuda_memcpy2dasync( dst, dpitch, src, spitch, width, height , dir, stream) result(istat) &
           bind(C, name="cudaMemcpy2DAsync")

    use iso_c_binding

    implicit none

    integer(C_SIZE_T), value                :: dst
    integer(C_SIZE_T), intent(in), value       :: dpitch
    integer(C_SIZE_T), value                :: src
    integer(C_SIZE_T), intent(in), value       :: spitch
    integer(C_SIZE_T), intent(in), value    :: width
    integer(C_SIZE_T), intent(in), value    :: height
    integer(C_INT), intent(in), value       :: dir
    integer(C_SIZE_T),value                 :: stream
    integer(C_INT)                          :: istat
  end function

  !***************Initialise memory***********************************************

  function cuda_memset(a, val, size) result(istat) &
           bind(C, name="cudaMemset")

    use iso_c_binding

    implicit none

    integer(C_SIZE_T), value              :: a
    !integer(C_INT)                       :: val
    integer(C_INT)                        :: val
    integer(C_SIZE_T), intent(in), value  :: size
    integer(C_INT)                        :: istat

  end function cuda_memset

  function c_memset(a, val, size) result(istat) &
           bind(C, name="memset")

    use iso_c_binding

    implicit none
    integer(C_SIZE_T)                    :: a
    integer(C_INT)                       :: val
    integer(C_SIZE_T), intent(in), value :: size
    integer(C_INT)                       :: istat
  end function c_memset

  !***************************** CUDA LIBRARY CALLS***************************!
  subroutine cublas_dgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)bind(C,name='cublasDgemm')

    use iso_c_binding

    implicit none
    character(1,C_CHAR),value            :: cta, ctb
    integer(C_INT),value                 :: m,n,k
    integer(C_INT), intent(in), value    :: lda,ldb,ldc
    real(C_DOUBLE),value                 :: alpha,beta
    integer(C_SIZE_T), value             :: a, b, c
  end subroutine cublas_dgemm

  subroutine cublas_dtrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasDtrmm')

    use iso_c_binding

    implicit none
    character(1,C_CHAR),value            :: side, uplo, trans, diag
    integer(C_INT),value                 :: m,n
    integer(C_INT), intent(in), value    :: lda,ldb
    real(C_DOUBLE), value                :: alpha
    integer(C_SIZE_T), value             :: a, b
  end subroutine cublas_dtrmm

  subroutine cublas_zgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) bind(C,name='cublasZgemm')

    use iso_c_binding

    implicit none
    character(1,C_CHAR),value            :: cta, ctb
    integer(C_INT),value                 :: m,n,k
    integer(C_INT), intent(in), value    :: lda,ldb,ldc
    complex(C_DOUBLE),value              :: alpha,beta
    integer(C_SIZE_T), value             :: a, b, c

  end subroutine cublas_zgemm

  subroutine cublas_zgemv( trans , m, n, alpha, a, lda, b, ldb, beta, c, ldc) bind(C,name='cublasZgemv')

    use iso_c_binding

    implicit none
    character(1,c_char),value       :: trans
    integer(c_int),value            :: m,n,lda,ldb,ldc
    complex(c_double_complex),value :: alpha,beta
    integer(c_size_t), value        :: a,b,c

  end subroutine cublas_zgemv

  subroutine cublas_zhemv( trans , m, alpha, a, lda, b, ldb, beta, c, ldc)bind(C,name='cublasZhemv')

    use iso_c_binding

    implicit none
    character(1,c_char),value       :: trans
    integer(c_int),value            :: m,lda,ldb,ldc
    complex(c_double_complex),value :: alpha,beta
    integer(c_size_t), value        :: a,b,c

  end subroutine cublas_zhemv

  subroutine cublas_ztrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasZtrmm')

    use iso_c_binding

    implicit none
    character(1,C_CHAR),value            :: side, uplo, trans, diag
    integer(C_INT),value                 :: m,n
    integer(C_INT), intent(in), value    :: lda,ldb
    complex(C_DOUBLE), value             :: alpha
    integer(C_SIZE_T), value             :: a, b

  end subroutine cublas_ztrmm

  subroutine cublas_zherk( uplo, trans, n, k, alpha, a, lda, beta, b, ldb) bind(C,name='cublasZherk')

    use iso_c_binding

    implicit none
    character(1,C_CHAR),value            :: uplo, trans
    integer(C_INT),value                 :: n, k
    integer(C_INT), intent(in), value    :: lda,ldb
    complex(C_DOUBLE), value             :: alpha, beta
    integer(c_size_t),value              :: a,b

  end subroutine cublas_zherk

  subroutine cublas_ztrmv( uplo, trans, diag, n, a, lda, b, ldb)bind(C,name='cublasZtrmv')

    use iso_c_binding

    implicit none
    character(1,C_CHAR),value            :: uplo, trans, diag
    integer(C_INT),value                 :: n
    integer(C_INT), intent(in), value    :: lda,ldb
    integer(C_SIZE_T), value             :: a, b

  end subroutine cublas_ztrmv


  subroutine cublas_zher2( uplo, n, alpha, x, incx , y, incy , a, lda)bind(C,name='cublasZher2')

    use iso_c_binding

    implicit none
    character(1,C_CHAR),value            :: uplo
    integer(C_INT),value                 :: n
    integer(C_INT), intent(in), value    :: lda,incx, incy
    complex(C_DOUBLE_COMPLEX), value     :: alpha
    integer(c_size_t),value              :: a,x,y

  end subroutine cublas_zher2

  function cuda_devicesynchronize()result(istat) &
           bind(C,name='cudaDeviceSynchronize')

    use iso_c_binding

    implicit none
    integer(C_INT)                       :: istat

  end function cuda_devicesynchronize

  function cuda_memcpyAsync( dst, src, d_size, dir,stream ) result(istat) &
           bind(C, name="cudaMemcpyAsync")

    use iso_c_binding

    implicit none
    integer(C_SIZE_T), value            :: dst
    integer(C_SIZE_T), value            :: src
    integer(C_SIZE_T),intent(in),value  :: d_size
    integer(C_INT),intent(in),value     :: dir
    integer(c_size_t),value             :: stream
    integer(C_INT)                      :: istat

  end function

  function cuda_StreamCreate( pstream)result (istat) &
           bind(C, name ="cudaStreamCreate")

    use iso_c_binding

    implicit none
    integer(C_SIZE_T) :: pstream
    integer(c_int)    :: istat
  end function

  function cuda_StreamDestroy( pstream)result (istat) &
           bind(C, name ="cudaStreamDestroy")

    use iso_c_binding

    implicit none
    integer(C_SIZE_T), value  :: pstream
    integer(c_int)            :: istat

  end function

  function cuda_streamsynchronize( pstream)result(istat) &
           bind(C,name='cudaStreamSynchronize')

    use iso_c_binding

    implicit none
    integer(C_SIZE_T),value  :: pstream
    integer(C_INT)           :: istat

  end function
end interface

end module cuda_routines
#endif /* WITH_GPU_VERSION */