mod_cuda.F90 29 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
12
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.rzg.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
! This file was written by A. Marek, MPCDF


44
45
46
#include "config-f90.h"
module cuda_functions
  use iso_c_binding
47
  use precision
48
49
50
51
  implicit none

  public

52
53
54
55
56
  integer(kind=ik) :: cudaMemcpyHostToDevice
  integer(kind=ik) :: cudaMemcpyDeviceToHost
  integer(kind=ik) :: cudaHostRegisterPortable
  integer(kind=ik) :: cudaHostRegisterMapped
  integer(kind=ik) :: cudaMemcpyDeviceToDevice
Pavel Kus's avatar
Pavel Kus committed
57
58
  
  integer(kind=C_intptr_T) :: cublasHandle
59

60
  integer(kind=c_intptr_t), parameter :: size_of_double_real    = 8_rk8
61
#ifdef WANT_SINGLE_PRECISION_REAL
62
  integer(kind=c_intptr_t), parameter :: size_of_single_real    = 4_rk4
63
64
#endif

65
  integer(kind=c_intptr_t), parameter :: size_of_double_complex = 16_ck8
66
#ifdef WANT_SINGLE_PRECISION_COMPLEX
67
  integer(kind=c_intptr_t), parameter :: size_of_single_complex = 8_ck4
68
#endif
69
70

  ! functions to set and query the CUDA devices
Pavel Kus's avatar
Pavel Kus committed
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
  interface
    function cublas_create_c(handle) result(istat) &
             bind(C, name="cublasCreateFromC")
      use iso_c_binding
      implicit none
      integer(kind=C_intptr_T) :: handle
      integer(kind=C_INT)  :: istat
    end function cublas_create_c
  end interface  

  interface
    function cublas_destroy_c(handle) result(istat) &
             bind(C, name="cublasDestroyFromC")
      use iso_c_binding
      implicit none
      integer(kind=C_intptr_T) :: handle
      integer(kind=C_INT)  :: istat
    end function cublas_destroy_c
  end interface  
90

91
92
93
94
95
96
97
98
99
  interface
    function cuda_threadsynchronize_c() result(istat) &
             bind(C,name="cudaThreadSynchronizeFromC")
      use iso_c_binding
      implicit none
      integer(kind=C_INT)  :: istat
    end function cuda_threadsynchronize_c
  end interface

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
  interface
    function cuda_setdevice_c(n) result(istat) &
             bind(C, name="cudaSetDeviceFromC")

      use iso_c_binding
      implicit none
      integer(kind=C_INT), value    :: n
      integer(kind=C_INT)           :: istat
    end function cuda_setdevice_c
  end interface

  interface
    function cuda_getdevicecount_c(n) result(istat) &
             bind(C, name="cudaGetDeviceCountFromC")
      use iso_c_binding
      implicit none
      integer(kind=C_INT), intent(out) :: n
      integer(kind=C_INT)              :: istat
    end function cuda_getdevicecount_c
  end interface

  interface
    function cuda_devicesynchronize_c()result(istat) &
             bind(C,name='cudaDeviceSynchronizeFromC')

      use iso_c_binding

      implicit none
      integer(kind=C_INT)                       :: istat

    end function cuda_devicesynchronize_c
  end interface


  ! functions to copy CUDA memory
  interface
    function cuda_memcpyDeviceToDevice_c() result(flag) &
             bind(C, name="cudaMemcpyDeviceToDeviceFromC")
      use iso_c_binding
      implicit none
      integer(kind=c_int) :: flag
    end function
  end interface

  interface
    function cuda_memcpyHostToDevice_c() result(flag) &
             bind(C, name="cudaMemcpyHostToDeviceFromC")
      use iso_c_binding
      implicit none
      integer(kind=c_int) :: flag
    end function
  end interface

  interface
    function cuda_memcpyDeviceToHost_c() result(flag) &
             bind(C, name="cudaMemcpyDeviceToHostFromC")
      use iso_c_binding
      implicit none
      integer(kind=c_int) :: flag
    end function
  end interface

  interface
    function cuda_hostRegisterPortable_c() result(flag) &
             bind(C, name="cudaHostRegisterPortableFromC")
      use iso_c_binding
      implicit none
      integer(kind=c_int) :: flag
    end function
  end interface

  interface
    function cuda_hostRegisterMapped_c() result(flag) &
             bind(C, name="cudaHostRegisterMappedFromC")
      use iso_c_binding
      implicit none
      integer(kind=c_int) :: flag
    end function
  end interface

  interface
    function cuda_memcpy_c(dst, src, size, dir) result(istat) &
             bind(C, name="cudaMemcpyFromC")

      use iso_c_binding

      implicit none
      integer(kind=C_intptr_t), value              :: dst
      integer(kind=C_intptr_t), value              :: src
189
      integer(kind=c_intptr_t), intent(in), value    :: size
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
      integer(kind=C_INT), intent(in), value       :: dir
      integer(kind=C_INT)                          :: istat

    end function cuda_memcpy_c
  end interface

  interface
    function cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) result(istat) &
             bind(C, name="cudaMemcpy2dFromC")

      use iso_c_binding

      implicit none

      integer(kind=C_intptr_T), value              :: dst
205
      integer(kind=c_intptr_t), intent(in), value    :: dpitch
206
      integer(kind=C_intptr_T), value              :: src
207
208
209
      integer(kind=c_intptr_t), intent(in), value    :: spitch
      integer(kind=c_intptr_t), intent(in), value    :: width
      integer(kind=c_intptr_t), intent(in), value    :: height
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
      integer(kind=C_INT), intent(in), value       :: dir
      integer(kind=C_INT)                          :: istat

    end function cuda_memcpy2d_c
  end interface

  ! functions to allocate and free CUDA memory

  interface
    function cuda_free_c(a) result(istat) &
             bind(C, name="cudaFreeFromC")

      use iso_c_binding

      implicit none
      integer(kind=C_intptr_T), value  :: a
      integer(kind=C_INT)              :: istat

    end function cuda_free_c
  end interface

  interface
    function cuda_malloc_c(a, width_height) result(istat) &
             bind(C, name="cudaMallocFromC")

      use iso_c_binding
      implicit none

      integer(kind=C_intptr_T)                    :: a
239
      integer(kind=c_intptr_t), intent(in), value   :: width_height
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
      integer(kind=C_INT)                         :: istat

    end function cuda_malloc_c
  end interface

  interface
    function cuda_memset_c(a, val, size) result(istat) &
             bind(C, name="cudaMemsetFromC")

      use iso_c_binding

      implicit none

      integer(kind=C_intptr_T), value            :: a
      integer(kind=C_INT), value                 :: val
255
      integer(kind=c_intptr_t), intent(in), value  :: size
256
257
258
259
260
261
262
      integer(kind=C_INT)                        :: istat

    end function cuda_memset_c
  end interface

  ! cuBLAS
  interface
Pavel Kus's avatar
Pavel Kus committed
263
264
    subroutine cublas_dgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) &
                              bind(C,name='cublasDgemm_elpa_wrapper')
265
266
267
268
269
270
271
272
273

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta, ctb
      integer(kind=C_INT),value               :: m,n,k
      integer(kind=C_INT), intent(in), value  :: lda,ldb,ldc
      real(kind=C_DOUBLE),value               :: alpha,beta
      integer(kind=C_intptr_T), value         :: a, b, c
Pavel Kus's avatar
Pavel Kus committed
274
275
      integer(kind=C_intptr_T), value         :: handle

276
277
278
    end subroutine cublas_dgemm_c
  end interface

279
  interface
Pavel Kus's avatar
Pavel Kus committed
280
281
    subroutine cublas_sgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) &
                              bind(C,name='cublasSgemm_elpa_wrapper')
282
283
284
285
286
287
288
289
290

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta, ctb
      integer(kind=C_INT),value               :: m,n,k
      integer(kind=C_INT), intent(in), value  :: lda,ldb,ldc
      real(kind=C_FLOAT),value                :: alpha,beta
      integer(kind=C_intptr_T), value         :: a, b, c
Pavel Kus's avatar
Pavel Kus committed
291
292
      integer(kind=C_intptr_T), value         :: handle
      
293
294
295
    end subroutine cublas_sgemm_c
  end interface

296
  interface
Pavel Kus's avatar
Pavel Kus committed
297
298
    subroutine cublas_dtrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
                              bind(C,name='cublasDtrmm_elpa_wrapper')
299
300
301
302
303
304
305
306
307

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: side, uplo, trans, diag
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,ldb
      real(kind=C_DOUBLE), value              :: alpha
      integer(kind=C_intptr_T), value         :: a, b
Pavel Kus's avatar
Pavel Kus committed
308
309
      integer(kind=C_intptr_T), value         :: handle
      
310
311
312
    end subroutine cublas_dtrmm_c
  end interface

313
  interface
Pavel Kus's avatar
Pavel Kus committed
314
315
    subroutine cublas_strmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
                              bind(C,name='cublasStrmm_elpa_wrapper')
316
317
318
319
320
321
322
323
324

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: side, uplo, trans, diag
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,ldb
      real(kind=C_FLOAT), value               :: alpha
      integer(kind=C_intptr_T), value         :: a, b
Pavel Kus's avatar
Pavel Kus committed
325
326
      integer(kind=C_intptr_T), value         :: handle

327
328
329
    end subroutine cublas_strmm_c
  end interface

330
  interface
Pavel Kus's avatar
Pavel Kus committed
331
332
    subroutine cublas_zgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) &
                              bind(C,name='cublasZgemm_elpa_wrapper')
333
334
335
336
337
338
339

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value              :: cta, ctb
      integer(kind=C_INT),value              :: m,n,k
      integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
340
      complex(kind=C_DOUBLE_COMPLEX),value           :: alpha,beta
341
      integer(kind=C_intptr_T), value        :: a, b, c
Pavel Kus's avatar
Pavel Kus committed
342
      integer(kind=C_intptr_T), value         :: handle
343
344
345
346

    end subroutine cublas_zgemm_c
  end interface

347
  interface
Pavel Kus's avatar
Pavel Kus committed
348
349
    subroutine cublas_cgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) &
                              bind(C,name='cublasCgemm_elpa_wrapper')
350
351
352
353
354
355
356

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value              :: cta, ctb
      integer(kind=C_INT),value              :: m,n,k
      integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
357
      complex(kind=C_FLOAT_COMPLEX),value            :: alpha,beta
358
      integer(kind=C_intptr_T), value        :: a, b, c
Pavel Kus's avatar
Pavel Kus committed
359
      integer(kind=C_intptr_T), value         :: handle
360
361
362
363

    end subroutine cublas_cgemm_c
  end interface

364
  interface
Pavel Kus's avatar
Pavel Kus committed
365
366
    subroutine cublas_ztrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
                              bind(C,name='cublasZtrmm_elpa_wrapper')
367
368
369
370
371
372
373

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value              :: side, uplo, trans, diag
      integer(kind=C_INT),value              :: m,n
      integer(kind=C_INT), intent(in), value :: lda,ldb
374
      complex(kind=C_DOUBLE_COMPLEX), value          :: alpha
375
      integer(kind=C_intptr_T), value        :: a, b
Pavel Kus's avatar
Pavel Kus committed
376
      integer(kind=C_intptr_T), value         :: handle
377
378
379
380

    end subroutine cublas_ztrmm_c
  end interface

381
  interface
Pavel Kus's avatar
Pavel Kus committed
382
383
    subroutine cublas_ctrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
                              bind(C,name='cublasCtrmm_elpa_wrapper')
384
385
386
387
388
389
390

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value              :: side, uplo, trans, diag
      integer(kind=C_INT),value              :: m,n
      integer(kind=C_INT), intent(in), value :: lda,ldb
391
      complex(kind=C_FLOAT_COMPLEX), value           :: alpha
392
      integer(kind=C_intptr_T), value        :: a, b
Pavel Kus's avatar
Pavel Kus committed
393
      integer(kind=C_intptr_T), value         :: handle
394
395
396
397

    end subroutine cublas_ctrmm_c
  end interface

398
  interface
Pavel Kus's avatar
Pavel Kus committed
399
400
    subroutine cublas_dgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
                              bind(C,name='cublasDgemv_elpa_wrapper')
401
402
403
404
405
406
407
408
409

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
      real(kind=C_DOUBLE),value               :: alpha,beta
      integer(kind=C_intptr_T), value         :: a, x, y
Pavel Kus's avatar
Pavel Kus committed
410
411
      integer(kind=C_intptr_T), value         :: handle

412
413
414
415
    end subroutine cublas_dgemv_c
  end interface

  interface
Pavel Kus's avatar
Pavel Kus committed
416
417
    subroutine cublas_sgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
                              bind(C,name='cublasSgemv_elpa_wrapper')
418
419
420
421
422
423
424
425
426

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
      real(kind=C_FLOAT),value                :: alpha,beta
      integer(kind=C_intptr_T), value         :: a, x, y
Pavel Kus's avatar
Pavel Kus committed
427
428
      integer(kind=C_intptr_T), value         :: handle

429
430
431
    end subroutine cublas_sgemv_c
  end interface

432
  interface
Pavel Kus's avatar
Pavel Kus committed
433
434
    subroutine cublas_zgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
                              bind(C,name='cublasZgemv_elpa_wrapper')
435
436
437
438
439
440
441

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
442
      complex(kind=C_DOUBLE_COMPLEX),value               :: alpha,beta
443
      integer(kind=C_intptr_T), value         :: a, x, y
Pavel Kus's avatar
Pavel Kus committed
444
445
      integer(kind=C_intptr_T), value         :: handle

446
447
448
449
    end subroutine cublas_zgemv_c
  end interface

  interface
Pavel Kus's avatar
Pavel Kus committed
450
451
    subroutine cublas_cgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
                              bind(C,name='cublasCgemv_elpa_wrapper')
452
453
454
455
456
457
458

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
459
      complex(kind=C_FLOAT_COMPLEX),value                :: alpha,beta
460
      integer(kind=C_intptr_T), value         :: a, x, y
Pavel Kus's avatar
Pavel Kus committed
461
      integer(kind=C_intptr_T), value         :: handle
Pavel Kus's avatar
Pavel Kus committed
462

Pavel Kus's avatar
Pavel Kus committed
463
    end subroutine cublas_cgemv_c
Pavel Kus's avatar
Pavel Kus committed
464
465
466
  end interface


Pavel Kus's avatar
Pavel Kus committed
467
  contains
Pavel Kus's avatar
Pavel Kus committed
468

Pavel Kus's avatar
Pavel Kus committed
469
    ! functions to set and query the CUDA devices
Pavel Kus's avatar
Pavel Kus committed
470

Pavel Kus's avatar
Pavel Kus committed
471
472
473
   function cublas_create(handle) result(success)
     use iso_c_binding
     implicit none
474

Pavel Kus's avatar
Pavel Kus committed
475
476
477
478
479
480
481
482
     integer(kind=C_intptr_t)                  :: handle
     logical                                   :: success
#ifdef WITH_GPU_VERSION
     success = cublas_create_c(handle) /= 0
#else
     success = .true.
#endif
   end function
483

Pavel Kus's avatar
Pavel Kus committed
484
485
486
   function cublas_destroy(handle) result(success)
     use iso_c_binding
     implicit none
487

Pavel Kus's avatar
Pavel Kus committed
488
489
490
491
492
493
494
495
496
     integer(kind=C_intptr_t)                  :: handle
     logical                                   :: success
#ifdef WITH_GPU_VERSION
     success = cublas_destroy_c(handle) /= 0
#else
     success = .true.
#endif
   end function
    
497
498
499
500
501
502
503
504
505
506
507
508
509
    function cuda_threadsynchronize() result(success)
      use iso_c_binding

      implicit none

      logical :: success
#ifdef WITH_GPU_VERSION
      success = cuda_threadsynchronize_c() /= 0
#else
      success = .true.
#endif
    end function cuda_threadsynchronize

510
511
512
513
514
    function cuda_setdevice(n) result(success)
      use iso_c_binding

      implicit none

515
516
      integer(kind=ik), intent(in)  :: n
      logical                       :: success
517
518
519
520
521
522
523
524
525
526
527
#ifdef WITH_GPU_VERSION
      success = cuda_setdevice_c(int(n,kind=c_int)) /= 0
#else
      success = .true.
#endif
    end function cuda_setdevice

    function cuda_getdevicecount(n) result(success)
      use iso_c_binding
      implicit none

528
      integer(kind=ik)     :: n
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
      integer(kind=c_int)  :: nCasted
      logical              :: success
#ifdef WITH_GPU_VERSION
      success = cuda_getdevicecount_c(nCasted) /=0
      n = int(nCasted)
#else
      success = .true.
      n = 0
#endif
    end function cuda_getdevicecount

    function cuda_devicesynchronize()result(success)

      use iso_c_binding

      implicit none
      logical :: success
#ifdef WITH_GPU_VERSION
      success = cuda_devicesynchronize_c() /=0
#else
549
      success = .true.
550
551
552
553
554
555
556
557
558
559
#endif
    end function cuda_devicesynchronize
    ! functions to allocate and free memory

    function cuda_malloc(a, width_height) result(success)

     use iso_c_binding
     implicit none

     integer(kind=C_intptr_t)                  :: a
560
     integer(kind=c_intptr_t), intent(in)        :: width_height
561
562
563
564
     logical                                   :: success
#ifdef WITH_GPU_VERSION
     success = cuda_malloc_c(a, width_height) /= 0
#else
565
     success = .true.
566
567
568
569
570
571
572
573
574
575
576
577
578
#endif
   end function

   function cuda_free(a) result(success)

     use iso_c_binding

     implicit none
     integer(kind=C_intptr_T) :: a
     logical                  :: success
#ifdef WITH_GPU_VERSION
     success = cuda_free_c(a) /= 0
#else
579
     success = .true.
580
581
582
583
584
585
586
587
588
589
#endif
   end function cuda_free

 function cuda_memset(a, val, size) result(success)

   use iso_c_binding

   implicit none

   integer(kind=c_intptr_t)                :: a
590
   integer(kind=ik)                        :: val
591
   integer(kind=c_intptr_t), intent(in)      :: size
592
593
594
595
   integer(kind=C_INT)                     :: istat

   logical :: success
#ifdef WITH_GPU_VERSION
596
   success= cuda_memset_c(a, int(val,kind=c_int), int(size,kind=c_intptr_t)) /=0
597
#else
598
   success = .true.
599
600
601
602
603
604
605
606
#endif
 end function cuda_memset

 ! functions to memcopy CUDA memory

 function cuda_memcpyDeviceToDevice() result(flag)
   use iso_c_binding
   implicit none
607
   integer(kind=ik) :: flag
608
609
610
611
612
613
614
615
616
#ifdef WITH_GPU_VERSION
   flag = int(cuda_memcpyDeviceToDevice_c())
#else
   flag = 0
#endif
 end function

 function cuda_memcpyHostToDevice() result(flag)
   use iso_c_binding
617
   use precision
618
   implicit none
619
   integer(kind=ik) :: flag
620
621
622
623
624
625
626
627
628
#ifdef WITH_GPU_VERSION
   flag = int(cuda_memcpyHostToDevice_c())
#else
   flag = 0
#endif
 end function

 function cuda_memcpyDeviceToHost() result(flag)
   use iso_c_binding
629
   use precision
630
   implicit none
631
   integer(kind=ik) :: flag
632
633
634
635
636
637
638
639
640
#ifdef WITH_GPU_VERSION
   flag = int( cuda_memcpyDeviceToHost_c())
#else
   flag = 0
#endif
 end function

 function cuda_hostRegisterPortable() result(flag)
   use iso_c_binding
641
   use precision
642
   implicit none
643
   integer(kind=ik) :: flag
644
645
646
647
648
649
650
651
652
#ifdef WITH_GPU_VERSION
   flag = int(cuda_hostRegisterPortable_c())
#else
   flag = 0
#endif
 end function

 function cuda_hostRegisterMapped() result(flag)
   use iso_c_binding
653
   use precision
654
   implicit none
655
   integer(kind=ik) :: flag
656
657
658
659
660
661
662
663
664
665
666
667
668
669
#ifdef WITH_GPU_VERSION
   flag = int(cuda_hostRegisterMapped_c())
#else
   flag = 0
#endif
 end function

 function cuda_memcpy(dst, src, size, dir) result(success)

      use iso_c_binding

      implicit none
      integer(kind=C_intptr_t)              :: dst
      integer(kind=C_intptr_t)              :: src
670
      integer(kind=c_intptr_t), intent(in)    :: size
671
672
      integer(kind=C_INT), intent(in)       :: dir
      logical :: success
673

674
675
676
#ifdef WITH_GPU_VERSION
        success = cuda_memcpy_c(dst, src, size, dir) /= 0
#else
677
        success = .true.
678
679
680
681
682
683
684
685
686
687
#endif
    end function

    function cuda_memcpy2d(dst, dpitch, src, spitch, width, height , dir) result(success)

      use iso_c_binding

      implicit none

      integer(kind=C_intptr_T)           :: dst
688
      integer(kind=c_intptr_t), intent(in) :: dpitch
689
      integer(kind=C_intptr_T)           :: src
690
691
692
      integer(kind=c_intptr_t), intent(in) :: spitch
      integer(kind=c_intptr_t), intent(in) :: width
      integer(kind=c_intptr_t), intent(in) :: height
693
694
695
696
697
      integer(kind=C_INT), intent(in)    :: dir
      logical                            :: success
#ifdef WITH_GPU_VERSION
      success = cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) /= 0
#else
698
      success = .true.
699
700
701
702
703
704
705
706
#endif
    end function cuda_memcpy2d

    ! cuBLAS
    subroutine cublas_dgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
      use iso_c_binding

      implicit none
707
      character(1,C_CHAR),value       :: cta, ctb
708
709
710
711
712
      integer(kind=C_INT)             :: m,n,k
      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
      real(kind=C_DOUBLE)             :: alpha,beta
      integer(kind=C_intptr_T)        :: a, b, c
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
713
      call cublas_dgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
714
715
716
#endif
    end subroutine cublas_dgemm

717
718
719
720
    subroutine cublas_sgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
      use iso_c_binding

      implicit none
721
      character(1,C_CHAR),value       :: cta, ctb
722
723
724
725
726
      integer(kind=C_INT)             :: m,n,k
      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
      real(kind=C_FLOAT)              :: alpha,beta
      integer(kind=C_intptr_T)        :: a, b, c
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
727
      call cublas_sgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
728
729
730
#endif
    end subroutine cublas_sgemm

731
732
733
734
735
    subroutine cublas_dtrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)

      use iso_c_binding

      implicit none
736
      character(1,C_CHAR),value       :: side, uplo, trans, diag
737
738
739
740
741
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,ldb
      real(kind=C_DOUBLE)             :: alpha
      integer(kind=C_intptr_T)        :: a, b
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
742
      call cublas_dtrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
743
744
745
#endif
    end subroutine cublas_dtrmm

746
747
748
749
750
    subroutine cublas_strmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)

      use iso_c_binding

      implicit none
751
      character(1,C_CHAR),value       :: side, uplo, trans, diag
752
753
754
755
756
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,ldb
      real(kind=C_FLOAT)              :: alpha
      integer(kind=C_intptr_T)        :: a, b
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
757
      call cublas_strmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
758
759
760
#endif
    end subroutine cublas_strmm

761
762
763
764
765
    subroutine cublas_zgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)

      use iso_c_binding

      implicit none
766
      character(1,C_CHAR),value       :: cta, ctb
767
768
      integer(kind=C_INT)             :: m,n,k
      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
769
      complex(kind=C_DOUBLE_COMPLEX)          :: alpha,beta
770
771
      integer(kind=C_intptr_T)        :: a, b, c
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
772
      call cublas_zgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
773
774
775
#endif
    end subroutine cublas_zgemm

776
777
778
779
780
    subroutine cublas_cgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)

      use iso_c_binding

      implicit none
781
      character(1,C_CHAR),value       :: cta, ctb
782
783
      integer(kind=C_INT)             :: m,n,k
      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
784
      complex(kind=C_FLOAT_COMPLEX)           :: alpha,beta
785
786
      integer(kind=C_intptr_T)        :: a, b, c
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
787
      call cublas_cgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
788
789
790
#endif
    end subroutine cublas_cgemm

791
792
793
794
795
    subroutine cublas_ztrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)

      use iso_c_binding

      implicit none
796
      character(1,C_CHAR),value       :: side, uplo, trans, diag
797
798
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,ldb
799
      complex(kind=C_DOUBLE_COMPLEX)          :: alpha
800
801
      integer(kind=C_intptr_T)        :: a, b
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
802
      call cublas_ztrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
803
804
805
#endif
    end subroutine cublas_ztrmm

806
807
808
809
810
    subroutine cublas_ctrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)

      use iso_c_binding

      implicit none
811
      character(1,C_CHAR),value       :: side, uplo, trans, diag
812
813
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,ldb
814
      complex(kind=C_FLOAT_COMPLEX)           :: alpha
815
816
      integer(kind=C_intptr_T)        :: a, b
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
817
      call cublas_ctrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
818
819
#endif
    end subroutine cublas_ctrmm
820

821
822
823
824
825
826
827
828
829
830
    subroutine cublas_dgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
      real(kind=C_DOUBLE)             :: alpha,beta
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
831
      call cublas_dgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
832
833
834
835
836
837
838
839
840
841
842
843
844
#endif
    end subroutine cublas_dgemv

    subroutine cublas_sgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
      real(kind=C_FLOAT)              :: alpha,beta
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
845
      call cublas_sgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
846
847
848
#endif
    end subroutine cublas_sgemv

849
850
851
852
853
854
855
    subroutine cublas_zgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
856
      complex(kind=C_DOUBLE_COMPLEX)             :: alpha,beta
857
858
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
859
      call cublas_zgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
860
861
862
863
864
865
866
867
868
869
#endif
    end subroutine cublas_zgemv

    subroutine cublas_cgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
870
      complex(kind=C_FLOAT_COMPLEX)              :: alpha,beta
871
872
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
Pavel Kus's avatar
Pavel Kus committed
873
      call cublas_cgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
874
875
876
#endif
    end subroutine cublas_cgemv

877

Pavel Kus's avatar
Pavel Kus committed
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
!     subroutine cublas_dsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
!       use iso_c_binding
! 
!       implicit none
!       character(1,C_CHAR),value       :: cta
!       integer(kind=C_INT)             :: n
!       integer(kind=C_INT), intent(in) :: lda,incx,incy
!       real(kind=C_DOUBLE)             :: alpha,beta
!       integer(kind=C_intptr_T)        :: a, x, y
! #ifdef WITH_GPU_VERSION
!       call cublas_dsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
! #endif
!     end subroutine cublas_dsymv
! 
!     subroutine cublas_ssymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
!       use iso_c_binding
! 
!       implicit none
!       character(1,C_CHAR),value       :: cta
!       integer(kind=C_INT)             :: n
!       integer(kind=C_INT), intent(in) :: lda,incx,incy
!       real(kind=C_FLOAT)              :: alpha,beta
!       integer(kind=C_intptr_T)        :: a, x, y
! #ifdef WITH_GPU_VERSION
!       call cublas_ssymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
! #endif
!     end subroutine cublas_ssymv
! 
!     subroutine cublas_zsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
!       use iso_c_binding
! 
!       implicit none
!       character(1,C_CHAR),value       :: cta
!       integer(kind=C_INT)             :: n
!       integer(kind=C_INT), intent(in) :: lda,incx,incy
!       complex(kind=C_DOUBLE_COMPLEX)             :: alpha,beta
!       integer(kind=C_intptr_T)        :: a, x, y
! #ifdef WITH_GPU_VERSION
! !       call cublas_zsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
! #endif
!     end subroutine cublas_zsymv
! 
!     subroutine cublas_csymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
!       use iso_c_binding
! 
!       implicit none
!       character(1,C_CHAR),value       :: cta
!       integer(kind=C_INT)             :: n
!       integer(kind=C_INT), intent(in) :: lda,incx,incy
!       complex(kind=C_FLOAT_COMPLEX)              :: alpha,beta
!       integer(kind=C_intptr_T)        :: a, x, y
! #ifdef WITH_GPU_VERSION
! !       call cublas_csymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
! #endif
!     end subroutine cublas_csymv
Pavel Kus's avatar
Pavel Kus committed
933
934


935
end module cuda_functions