mod_cuda.F90 28.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
12
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.rzg.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
! This file was written by A. Marek, MPCDF


44 45 46
#include "config-f90.h"
module cuda_functions
  use iso_c_binding
47
  use precision
48 49 50 51
  implicit none

  public

52 53 54 55 56
  integer(kind=ik) :: cudaMemcpyHostToDevice
  integer(kind=ik) :: cudaMemcpyDeviceToHost
  integer(kind=ik) :: cudaHostRegisterPortable
  integer(kind=ik) :: cudaHostRegisterMapped
  integer(kind=ik) :: cudaMemcpyDeviceToDevice
57

58
  integer(kind=c_intptr_t), parameter :: size_of_double_real    = 8_rk8
59
#ifdef WANT_SINGLE_PRECISION_REAL
60
  integer(kind=c_intptr_t), parameter :: size_of_single_real    = 4_rk4
61 62
#endif

63
  integer(kind=c_intptr_t), parameter :: size_of_double_complex = 16_ck8
64
#ifdef WANT_SINGLE_PRECISION_COMPLEX
65
  integer(kind=c_intptr_t), parameter :: size_of_single_complex = 8_ck4
66
#endif
67 68 69

  ! functions to set and query the CUDA devices

70 71 72 73 74 75 76 77 78
  interface
    function cuda_threadsynchronize_c() result(istat) &
             bind(C,name="cudaThreadSynchronizeFromC")
      use iso_c_binding
      implicit none
      integer(kind=C_INT)  :: istat
    end function cuda_threadsynchronize_c
  end interface

79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
  interface
    function cuda_setdevice_c(n) result(istat) &
             bind(C, name="cudaSetDeviceFromC")

      use iso_c_binding
      implicit none
      integer(kind=C_INT), value    :: n
      integer(kind=C_INT)           :: istat
    end function cuda_setdevice_c
  end interface

  interface
    function cuda_getdevicecount_c(n) result(istat) &
             bind(C, name="cudaGetDeviceCountFromC")
      use iso_c_binding
      implicit none
      integer(kind=C_INT), intent(out) :: n
      integer(kind=C_INT)              :: istat
    end function cuda_getdevicecount_c
  end interface

  interface
    function cuda_devicesynchronize_c()result(istat) &
             bind(C,name='cudaDeviceSynchronizeFromC')

      use iso_c_binding

      implicit none
      integer(kind=C_INT)                       :: istat

    end function cuda_devicesynchronize_c
  end interface


  ! functions to copy CUDA memory
  interface
    function cuda_memcpyDeviceToDevice_c() result(flag) &
             bind(C, name="cudaMemcpyDeviceToDeviceFromC")
      use iso_c_binding
      implicit none
      integer(kind=c_int) :: flag
    end function
  end interface

  interface
    function cuda_memcpyHostToDevice_c() result(flag) &
             bind(C, name="cudaMemcpyHostToDeviceFromC")
      use iso_c_binding
      implicit none
      integer(kind=c_int) :: flag
    end function
  end interface

  interface
    function cuda_memcpyDeviceToHost_c() result(flag) &
             bind(C, name="cudaMemcpyDeviceToHostFromC")
      use iso_c_binding
      implicit none
      integer(kind=c_int) :: flag
    end function
  end interface

  interface
    function cuda_hostRegisterPortable_c() result(flag) &
             bind(C, name="cudaHostRegisterPortableFromC")
      use iso_c_binding
      implicit none
      integer(kind=c_int) :: flag
    end function
  end interface

  interface
    function cuda_hostRegisterMapped_c() result(flag) &
             bind(C, name="cudaHostRegisterMappedFromC")
      use iso_c_binding
      implicit none
      integer(kind=c_int) :: flag
    end function
  end interface

  interface
    function cuda_memcpy_c(dst, src, size, dir) result(istat) &
             bind(C, name="cudaMemcpyFromC")

      use iso_c_binding

      implicit none
      integer(kind=C_intptr_t), value              :: dst
      integer(kind=C_intptr_t), value              :: src
168
      integer(kind=c_intptr_t), intent(in), value    :: size
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
      integer(kind=C_INT), intent(in), value       :: dir
      integer(kind=C_INT)                          :: istat

    end function cuda_memcpy_c
  end interface

  interface
    function cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) result(istat) &
             bind(C, name="cudaMemcpy2dFromC")

      use iso_c_binding

      implicit none

      integer(kind=C_intptr_T), value              :: dst
184
      integer(kind=c_intptr_t), intent(in), value    :: dpitch
185
      integer(kind=C_intptr_T), value              :: src
186 187 188
      integer(kind=c_intptr_t), intent(in), value    :: spitch
      integer(kind=c_intptr_t), intent(in), value    :: width
      integer(kind=c_intptr_t), intent(in), value    :: height
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
      integer(kind=C_INT), intent(in), value       :: dir
      integer(kind=C_INT)                          :: istat

    end function cuda_memcpy2d_c
  end interface

  ! functions to allocate and free CUDA memory

  interface
    function cuda_free_c(a) result(istat) &
             bind(C, name="cudaFreeFromC")

      use iso_c_binding

      implicit none
      integer(kind=C_intptr_T), value  :: a
      integer(kind=C_INT)              :: istat

    end function cuda_free_c
  end interface

  interface
    function cuda_malloc_c(a, width_height) result(istat) &
             bind(C, name="cudaMallocFromC")

      use iso_c_binding
      implicit none

      integer(kind=C_intptr_T)                    :: a
218
      integer(kind=c_intptr_t), intent(in), value   :: width_height
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
      integer(kind=C_INT)                         :: istat

    end function cuda_malloc_c
  end interface

  interface
    function cuda_memset_c(a, val, size) result(istat) &
             bind(C, name="cudaMemsetFromC")

      use iso_c_binding

      implicit none

      integer(kind=C_intptr_T), value            :: a
      integer(kind=C_INT), value                 :: val
234
      integer(kind=c_intptr_t), intent(in), value  :: size
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
      integer(kind=C_INT)                        :: istat

    end function cuda_memset_c
  end interface

  ! cuBLAS
  interface
    subroutine cublas_dgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) bind(C,name='cublasDgemm')

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta, ctb
      integer(kind=C_INT),value               :: m,n,k
      integer(kind=C_INT), intent(in), value  :: lda,ldb,ldc
      real(kind=C_DOUBLE),value               :: alpha,beta
      integer(kind=C_intptr_T), value         :: a, b, c
    end subroutine cublas_dgemm_c
  end interface

255 256 257 258 259 260 261 262 263 264 265 266 267 268
  interface
    subroutine cublas_sgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) bind(C,name='cublasSgemm')

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta, ctb
      integer(kind=C_INT),value               :: m,n,k
      integer(kind=C_INT), intent(in), value  :: lda,ldb,ldc
      real(kind=C_FLOAT),value                :: alpha,beta
      integer(kind=C_intptr_T), value         :: a, b, c
    end subroutine cublas_sgemm_c
  end interface

269 270 271 272 273 274 275 276 277 278 279 280 281 282
  interface
    subroutine cublas_dtrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasDtrmm')

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: side, uplo, trans, diag
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,ldb
      real(kind=C_DOUBLE), value              :: alpha
      integer(kind=C_intptr_T), value         :: a, b
    end subroutine cublas_dtrmm_c
  end interface

283 284 285 286 287 288 289 290 291 292 293 294 295 296
  interface
    subroutine cublas_strmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasStrmm')

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: side, uplo, trans, diag
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,ldb
      real(kind=C_FLOAT), value               :: alpha
      integer(kind=C_intptr_T), value         :: a, b
    end subroutine cublas_strmm_c
  end interface

297
  interface
298
    subroutine cublas_zgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) bind(C,name='cublasZgemm_elpa_wrapper')
299 300 301 302 303 304 305

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value              :: cta, ctb
      integer(kind=C_INT),value              :: m,n,k
      integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
306
      complex(kind=C_DOUBLE_COMPLEX),value           :: alpha,beta
307 308 309 310 311
      integer(kind=C_intptr_T), value        :: a, b, c

    end subroutine cublas_zgemm_c
  end interface

312
  interface
313
    subroutine cublas_cgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) bind(C,name='cublasCgemm_elpa_wrapper')
314 315 316 317 318 319 320

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value              :: cta, ctb
      integer(kind=C_INT),value              :: m,n,k
      integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
321
      complex(kind=C_FLOAT_COMPLEX),value            :: alpha,beta
322 323 324 325 326
      integer(kind=C_intptr_T), value        :: a, b, c

    end subroutine cublas_cgemm_c
  end interface

327
  interface
328
    subroutine cublas_ztrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasZtrmm_elpa_wrapper')
329 330 331 332 333 334 335

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value              :: side, uplo, trans, diag
      integer(kind=C_INT),value              :: m,n
      integer(kind=C_INT), intent(in), value :: lda,ldb
336
      complex(kind=C_DOUBLE_COMPLEX), value          :: alpha
337 338 339 340 341
      integer(kind=C_intptr_T), value        :: a, b

    end subroutine cublas_ztrmm_c
  end interface

342
  interface
343
    subroutine cublas_ctrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasCtrmm_elpa_wrapper')
344 345 346 347 348 349 350

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value              :: side, uplo, trans, diag
      integer(kind=C_INT),value              :: m,n
      integer(kind=C_INT), intent(in), value :: lda,ldb
351
      complex(kind=C_FLOAT_COMPLEX), value           :: alpha
352 353 354 355 356
      integer(kind=C_intptr_T), value        :: a, b

    end subroutine cublas_ctrmm_c
  end interface

357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
  interface
    subroutine cublas_dgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasDgemv')

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
      real(kind=C_DOUBLE),value               :: alpha,beta
      integer(kind=C_intptr_T), value         :: a, x, y
    end subroutine cublas_dgemv_c
  end interface

  interface
    subroutine cublas_sgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasSgemv')

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
      real(kind=C_FLOAT),value                :: alpha,beta
      integer(kind=C_intptr_T), value         :: a, x, y
    end subroutine cublas_sgemv_c
  end interface

385
  interface
386
    subroutine cublas_zgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasZgemv_elpa_wrapper')
387 388 389 390 391 392 393

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
394
      complex(kind=C_DOUBLE_COMPLEX),value               :: alpha,beta
395 396 397 398 399
      integer(kind=C_intptr_T), value         :: a, x, y
    end subroutine cublas_zgemv_c
  end interface

  interface
400
    subroutine cublas_cgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasCgemv_elpa_wrapper')
401 402 403 404 405 406 407

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta
      integer(kind=C_INT),value               :: m,n
      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
408
      complex(kind=C_FLOAT_COMPLEX),value                :: alpha,beta
409 410 411 412
      integer(kind=C_intptr_T), value         :: a, x, y
    end subroutine cublas_cgemv_c
  end interface

413

Pavel Kus's avatar
Pavel Kus committed
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
  interface
    subroutine cublas_dsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasDsymv')

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta
      integer(kind=C_INT),value               :: n
      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
      real(kind=C_DOUBLE),value               :: alpha,beta
      integer(kind=C_intptr_T), value         :: a, x, y
    end subroutine cublas_dsymv_c
  end interface

  interface
    subroutine cublas_ssymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasSsymv')

      use iso_c_binding

      implicit none
      character(1,C_CHAR),value               :: cta
      integer(kind=C_INT),value               :: n
      integer(kind=C_INT), intent(in), value  :: lda,incx,incy
      real(kind=C_FLOAT),value                :: alpha,beta
      integer(kind=C_intptr_T), value         :: a, x, y
    end subroutine cublas_ssymv_c
  end interface

!   interface
!     subroutine cublas_zsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasZsymv')
! 
!       use iso_c_binding
! 
!       implicit none
!       character(1,C_CHAR),value               :: cta
!       integer(kind=C_INT),value               :: n
!       integer(kind=C_INT), intent(in), value  :: lda,incx,incy
451
!       complex(kind=C_DOUBLE_COMPLEX),value               :: alpha,beta
Pavel Kus's avatar
Pavel Kus committed
452 453 454 455 456 457 458 459 460 461 462 463 464
!       integer(kind=C_intptr_T), value         :: a, x, y
!     end subroutine cublas_zsymv_c
!   end interface
! 
!   interface
!     subroutine cublas_csymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasCsymv')
! 
!       use iso_c_binding
! 
!       implicit none
!       character(1,C_CHAR),value               :: cta
!       integer(kind=C_INT),value               :: n
!       integer(kind=C_INT), intent(in), value  :: lda,incx,incy
465
!       complex(kind=C_FLOAT_COMPLEX),value                :: alpha,beta
Pavel Kus's avatar
Pavel Kus committed
466 467 468 469
!       integer(kind=C_intptr_T), value         :: a, x, y
!     end subroutine cublas_csymv_c
!   end interface

470

471 472 473 474
  contains

    ! functions to set and query the CUDA devices

475 476 477 478 479 480 481 482 483 484 485 486 487
    function cuda_threadsynchronize() result(success)
      use iso_c_binding

      implicit none

      logical :: success
#ifdef WITH_GPU_VERSION
      success = cuda_threadsynchronize_c() /= 0
#else
      success = .true.
#endif
    end function cuda_threadsynchronize

488 489 490 491 492
    function cuda_setdevice(n) result(success)
      use iso_c_binding

      implicit none

493 494
      integer(kind=ik), intent(in)  :: n
      logical                       :: success
495 496 497 498 499 500 501 502 503 504 505
#ifdef WITH_GPU_VERSION
      success = cuda_setdevice_c(int(n,kind=c_int)) /= 0
#else
      success = .true.
#endif
    end function cuda_setdevice

    function cuda_getdevicecount(n) result(success)
      use iso_c_binding
      implicit none

506
      integer(kind=ik)     :: n
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
      integer(kind=c_int)  :: nCasted
      logical              :: success
#ifdef WITH_GPU_VERSION
      success = cuda_getdevicecount_c(nCasted) /=0
      n = int(nCasted)
#else
      success = .true.
      n = 0
#endif
    end function cuda_getdevicecount

    function cuda_devicesynchronize()result(success)

      use iso_c_binding

      implicit none
      logical :: success
#ifdef WITH_GPU_VERSION
      success = cuda_devicesynchronize_c() /=0
#else
527
      success = .true.
528 529 530 531 532 533 534 535 536 537
#endif
    end function cuda_devicesynchronize
    ! functions to allocate and free memory

    function cuda_malloc(a, width_height) result(success)

     use iso_c_binding
     implicit none

     integer(kind=C_intptr_t)                  :: a
538
     integer(kind=c_intptr_t), intent(in)        :: width_height
539 540 541 542
     logical                                   :: success
#ifdef WITH_GPU_VERSION
     success = cuda_malloc_c(a, width_height) /= 0
#else
543
     success = .true.
544 545 546 547 548 549 550 551 552 553 554 555 556
#endif
   end function

   function cuda_free(a) result(success)

     use iso_c_binding

     implicit none
     integer(kind=C_intptr_T) :: a
     logical                  :: success
#ifdef WITH_GPU_VERSION
     success = cuda_free_c(a) /= 0
#else
557
     success = .true.
558 559 560 561 562 563 564 565 566 567
#endif
   end function cuda_free

 function cuda_memset(a, val, size) result(success)

   use iso_c_binding

   implicit none

   integer(kind=c_intptr_t)                :: a
568
   integer(kind=ik)                        :: val
569
   integer(kind=c_intptr_t), intent(in)      :: size
570 571 572 573
   integer(kind=C_INT)                     :: istat

   logical :: success
#ifdef WITH_GPU_VERSION
574
   success= cuda_memset_c(a, int(val,kind=c_int), int(size,kind=c_intptr_t)) /=0
575
#else
576
   success = .true.
577 578 579 580 581 582 583 584
#endif
 end function cuda_memset

 ! functions to memcopy CUDA memory

 function cuda_memcpyDeviceToDevice() result(flag)
   use iso_c_binding
   implicit none
585
   integer(kind=ik) :: flag
586 587 588 589 590 591 592 593 594
#ifdef WITH_GPU_VERSION
   flag = int(cuda_memcpyDeviceToDevice_c())
#else
   flag = 0
#endif
 end function

 function cuda_memcpyHostToDevice() result(flag)
   use iso_c_binding
595
   use precision
596
   implicit none
597
   integer(kind=ik) :: flag
598 599 600 601 602 603 604 605 606
#ifdef WITH_GPU_VERSION
   flag = int(cuda_memcpyHostToDevice_c())
#else
   flag = 0
#endif
 end function

 function cuda_memcpyDeviceToHost() result(flag)
   use iso_c_binding
607
   use precision
608
   implicit none
609
   integer(kind=ik) :: flag
610 611 612 613 614 615 616 617 618
#ifdef WITH_GPU_VERSION
   flag = int( cuda_memcpyDeviceToHost_c())
#else
   flag = 0
#endif
 end function

 function cuda_hostRegisterPortable() result(flag)
   use iso_c_binding
619
   use precision
620
   implicit none
621
   integer(kind=ik) :: flag
622 623 624 625 626 627 628 629 630
#ifdef WITH_GPU_VERSION
   flag = int(cuda_hostRegisterPortable_c())
#else
   flag = 0
#endif
 end function

 function cuda_hostRegisterMapped() result(flag)
   use iso_c_binding
631
   use precision
632
   implicit none
633
   integer(kind=ik) :: flag
634 635 636 637 638 639 640 641 642 643 644 645 646 647
#ifdef WITH_GPU_VERSION
   flag = int(cuda_hostRegisterMapped_c())
#else
   flag = 0
#endif
 end function

 function cuda_memcpy(dst, src, size, dir) result(success)

      use iso_c_binding

      implicit none
      integer(kind=C_intptr_t)              :: dst
      integer(kind=C_intptr_t)              :: src
648
      integer(kind=c_intptr_t), intent(in)    :: size
649 650
      integer(kind=C_INT), intent(in)       :: dir
      logical :: success
651

652 653 654
#ifdef WITH_GPU_VERSION
        success = cuda_memcpy_c(dst, src, size, dir) /= 0
#else
655
        success = .true.
656 657 658 659 660 661 662 663 664 665
#endif
    end function

    function cuda_memcpy2d(dst, dpitch, src, spitch, width, height , dir) result(success)

      use iso_c_binding

      implicit none

      integer(kind=C_intptr_T)           :: dst
666
      integer(kind=c_intptr_t), intent(in) :: dpitch
667
      integer(kind=C_intptr_T)           :: src
668 669 670
      integer(kind=c_intptr_t), intent(in) :: spitch
      integer(kind=c_intptr_t), intent(in) :: width
      integer(kind=c_intptr_t), intent(in) :: height
671 672 673 674 675
      integer(kind=C_INT), intent(in)    :: dir
      logical                            :: success
#ifdef WITH_GPU_VERSION
      success = cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) /= 0
#else
676
      success = .true.
677 678 679 680 681 682 683 684
#endif
    end function cuda_memcpy2d

    ! cuBLAS
    subroutine cublas_dgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
      use iso_c_binding

      implicit none
685
      character(1,C_CHAR),value       :: cta, ctb
686 687 688 689 690 691 692 693 694
      integer(kind=C_INT)             :: m,n,k
      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
      real(kind=C_DOUBLE)             :: alpha,beta
      integer(kind=C_intptr_T)        :: a, b, c
#ifdef WITH_GPU_VERSION
      call cublas_dgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
#endif
    end subroutine cublas_dgemm

695 696 697 698
    subroutine cublas_sgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
      use iso_c_binding

      implicit none
699
      character(1,C_CHAR),value       :: cta, ctb
700 701 702 703 704 705 706 707 708
      integer(kind=C_INT)             :: m,n,k
      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
      real(kind=C_FLOAT)              :: alpha,beta
      integer(kind=C_intptr_T)        :: a, b, c
#ifdef WITH_GPU_VERSION
      call cublas_sgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
#endif
    end subroutine cublas_sgemm

709 710 711 712 713
    subroutine cublas_dtrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)

      use iso_c_binding

      implicit none
714
      character(1,C_CHAR),value       :: side, uplo, trans, diag
715 716 717 718 719 720 721 722 723
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,ldb
      real(kind=C_DOUBLE)             :: alpha
      integer(kind=C_intptr_T)        :: a, b
#ifdef WITH_GPU_VERSION
      call cublas_dtrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
#endif
    end subroutine cublas_dtrmm

724 725 726 727 728
    subroutine cublas_strmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)

      use iso_c_binding

      implicit none
729
      character(1,C_CHAR),value       :: side, uplo, trans, diag
730 731 732 733 734 735 736 737 738
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,ldb
      real(kind=C_FLOAT)              :: alpha
      integer(kind=C_intptr_T)        :: a, b
#ifdef WITH_GPU_VERSION
      call cublas_strmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
#endif
    end subroutine cublas_strmm

739 740 741 742 743
    subroutine cublas_zgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)

      use iso_c_binding

      implicit none
744
      character(1,C_CHAR),value       :: cta, ctb
745 746
      integer(kind=C_INT)             :: m,n,k
      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
747
      complex(kind=C_DOUBLE_COMPLEX)          :: alpha,beta
748 749 750 751 752 753
      integer(kind=C_intptr_T)        :: a, b, c
#ifdef WITH_GPU_VERSION
      call cublas_zgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
#endif
    end subroutine cublas_zgemm

754 755 756 757 758
    subroutine cublas_cgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)

      use iso_c_binding

      implicit none
759
      character(1,C_CHAR),value       :: cta, ctb
760 761
      integer(kind=C_INT)             :: m,n,k
      integer(kind=C_INT), intent(in) :: lda,ldb,ldc
762
      complex(kind=C_FLOAT_COMPLEX)           :: alpha,beta
763 764 765 766 767 768
      integer(kind=C_intptr_T)        :: a, b, c
#ifdef WITH_GPU_VERSION
      call cublas_cgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
#endif
    end subroutine cublas_cgemm

769 770 771 772 773
    subroutine cublas_ztrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)

      use iso_c_binding

      implicit none
774
      character(1,C_CHAR),value       :: side, uplo, trans, diag
775 776
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,ldb
777
      complex(kind=C_DOUBLE_COMPLEX)          :: alpha
778 779 780 781 782 783
      integer(kind=C_intptr_T)        :: a, b
#ifdef WITH_GPU_VERSION
      call cublas_ztrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
#endif
    end subroutine cublas_ztrmm

784 785 786 787 788
    subroutine cublas_ctrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)

      use iso_c_binding

      implicit none
789
      character(1,C_CHAR),value       :: side, uplo, trans, diag
790 791
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,ldb
792
      complex(kind=C_FLOAT_COMPLEX)           :: alpha
793 794 795 796 797
      integer(kind=C_intptr_T)        :: a, b
#ifdef WITH_GPU_VERSION
      call cublas_ctrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
#endif
    end subroutine cublas_ctrmm
798

799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826
    subroutine cublas_dgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
      real(kind=C_DOUBLE)             :: alpha,beta
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
      call cublas_dgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
#endif
    end subroutine cublas_dgemv

    subroutine cublas_sgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
      real(kind=C_FLOAT)              :: alpha,beta
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
      call cublas_sgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
#endif
    end subroutine cublas_sgemv

827 828 829 830 831 832 833
    subroutine cublas_zgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
834
      complex(kind=C_DOUBLE_COMPLEX)             :: alpha,beta
835 836 837 838 839 840 841 842 843 844 845 846 847
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
      call cublas_zgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
#endif
    end subroutine cublas_zgemv

    subroutine cublas_cgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: m,n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
848
      complex(kind=C_FLOAT_COMPLEX)              :: alpha,beta
849 850 851 852 853 854
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
      call cublas_cgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
#endif
    end subroutine cublas_cgemv

855

Pavel Kus's avatar
Pavel Kus committed
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890
    subroutine cublas_dsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
      real(kind=C_DOUBLE)             :: alpha,beta
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
      call cublas_dsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
#endif
    end subroutine cublas_dsymv

    subroutine cublas_ssymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
      real(kind=C_FLOAT)              :: alpha,beta
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
      call cublas_ssymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
#endif
    end subroutine cublas_ssymv

    subroutine cublas_zsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
891
      complex(kind=C_DOUBLE_COMPLEX)             :: alpha,beta
Pavel Kus's avatar
Pavel Kus committed
892 893 894 895 896 897 898 899 900 901 902 903 904
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
!       call cublas_zsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
#endif
    end subroutine cublas_zsymv

    subroutine cublas_csymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
      use iso_c_binding

      implicit none
      character(1,C_CHAR),value       :: cta
      integer(kind=C_INT)             :: n
      integer(kind=C_INT), intent(in) :: lda,incx,incy
905
      complex(kind=C_FLOAT_COMPLEX)              :: alpha,beta
Pavel Kus's avatar
Pavel Kus committed
906 907 908 909 910 911 912
      integer(kind=C_intptr_T)        :: a, x, y
#ifdef WITH_GPU_VERSION
!       call cublas_csymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
#endif
    end subroutine cublas_csymv


913
end module cuda_functions