Commit bfaa9da5 authored by Pavel Kus's avatar Pavel Kus
Browse files

cublas interface changed to v2

parent 19861fa9
......@@ -113,6 +113,13 @@ module mod_check_for_gpu
if (wantDebugMessage) then
print '(3(a,i0))', 'MPI rank ', myid, ' uses GPU #', deviceNumber
endif
success = cublas_create(cublasHandle)
if (.not.(success)) then
print *,"Cannot create cublas handle"
stop 1
endif
endif
end function
......
......@@ -57,7 +57,7 @@
#include <alloca.h>
#include <stdint.h>
#include <complex.h>
#include <cublas.h>
#include <cublas_v2.h>
#include "config-f90.h"
......@@ -71,6 +71,46 @@
#ifdef WITH_GPU_VERSION
extern "C" {
int cublasCreateFromC(intptr_t **cublas_handle) {
// printf("in c: %p\n", *cublas_handle);
*cublas_handle = (intptr_t*) malloc(sizeof(cublasHandle_t));
// printf("in c: %p\n", *cublas_handle);
cublasStatus_t status = cublasCreate((cublasHandle_t*) *cublas_handle);
if (status == CUBLAS_STATUS_SUCCESS) {
// printf("all OK\n");
return 1;
}
else if (status == CUBLAS_STATUS_NOT_INITIALIZED) {
errormessage("Error in cublasCreate: %s\n", "the CUDA Runtime initialization failed");
return 0;
}
else if (status == CUBLAS_STATUS_ALLOC_FAILED) {
errormessage("Error in cublasCreate: %s\n", "the resources could not be allocated");
return 0;
}
else{
errormessage("Error in cublasCreate: %s\n", "unknown error");
return 0;
}
}
int cublasDestroyFromC(intptr_t *cublas_handle) {
cublasStatus_t status = cublasDestroy(*((cublasHandle_t*) *cublas_handle));
*cublas_handle = (intptr_t) NULL;
if (status == CUBLAS_STATUS_SUCCESS) {
// printf("all OK\n");
return 1;
}
else if (status == CUBLAS_STATUS_NOT_INITIALIZED) {
errormessage("Error in cublasDestroy: %s\n", "the library has not been initialized");
return 0;
}
else{
errormessage("Error in cublasCreate: %s\n", "unknown error");
return 0;
}
}
int cudaThreadSynchronizeFromC() {
cudaError_t cuerr = cudaThreadSynchronize();
......@@ -188,7 +228,85 @@ extern "C" {
return val;
}
void cublasZgemv_elpa_wrapper (char trans, int m, int n, double complex alpha,
cublasOperation_t operation_new_api(char trans) {
if (trans == 'N' || trans == 'n') {
return CUBLAS_OP_N;
}
else if (trans == 'T' || trans == 't') {
return CUBLAS_OP_T;
}
else if (trans == 'C' || trans == 'c') {
return CUBLAS_OP_C;
}
else {
errormessage("Error when transfering %c to cublasOperation_t\n",trans);
// or abort?
return CUBLAS_OP_N;
}
}
cublasFillMode_t fill_mode_new_api(char uplo) {
if (uplo == 'L' || uplo == 'l') {
return CUBLAS_FILL_MODE_LOWER;
}
else if(uplo == 'U' || uplo == 'u') {
return CUBLAS_FILL_MODE_UPPER;
}
else {
errormessage("Error when transfering %c to cublasFillMode_t\n", uplo);
// or abort?
return CUBLAS_FILL_MODE_LOWER;
}
}
cublasSideMode_t side_mode_new_api(char side) {
if (side == 'L' || side == 'l') {
return CUBLAS_SIDE_LEFT;
}
else if (side == 'R' || side == 'r') {
return CUBLAS_SIDE_RIGHT;
}
else{
errormessage("Error when transfering %c to cublasSideMode_t\n", side);
// or abort?
return CUBLAS_SIDE_LEFT;
}
}
cublasDiagType_t diag_type_new_api(char diag) {
if (diag == 'N' || diag == 'n') {
return CUBLAS_DIAG_NON_UNIT;
}
else if (diag == 'U' || diag == 'u') {
return CUBLAS_DIAG_UNIT;
}
else {
errormessage("Error when transfering %c to cublasDiagMode_t\n", diag);
// or abort?
return CUBLAS_DIAG_NON_UNIT;
}
}
void cublasDgemv_elpa_wrapper (intptr_t handle, char trans, int m, int n, double alpha,
const double *A, int lda, const double *x, int incx,
double beta, double *y, int incy) {
cublasDgemv(*((cublasHandle_t*)handle), operation_new_api(trans),
m, n, &alpha, A, lda, x, incx, &beta, y, incy);
}
void cublasSgemv_elpa_wrapper (intptr_t handle, char trans, int m, int n, float alpha,
const float *A, int lda, const float *x, int incx,
float beta, float *y, int incy) {
cublasSgemv(*((cublasHandle_t*)handle), operation_new_api(trans),
m, n, &alpha, A, lda, x, incx, &beta, y, incy);
}
void cublasZgemv_elpa_wrapper (intptr_t handle, char trans, int m, int n, double complex alpha,
const double complex *A, int lda, const double complex *x, int incx,
double complex beta, double complex *y, int incy) {
......@@ -199,10 +317,11 @@ extern "C" {
const cuDoubleComplex* x_casted = (const cuDoubleComplex*) x;
cuDoubleComplex* y_casted = (cuDoubleComplex*) y;
cublasZgemv(trans, m, n, alpha_casted, A_casted, lda, x_casted, incx, beta_casted, y_casted, incy);
cublasZgemv(*((cublasHandle_t*)handle), operation_new_api(trans),
m, n, &alpha_casted, A_casted, lda, x_casted, incx, &beta_casted, y_casted, incy);
}
void cublasCgemv_elpa_wrapper (char trans, int m, int n, float complex alpha,
void cublasCgemv_elpa_wrapper (intptr_t handle, char trans, int m, int n, float complex alpha,
const float complex *A, int lda, const float complex *x, int incx,
float complex beta, float complex *y, int incy) {
......@@ -213,10 +332,30 @@ extern "C" {
const cuFloatComplex* x_casted = (const cuFloatComplex*) x;
cuFloatComplex* y_casted = (cuFloatComplex*) y;
cublasCgemv(trans, m, n, alpha_casted, A_casted, lda, x_casted, incx, beta_casted, y_casted, incy);
cublasCgemv(*((cublasHandle_t*)handle), operation_new_api(trans),
m, n, &alpha_casted, A_casted, lda, x_casted, incx, &beta_casted, y_casted, incy);
}
void cublasZgemm_elpa_wrapper (char transa, char transb, int m, int n, int k,
void cublasDgemm_elpa_wrapper (intptr_t handle, char transa, char transb, int m, int n, int k,
double alpha, const double *A, int lda,
const double *B, int ldb, double beta,
double *C, int ldc) {
cublasDgemm(*((cublasHandle_t*)handle), operation_new_api(transa), operation_new_api(transb),
m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
}
void cublasSgemm_elpa_wrapper (intptr_t handle, char transa, char transb, int m, int n, int k,
float alpha, const float *A, int lda,
const float *B, int ldb, float beta,
float *C, int ldc) {
cublasSgemm(*((cublasHandle_t*)handle), operation_new_api(transa), operation_new_api(transb),
m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
}
void cublasZgemm_elpa_wrapper (intptr_t handle, char transa, char transb, int m, int n, int k,
double complex alpha, const double complex *A, int lda,
const double complex *B, int ldb, double complex beta,
double complex *C, int ldc) {
......@@ -228,10 +367,11 @@ extern "C" {
const cuDoubleComplex* B_casted = (const cuDoubleComplex*) B;
cuDoubleComplex* C_casted = (cuDoubleComplex*) C;
cublasZgemm(transa, transb, m, n, k, alpha_casted, A_casted, lda, B_casted, ldb, beta_casted, C_casted, ldc);
cublasZgemm(*((cublasHandle_t*)handle), operation_new_api(transa), operation_new_api(transb),
m, n, k, &alpha_casted, A_casted, lda, B_casted, ldb, &beta_casted, C_casted, ldc);
}
void cublasCgemm_elpa_wrapper (char transa, char transb, int m, int n, int k,
void cublasCgemm_elpa_wrapper (intptr_t handle, char transa, char transb, int m, int n, int k,
float complex alpha, const float complex *A, int lda,
const float complex *B, int ldb, float complex beta,
float complex *C, int ldc) {
......@@ -243,10 +383,32 @@ extern "C" {
const cuFloatComplex* B_casted = (const cuFloatComplex*) B;
cuFloatComplex* C_casted = (cuFloatComplex*) C;
cublasCgemm(transa, transb, m, n, k, alpha_casted, A_casted, lda, B_casted, ldb, beta_casted, C_casted, ldc);
cublasCgemm(*((cublasHandle_t*)handle), operation_new_api(transa), operation_new_api(transb),
m, n, k, &alpha_casted, A_casted, lda, B_casted, ldb, &beta_casted, C_casted, ldc);
}
// todo: new CUBLAS API diverged from standard BLAS api for these functions
// todo: it provides out-of-place (and apparently more efficient) implementation
// todo: by passing B twice (in place of C as well), we should fall back to in-place algorithm
void cublasDtrmm_elpa_wrapper (intptr_t handle, char side, char uplo, char transa, char diag,
int m, int n, double alpha, const double *A,
int lda, double *B, int ldb){
cublasDtrmm(*((cublasHandle_t*)handle), side_mode_new_api(side), fill_mode_new_api(uplo), operation_new_api(transa),
diag_type_new_api(diag), m, n, &alpha, A, lda, B, ldb, B, ldb);
}
void cublasStrmm_elpa_wrapper (intptr_t handle, char side, char uplo, char transa, char diag,
int m, int n, float alpha, const float *A,
int lda, float *B, int ldb){
cublasStrmm(*((cublasHandle_t*)handle), side_mode_new_api(side), fill_mode_new_api(uplo), operation_new_api(transa),
diag_type_new_api(diag), m, n, &alpha, A, lda, B, ldb, B, ldb);
}
void cublasZtrmm_elpa_wrapper (char side, char uplo, char transa, char diag,
void cublasZtrmm_elpa_wrapper (intptr_t handle, char side, char uplo, char transa, char diag,
int m, int n, double complex alpha, const double complex *A,
int lda, double complex *B, int ldb){
......@@ -255,10 +417,11 @@ extern "C" {
const cuDoubleComplex* A_casted = (const cuDoubleComplex*) A;
cuDoubleComplex* B_casted = (cuDoubleComplex*) B;
cublasZtrmm(side, uplo, transa, diag, m, n, alpha_casted, A_casted, lda, B_casted, ldb);
cublasZtrmm(*((cublasHandle_t*)handle), side_mode_new_api(side), fill_mode_new_api(uplo), operation_new_api(transa),
diag_type_new_api(diag), m, n, &alpha_casted, A_casted, lda, B_casted, ldb, B_casted, ldb);
}
void cublasCtrmm_elpa_wrapper (char side, char uplo, char transa, char diag,
void cublasCtrmm_elpa_wrapper (intptr_t handle, char side, char uplo, char transa, char diag,
int m, int n, float complex alpha, const float complex *A,
int lda, float complex *B, int ldb){
......@@ -267,7 +430,8 @@ extern "C" {
const cuFloatComplex* A_casted = (const cuFloatComplex*) A;
cuFloatComplex* B_casted = (cuFloatComplex*) B;
cublasCtrmm(side, uplo, transa, diag, m, n, alpha_casted, A_casted, lda, B_casted, ldb);
cublasCtrmm(*((cublasHandle_t*)handle), side_mode_new_api(side), fill_mode_new_api(uplo), operation_new_api(transa),
diag_type_new_api(diag), m, n, &alpha_casted, A_casted, lda, B_casted, ldb, B_casted, ldb);
}
......
......@@ -54,6 +54,8 @@ module cuda_functions
integer(kind=ik) :: cudaHostRegisterPortable
integer(kind=ik) :: cudaHostRegisterMapped
integer(kind=ik) :: cudaMemcpyDeviceToDevice
integer(kind=C_intptr_T) :: cublasHandle
integer(kind=c_intptr_t), parameter :: size_of_double_real = 8_rk8
#ifdef WANT_SINGLE_PRECISION_REAL
......@@ -66,6 +68,25 @@ module cuda_functions
#endif
! functions to set and query the CUDA devices
interface
function cublas_create_c(handle) result(istat) &
bind(C, name="cublasCreateFromC")
use iso_c_binding
implicit none
integer(kind=C_intptr_T) :: handle
integer(kind=C_INT) :: istat
end function cublas_create_c
end interface
interface
function cublas_destroy_c(handle) result(istat) &
bind(C, name="cublasDestroyFromC")
use iso_c_binding
implicit none
integer(kind=C_intptr_T) :: handle
integer(kind=C_INT) :: istat
end function cublas_destroy_c
end interface
interface
function cuda_threadsynchronize_c() result(istat) &
......@@ -239,7 +260,8 @@ module cuda_functions
! cuBLAS
interface
subroutine cublas_dgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) bind(C,name='cublasDgemm')
subroutine cublas_dgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) &
bind(C,name='cublasDgemm_elpa_wrapper')
use iso_c_binding
......@@ -249,11 +271,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
real(kind=C_DOUBLE),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, b, c
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_dgemm_c
end interface
interface
subroutine cublas_sgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) bind(C,name='cublasSgemm')
subroutine cublas_sgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) &
bind(C,name='cublasSgemm_elpa_wrapper')
use iso_c_binding
......@@ -263,11 +288,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
real(kind=C_FLOAT),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, b, c
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_sgemm_c
end interface
interface
subroutine cublas_dtrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasDtrmm')
subroutine cublas_dtrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
bind(C,name='cublasDtrmm_elpa_wrapper')
use iso_c_binding
......@@ -277,11 +305,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,ldb
real(kind=C_DOUBLE), value :: alpha
integer(kind=C_intptr_T), value :: a, b
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_dtrmm_c
end interface
interface
subroutine cublas_strmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasStrmm')
subroutine cublas_strmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
bind(C,name='cublasStrmm_elpa_wrapper')
use iso_c_binding
......@@ -291,11 +322,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,ldb
real(kind=C_FLOAT), value :: alpha
integer(kind=C_intptr_T), value :: a, b
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_strmm_c
end interface
interface
subroutine cublas_zgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) bind(C,name='cublasZgemm_elpa_wrapper')
subroutine cublas_zgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) &
bind(C,name='cublasZgemm_elpa_wrapper')
use iso_c_binding
......@@ -305,12 +339,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
complex(kind=C_DOUBLE_COMPLEX),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, b, c
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_zgemm_c
end interface
interface
subroutine cublas_cgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) bind(C,name='cublasCgemm_elpa_wrapper')
subroutine cublas_cgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) &
bind(C,name='cublasCgemm_elpa_wrapper')
use iso_c_binding
......@@ -320,12 +356,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
complex(kind=C_FLOAT_COMPLEX),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, b, c
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_cgemm_c
end interface
interface
subroutine cublas_ztrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasZtrmm_elpa_wrapper')
subroutine cublas_ztrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
bind(C,name='cublasZtrmm_elpa_wrapper')
use iso_c_binding
......@@ -335,12 +373,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,ldb
complex(kind=C_DOUBLE_COMPLEX), value :: alpha
integer(kind=C_intptr_T), value :: a, b
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_ztrmm_c
end interface
interface
subroutine cublas_ctrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasCtrmm_elpa_wrapper')
subroutine cublas_ctrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
bind(C,name='cublasCtrmm_elpa_wrapper')
use iso_c_binding
......@@ -350,12 +390,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,ldb
complex(kind=C_FLOAT_COMPLEX), value :: alpha
integer(kind=C_intptr_T), value :: a, b
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_ctrmm_c
end interface
interface
subroutine cublas_dgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasDgemv')
subroutine cublas_dgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
bind(C,name='cublasDgemv_elpa_wrapper')
use iso_c_binding
......@@ -365,11 +407,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,incx,incy
real(kind=C_DOUBLE),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, x, y
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_dgemv_c
end interface
interface
subroutine cublas_sgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasSgemv')
subroutine cublas_sgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
bind(C,name='cublasSgemv_elpa_wrapper')
use iso_c_binding
......@@ -379,11 +424,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,incx,incy
real(kind=C_FLOAT),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, x, y
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_sgemv_c
end interface
interface
subroutine cublas_zgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasZgemv_elpa_wrapper')
subroutine cublas_zgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
bind(C,name='cublasZgemv_elpa_wrapper')
use iso_c_binding
......@@ -393,11 +441,14 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,incx,incy
complex(kind=C_DOUBLE_COMPLEX),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, x, y
integer(kind=C_intptr_T), value :: handle
end subroutine cublas_zgemv_c
end interface
interface
subroutine cublas_cgemv_c(cta, m, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasCgemv_elpa_wrapper')
subroutine cublas_cgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
bind(C,name='cublasCgemv_elpa_wrapper')
use iso_c_binding
......@@ -407,71 +458,42 @@ module cuda_functions
integer(kind=C_INT), intent(in), value :: lda,incx,incy
complex(kind=C_FLOAT_COMPLEX),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, x, y
end subroutine cublas_cgemv_c
end interface
interface
subroutine cublas_dsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasDsymv')
integer(kind=C_intptr_T), value :: handle
use iso_c_binding
implicit none
character(1,C_CHAR),value :: cta
integer(kind=C_INT),value :: n
integer(kind=C_INT), intent(in), value :: lda,incx,incy
real(kind=C_DOUBLE),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, x, y
end subroutine cublas_dsymv_c
end subroutine cublas_cgemv_c
end interface
interface
subroutine cublas_ssymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasSsymv')
use iso_c_binding
implicit none
character(1,C_CHAR),value :: cta
integer(kind=C_INT),value :: n
integer(kind=C_INT), intent(in), value :: lda,incx,incy
real(kind=C_FLOAT),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, x, y
end subroutine cublas_ssymv_c
end interface
contains
! interface
! subroutine cublas_zsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasZsymv')
!
! use iso_c_binding
!
! implicit none
! character(1,C_CHAR),value :: cta
! integer(kind=C_INT),value :: n
! integer(kind=C_INT), intent(in), value :: lda,incx,incy
! complex(kind=C_DOUBLE_COMPLEX),value :: alpha,beta
! integer(kind=C_intptr_T), value :: a, x, y
! end subroutine cublas_zsymv_c
! end interface
!
! interface
! subroutine cublas_csymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy) bind(C,name='cublasCsymv')
!
! use iso_c_binding
!
! implicit none
! character(1,C_CHAR),value :: cta
! integer(kind=C_INT),value :: n
! integer(kind=C_INT), intent(in), value :: lda,incx,incy
! complex(kind=C_FLOAT_COMPLEX),value :: alpha,beta
! integer(kind=C_intptr_T), value :: a, x, y
! end subroutine cublas_csymv_c
! end interface
! functions to set and query the CUDA devices
function cublas_create(handle) result(success)
use iso_c_binding
implicit none
contains
integer(kind=C_intptr_t) :: handle
logical :: success
#ifdef WITH_GPU_VERSION
success = cublas_create_c(handle) /= 0
#else
success = .true.
#endif
end function
! functions to set and query the CUDA devices
function cublas_destroy(handle) result(success)
use iso_c_binding
implicit none
integer(kind=C_intptr_t) :: handle
logical :: success
#ifdef WITH_GPU_VERSION
success = cublas_destroy_c(handle) /= 0
#else
success = .true.
#endif
end function
function cuda_threadsynchronize() result(success)
use iso_c_binding
......@@ -688,7 +710,7 @@ module cuda_functions
real(kind=C_DOUBLE) :: alpha,beta
integer(kind=C_intptr_T) :: a, b, c
#ifdef WITH_GPU_VERSION
call cublas_dgemm_c(c