cudaFunctions.cu 16 KB
Newer Older
Pavel Kus's avatar
Pavel Kus committed
1
#define DEBUG_CUDA 1
2
3
4
#include <stdio.h>
#include <math.h>
#include <stdio.h>
5
6
7
8
9
10
11
12
13
14
15
16
//    This file is part of ELPA.
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
17
//    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
//      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//
// --------------------------------------------------------------------------------------------------
//
// This file was written by A. Marek, MPCDF


55
56
57
58
59
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <alloca.h>
#include <stdint.h>
60
#include <complex.h>
Pavel Kus's avatar
Pavel Kus committed
61
#include <cublas_v2.h>
62
63
64
65
66
67
68
69
70
71
72
73
74

#include "config-f90.h"

#define errormessage(x, ...) do { fprintf(stderr, "%s:%d " x, __FILE__, __LINE__, __VA_ARGS__ ); } while (0)

#ifdef DEBUG_CUDA
#define debugmessage(x, ...) do { fprintf(stderr, "%s:%d " x, __FILE__, __LINE__, __VA_ARGS__ ); } while (0)
#else
#define debugmessage(x, ...)
#endif

#ifdef WITH_GPU_VERSION
extern "C" {
Pavel Kus's avatar
Pavel Kus committed
75
  
76
    int cublasCreateFromC(intptr_t *cublas_handle) {
Pavel Kus's avatar
Pavel Kus committed
77
//     printf("in c: %p\n", *cublas_handle);
78
    *cublas_handle = (intptr_t) malloc(sizeof(cublasHandle_t));
Pavel Kus's avatar
Pavel Kus committed
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
//     printf("in c: %p\n", *cublas_handle);
    cublasStatus_t status = cublasCreate((cublasHandle_t*) *cublas_handle);
    if (status == CUBLAS_STATUS_SUCCESS) {
//       printf("all OK\n");
      return 1;
    }
    else if (status == CUBLAS_STATUS_NOT_INITIALIZED) {
      errormessage("Error in cublasCreate: %s\n", "the CUDA Runtime initialization failed");
      return 0;      
    }       
    else if (status == CUBLAS_STATUS_ALLOC_FAILED) {
      errormessage("Error in cublasCreate: %s\n", "the resources could not be allocated");
      return 0;
    }
    else{
      errormessage("Error in cublasCreate: %s\n", "unknown error");
      return 0;
    }                
  }

  int cublasDestroyFromC(intptr_t *cublas_handle) {
    cublasStatus_t status = cublasDestroy(*((cublasHandle_t*) *cublas_handle));
    *cublas_handle = (intptr_t) NULL;
    if (status == CUBLAS_STATUS_SUCCESS) {
//       printf("all OK\n");
      return 1;
    }
    else if (status == CUBLAS_STATUS_NOT_INITIALIZED) {
      errormessage("Error in cublasDestroy: %s\n", "the library has not been initialized");
      return 0;      
    }       
    else{
      errormessage("Error in cublasCreate: %s\n", "unknown error");
      return 0;
    }                
  }
115
116
117
118
119
120
121
122
123
124
125

  int cudaThreadSynchronizeFromC() {
    cudaError_t cuerr = cudaThreadSynchronize();
    if (cuerr != cudaSuccess) {
      errormessage("Error in cudaThreadSynchronize: %s\n",cudaGetErrorString(cuerr));
      return 0;
    }
    return 1;
  }


126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
  int cudaSetDeviceFromC(int n) {

    cudaError_t cuerr = cudaSetDevice(n);
    if (cuerr != cudaSuccess) {
      errormessage("Error in cudaSetDevice: %s\n",cudaGetErrorString(cuerr));
      return 0;
    }
    return 1;
  }

  int cudaGetDeviceCountFromC(int *count) {

    cudaError_t cuerr = cudaGetDeviceCount(count);
    if (cuerr != cudaSuccess) {
      errormessage("Error in cudaGetDeviceCount: %s\n",cudaGetErrorString(cuerr));
      return 0;
    }
    return 1;
  }

  int cudaDeviceSynchronizeFromC() {

    cudaError_t cuerr = cudaDeviceSynchronize();
    if (cuerr != cudaSuccess) {
      errormessage("Error in cudaGetDeviceCount: %s\n",cudaGetErrorString(cuerr));
      return 0;
    }
    return 1;
  }


  int cudaMallocFromC(intptr_t *a, size_t width_height) {

    cudaError_t cuerr = cudaMalloc((void **) a, width_height);
#ifdef DEBUG_CUDA
Pavel Kus's avatar
Pavel Kus committed
161
    printf("CUDA Malloc,  pointer address: %p, size: %d \n", *a, width_height);
162
163
164
165
166
167
168
169
170
#endif
    if (cuerr != cudaSuccess) {
      errormessage("Error in cudaMalloc: %s\n",cudaGetErrorString(cuerr));
      return 0;
    }
    return 1;
  }
  int cudaFreeFromC(intptr_t *a) {
#ifdef DEBUG_CUDA
Pavel Kus's avatar
Pavel Kus committed
171
    printf("CUDA Free, pointer address: %p \n", a);
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
#endif
    cudaError_t cuerr = cudaFree(a);

    if (cuerr != cudaSuccess) {
      errormessage("Error in cudaFree: %s\n",cudaGetErrorString(cuerr));
      return 0;
    }
    return 1;
  }

  int cudaMemsetFromC(intptr_t *a, int value, size_t count) {

    cudaError_t cuerr = cudaMemset( a, value, count);
    if (cuerr != cudaSuccess) {
      errormessage("Error in cudaMemset: %s\n",cudaGetErrorString(cuerr));
      return 0;
    }
    return 1;
  }

  int cudaMemcpyFromC(intptr_t *dest, intptr_t *src, size_t count, int dir) {

    cudaError_t cuerr = cudaMemcpy( dest, src, count, (cudaMemcpyKind)dir);
    if (cuerr != cudaSuccess) {
      errormessage("Error in cudaMemcpy: %s\n",cudaGetErrorString(cuerr));
      return 0;
    }
    return 1;
  }

  int cudaMemcpy2dFromC(intptr_t *dest, size_t dpitch, intptr_t *src, size_t spitch, size_t width, size_t height, int dir) {

    cudaError_t cuerr = cudaMemcpy2D( dest, dpitch, src, spitch, width, height, (cudaMemcpyKind)dir);
    if (cuerr != cudaSuccess) {
      errormessage("Error in cudaMemcpy2d: %s\n",cudaGetErrorString(cuerr));
      return 0;
    }
    return 1;
  }
  int cudaMemcpyDeviceToDeviceFromC(void) {
      int val = cudaMemcpyDeviceToDevice;
      return val;
  }
  int cudaMemcpyHostToDeviceFromC(void) {
      int val = cudaMemcpyHostToDevice;
      return val;
  }
  int cudaMemcpyDeviceToHostFromC(void) {
      int val = cudaMemcpyDeviceToHost;
      return val;
  }
  int cudaHostRegisterPortableFromC(void) {
      int val = cudaHostRegisterPortable;
      return val;
  }
  int cudaHostRegisterMappedFromC(void) {
      int val = cudaHostRegisterMapped;
      return val;
  }
231
  
Pavel Kus's avatar
Pavel Kus committed
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
  cublasOperation_t operation_new_api(char trans) {
    if (trans == 'N' || trans == 'n') {
      return CUBLAS_OP_N;
    }
    else if (trans == 'T' || trans == 't') {
      return CUBLAS_OP_T;
    }
    else if (trans == 'C' || trans == 'c') {
      return CUBLAS_OP_C;
    }
    else {
      errormessage("Error when transfering %c to cublasOperation_t\n",trans);
      // or abort?
      return CUBLAS_OP_N;
    }
  }


  cublasFillMode_t fill_mode_new_api(char uplo) {
    if (uplo == 'L' || uplo == 'l') {
      return CUBLAS_FILL_MODE_LOWER;
    }
    else if(uplo == 'U' || uplo == 'u') {
      return CUBLAS_FILL_MODE_UPPER;
    }
    else {
      errormessage("Error when transfering %c to cublasFillMode_t\n", uplo);
      // or abort?
      return CUBLAS_FILL_MODE_LOWER;
    }
  }
  
  cublasSideMode_t side_mode_new_api(char side) {
    if (side == 'L' || side == 'l') {
      return CUBLAS_SIDE_LEFT;
    }    
    else if (side == 'R' || side == 'r') {
      return CUBLAS_SIDE_RIGHT;
    }
    else{
      errormessage("Error when transfering %c to cublasSideMode_t\n", side);
      // or abort?
      return CUBLAS_SIDE_LEFT;
    }
  }
  
  cublasDiagType_t diag_type_new_api(char diag) {
    if (diag == 'N' || diag == 'n') {
      return CUBLAS_DIAG_NON_UNIT;
    }
    else if (diag == 'U' || diag == 'u') {
      return CUBLAS_DIAG_UNIT;
    }
    else {
      errormessage("Error when transfering %c to cublasDiagMode_t\n", diag);
      // or abort?
      return CUBLAS_DIAG_NON_UNIT;
    }
  }
      

  
  void cublasDgemv_elpa_wrapper (intptr_t handle, char trans, int m, int n, double alpha,
                               const double *A, int lda,  const double *x, int incx,
                               double beta, double *y, int incy) {    
    
    cublasDgemv(*((cublasHandle_t*)handle), operation_new_api(trans), 
                m, n, &alpha, A, lda, x, incx, &beta, y, incy);     
  }
  
  void cublasSgemv_elpa_wrapper (intptr_t handle, char trans, int m, int n, float alpha,
                               const float *A, int lda,  const float *x, int incx,
                               float beta, float *y, int incy) {    
    
    cublasSgemv(*((cublasHandle_t*)handle), operation_new_api(trans), 
                m, n, &alpha, A, lda, x, incx, &beta, y, incy);     
  }
  
310
311
312
  void cublasZgemv_elpa_wrapper (intptr_t handle, char trans, int m, int n, double _Complex alpha,
                               const double _Complex *A, int lda,  const double _Complex *x, int incx,
                               double _Complex beta, double _Complex *y, int incy) {    
313
314
315
316
317
318
319
320

    cuDoubleComplex alpha_casted = *((cuDoubleComplex*)(&alpha));
    cuDoubleComplex beta_casted = *((cuDoubleComplex*)(&beta));
    
    const cuDoubleComplex* A_casted = (const cuDoubleComplex*) A;
    const cuDoubleComplex* x_casted = (const cuDoubleComplex*) x;
    cuDoubleComplex* y_casted = (cuDoubleComplex*) y;
    
Pavel Kus's avatar
Pavel Kus committed
321
322
    cublasZgemv(*((cublasHandle_t*)handle), operation_new_api(trans), 
                m, n, &alpha_casted, A_casted, lda, x_casted, incx, &beta_casted, y_casted, incy);     
323
324
  }
  
325
326
327
  void cublasCgemv_elpa_wrapper (intptr_t handle, char trans, int m, int n, float _Complex alpha,
                               const float _Complex *A, int lda,  const float _Complex *x, int incx,
                               float _Complex beta, float _Complex *y, int incy) {    
328

329
330
    cuFloatComplex alpha_casted = *((cuFloatComplex*)(&alpha));
    cuFloatComplex beta_casted = *((cuFloatComplex*)(&beta));
331
    
332
333
334
335
    const cuFloatComplex* A_casted = (const cuFloatComplex*) A;
    const cuFloatComplex* x_casted = (const cuFloatComplex*) x;
    cuFloatComplex* y_casted = (cuFloatComplex*) y;
    
Pavel Kus's avatar
Pavel Kus committed
336
337
    cublasCgemv(*((cublasHandle_t*)handle), operation_new_api(trans), 
                m, n, &alpha_casted, A_casted, lda, x_casted, incx, &beta_casted, y_casted, incy);     
338
339
  }
  
Pavel Kus's avatar
Pavel Kus committed
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
  
  void cublasDgemm_elpa_wrapper (intptr_t handle, char transa, char transb, int m, int n, int k,
                               double alpha, const double *A, int lda,
                               const double *B, int ldb, double beta,
                               double *C, int ldc) {
    
    cublasDgemm(*((cublasHandle_t*)handle), operation_new_api(transa), operation_new_api(transb), 
                m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
  }

  void cublasSgemm_elpa_wrapper (intptr_t handle, char transa, char transb, int m, int n, int k,
                               float alpha, const float *A, int lda,
                               const float *B, int ldb, float beta,
                               float *C, int ldc) {
    
    cublasSgemm(*((cublasHandle_t*)handle), operation_new_api(transa), operation_new_api(transb), 
                m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
  }

  void cublasZgemm_elpa_wrapper (intptr_t handle, char transa, char transb, int m, int n, int k,
360
361
362
                               double _Complex alpha, const double _Complex *A, int lda,
                               const double _Complex *B, int ldb, double _Complex beta,
                               double _Complex *C, int ldc) {
363
364
365
366
367
368
369
370
    
    cuDoubleComplex alpha_casted = *((cuDoubleComplex*)(&alpha));
    cuDoubleComplex beta_casted = *((cuDoubleComplex*)(&beta));
    
    const cuDoubleComplex* A_casted = (const cuDoubleComplex*) A;
    const cuDoubleComplex* B_casted = (const cuDoubleComplex*) B;
    cuDoubleComplex* C_casted = (cuDoubleComplex*) C;
    
Pavel Kus's avatar
Pavel Kus committed
371
372
    cublasZgemm(*((cublasHandle_t*)handle), operation_new_api(transa), operation_new_api(transb), 
                m, n, k, &alpha_casted, A_casted, lda, B_casted, ldb, &beta_casted, C_casted, ldc);
373
374
  }

Pavel Kus's avatar
Pavel Kus committed
375
  void cublasCgemm_elpa_wrapper (intptr_t handle, char transa, char transb, int m, int n, int k,
376
377
378
                               float _Complex alpha, const float _Complex *A, int lda,
                               const float _Complex *B, int ldb, float _Complex beta,
                               float _Complex *C, int ldc) {
379
380
381
    
    cuFloatComplex alpha_casted = *((cuFloatComplex*)(&alpha));
    cuFloatComplex beta_casted = *((cuFloatComplex*)(&beta));
382
    
383
384
385
386
    const cuFloatComplex* A_casted = (const cuFloatComplex*) A;
    const cuFloatComplex* B_casted = (const cuFloatComplex*) B;
    cuFloatComplex* C_casted = (cuFloatComplex*) C;
    
Pavel Kus's avatar
Pavel Kus committed
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
    cublasCgemm(*((cublasHandle_t*)handle), operation_new_api(transa), operation_new_api(transb), 
                m, n, k, &alpha_casted, A_casted, lda, B_casted, ldb, &beta_casted, C_casted, ldc);
  }

  
  // todo: new CUBLAS API diverged from standard BLAS api for these functions
  // todo: it provides out-of-place (and apparently more efficient) implementation
  // todo: by passing B twice (in place of C as well), we should fall back to in-place algorithm
  
  void cublasDtrmm_elpa_wrapper (intptr_t handle, char side, char uplo, char transa, char diag,
                               int m, int n, double alpha, const double *A,
                               int lda, double *B, int ldb){

    cublasDtrmm(*((cublasHandle_t*)handle), side_mode_new_api(side), fill_mode_new_api(uplo), operation_new_api(transa), 
                diag_type_new_api(diag), m, n, &alpha, A, lda, B, ldb, B, ldb);
  }

  void cublasStrmm_elpa_wrapper (intptr_t handle, char side, char uplo, char transa, char diag,
                               int m, int n, float alpha, const float *A,
                               int lda, float *B, int ldb){

    cublasStrmm(*((cublasHandle_t*)handle), side_mode_new_api(side), fill_mode_new_api(uplo), operation_new_api(transa), 
                diag_type_new_api(diag), m, n, &alpha, A, lda, B, ldb, B, ldb);
410
411
  }

Pavel Kus's avatar
Pavel Kus committed
412
  void cublasZtrmm_elpa_wrapper (intptr_t handle, char side, char uplo, char transa, char diag,
413
414
                               int m, int n, double _Complex alpha, const double _Complex *A,
                               int lda, double _Complex *B, int ldb){
415
416
417
418
419
420

    cuDoubleComplex alpha_casted = *((cuDoubleComplex*)(&alpha));
    
    const cuDoubleComplex* A_casted = (const cuDoubleComplex*) A;
    cuDoubleComplex* B_casted = (cuDoubleComplex*) B;    
    
Pavel Kus's avatar
Pavel Kus committed
421
422
    cublasZtrmm(*((cublasHandle_t*)handle), side_mode_new_api(side), fill_mode_new_api(uplo), operation_new_api(transa), 
                diag_type_new_api(diag), m, n, &alpha_casted, A_casted, lda, B_casted, ldb, B_casted, ldb);
423
424
  }

Pavel Kus's avatar
Pavel Kus committed
425
  void cublasCtrmm_elpa_wrapper (intptr_t handle, char side, char uplo, char transa, char diag,
426
427
                               int m, int n, float _Complex alpha, const float _Complex *A,
                               int lda, float _Complex *B, int ldb){
428

429
430
431
432
    cuFloatComplex alpha_casted = *((cuFloatComplex*)(&alpha));
    
    const cuFloatComplex* A_casted = (const cuFloatComplex*) A;
    cuFloatComplex* B_casted = (cuFloatComplex*) B;    
433
    
Pavel Kus's avatar
Pavel Kus committed
434
435
    cublasCtrmm(*((cublasHandle_t*)handle), side_mode_new_api(side), fill_mode_new_api(uplo), operation_new_api(transa), 
                diag_type_new_api(diag), m, n, &alpha_casted, A_casted, lda, B_casted, ldb, B_casted, ldb);
436
437
438
  }

  
439
440
}
#endif /* WITH_GPU_VERSION */