interface_c_kernel.F90 17.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
12
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
13
14
15
16
17
18
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
Andreas Marek's avatar
Andreas Marek committed
19
!    https://elpa.mpcdf.mpg.de/
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.

41
! This is a module contains all CUDA C Calls
42
43
44
45
46
47
! it was provided by NVIDIA with their ELPA GPU port and
! adapted for an ELPA release by A.Marek, RZG

#include "config-f90.h"

module cuda_c_kernel
48

49
  implicit none
50

51
  interface
52
53
    subroutine launch_compute_hh_trafo_c_kernel_real_double(q, hh, hh_tau, nev, nb, ldq, ncols) &
               bind(c)
54
55
      use iso_c_binding
      implicit none
56
57
58
      integer(kind=c_int), value :: nev, nb, ldq, ncols
      integer(kind=c_intptr_t), value :: q
      integer(c_intptr_t), value :: hh_tau ,hh
59
60
    end subroutine
  end interface
61

62
#ifdef WANT_SINGLE_PRECISION_REAL
63
  interface
64
65
    subroutine launch_compute_hh_trafo_c_kernel_real_single(q, hh, hh_tau, nev, nb, ldq, ncols) &
               bind(c)
66
67
      use iso_c_binding
      implicit none
68
69
70
      integer(kind=c_int), value :: nev, nb, ldq, ncols
      integer(kind=c_intptr_t), value :: q
      integer(c_intptr_t), value :: hh_tau ,hh
71
72
73
74
75
    end subroutine
  end interface
#endif

  interface
76
77
    subroutine launch_compute_hh_trafo_c_kernel_complex_double(q, hh, hh_tau, nev, nb, ldq, ncols) &
               bind(c)
78
79
      use iso_c_binding
      implicit none
80
      integer(kind=c_int), value :: nev, nb, ldq, ncols
81
82
      integer(kind=c_intptr_t), value :: q
      integer(kind=c_intptr_t), value :: hh_tau ,hh
83
84
85
86
87
    end subroutine
  end interface

#ifdef WANT_SINGLE_PRECISION_COMPLEX
  interface
88
89
    subroutine launch_compute_hh_trafo_c_kernel_complex_single(q, hh, hh_tau, nev, nb, ldq, ncols) &
               bind(c)
90
91
      use iso_c_binding
      implicit none
92
      integer(kind=c_int), value :: nev, nb, ldq, ncols
93
94
      integer(kind=c_intptr_t), value :: q
      integer(kind=c_intptr_t), value :: hh_tau ,hh
95
96
97
98
99
    end subroutine
  end interface
#endif

  interface
Andreas Marek's avatar
Andreas Marek committed
100
    subroutine launch_my_unpack_c_kernel_real_double(row_count, n_offset, max_idx,stripe_width, a_dim2, stripe_count, &
101
               l_nev,row_group_dev, a_dev) bind(c)
102
103
      use iso_c_binding
      implicit none
104
105
106
      integer(kind=c_int), value :: row_count
      integer(kind=c_int), value :: n_offset, max_idx,stripe_width, a_dim2, stripe_count, l_nev
      integer(kind=c_intptr_t), value :: a_dev, row_group_dev
107
108
109
    end subroutine
  end interface

110
#ifdef WANT_SINGLE_PRECISION_REAL
111
  interface
Andreas Marek's avatar
Andreas Marek committed
112
    subroutine launch_my_unpack_c_kernel_real_single(row_count, n_offset, max_idx,stripe_width, a_dim2, stripe_count, &
113
               l_nev,row_group_dev, a_dev) bind(c)
114
115
      use iso_c_binding
      implicit none
116
117
118
      integer(kind=c_int), value :: row_count
      integer(kind=c_int), value :: n_offset, max_idx,stripe_width, a_dim2, stripe_count, l_nev
      integer(kind=c_intptr_t), value :: a_dev, row_group_dev
119
120
121
122
123
    end subroutine
  end interface
#endif

  interface
Andreas Marek's avatar
Andreas Marek committed
124
    subroutine launch_my_pack_c_kernel_real_double(row_count, n_offset, max_idx,stripe_width, a_dim2, &
125
               stripe_count, l_nev, a_dev, row_group_dev) bind(c)
126
127
      use iso_c_binding
      implicit none
128
      integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev
129
130
131
132
133
      integer(kind=c_intptr_t), value :: a_dev
      integer(kind=c_intptr_t), value :: row_group_dev
    end subroutine
  end interface

134
#ifdef WANT_SINGLE_PRECISION_REAL
135
  interface
Andreas Marek's avatar
Andreas Marek committed
136
    subroutine launch_my_pack_c_kernel_real_single(row_count, n_offset, max_idx,stripe_width, a_dim2, stripe_count, &
137
               l_nev, a_dev, row_group_dev) bind(c)
138
139
      use iso_c_binding
      implicit none
140
      integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev
141
142
      integer(kind=c_intptr_t), value :: a_dev
      integer(kind=c_intptr_t), value :: row_group_dev
143
144
    end subroutine
  end interface
145
146
147
#endif

  interface
Andreas Marek's avatar
Andreas Marek committed
148
    subroutine launch_extract_hh_tau_c_kernel_real_double(hh, hh_tau, nb, n, is_zero) &
149
               bind(c)
150
151
      use iso_c_binding
      implicit none
152
153
      integer(kind=c_intptr_t), value :: hh
      integer(kind=c_intptr_t), value :: hh_tau
154
155
      integer(kind=c_int), value :: nb, n
      integer(kind=c_int), value :: is_zero
156
157
    end subroutine
  end interface
158

159
#ifdef WANT_SINGLE_PRECISION_REAL
160
  interface
Andreas Marek's avatar
Andreas Marek committed
161
    subroutine launch_extract_hh_tau_c_kernel_real_single(hh, hh_tau, nb, n, is_zero) &
162
               bind(c)
163
164
      use iso_c_binding
      implicit none
165
166
      integer(kind=c_intptr_t), value :: hh
      integer(kind=c_intptr_t), value :: hh_tau
167
168
      integer(kind=c_int), value :: nb, n
      integer(kind=c_int), value :: is_zero
169
170
    end subroutine
  end interface
171
#endif
172
173

  interface
Andreas Marek's avatar
Andreas Marek committed
174
    subroutine launch_my_unpack_c_kernel_complex_double(row_count, n_offset, max_idx, stripe_width, a_dim2, &
175
               stripe_count, l_nev, row_group_dev, a_dev) bind(c)
176
177
      use iso_c_binding
      implicit none
178
179
180
      integer(kind=c_int), value :: row_count
      integer(kind=c_int), value :: n_offset, max_idx,stripe_width, a_dim2, stripe_count,l_nev
      integer(kind=c_intptr_t), value :: a_dev, row_group_dev
181
182
183
    end subroutine
  end interface

184
185
#ifdef WANT_SINGLE_PRECISION_COMPLEX
 interface
Andreas Marek's avatar
Andreas Marek committed
186
    subroutine launch_my_unpack_c_kernel_complex_single(row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev, &
187
               row_group_dev, a_dev) bind(c)
188
189
      use iso_c_binding
      implicit none
190
191
192
      integer(kind=c_int), value :: row_count
      integer(kind=c_int), value :: n_offset, max_idx,stripe_width, a_dim2, stripe_count,l_nev
      integer(kind=c_intptr_t), value :: a_dev, row_group_dev
193
194
195
196
    end subroutine
  end interface
#endif

197
  interface
Andreas Marek's avatar
Andreas Marek committed
198
    subroutine launch_my_pack_c_kernel_complex_double(row_count, n_offset, max_idx,stripe_width,a_dim2, &
199
               stripe_count, l_nev, a_dev, row_group_dev) bind(c)
200
201
      use iso_c_binding
      implicit none
202
      integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width, a_dim2,stripe_count, l_nev
203
204
205
206
207
      integer(kind=c_intptr_t), value :: a_dev
      integer(kind=c_intptr_t), value :: row_group_dev
    end subroutine
  end interface

208
#ifdef WANT_SINGLE_PRECISION_COMPLEX
209
  interface
Andreas Marek's avatar
Andreas Marek committed
210
    subroutine launch_my_pack_c_kernel_complex_single(row_count, n_offset, max_idx,stripe_width,a_dim2, &
211
               stripe_count, l_nev, a_dev, row_group_dev) bind(c)
212
213
      use iso_c_binding
      implicit none
214
      integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width, a_dim2,stripe_count, l_nev
215
216
217
218
219
220
221
      integer(kind=c_intptr_t), value :: a_dev
      integer(kind=c_intptr_t), value :: row_group_dev
    end subroutine
  end interface
#endif

  interface
Andreas Marek's avatar
Andreas Marek committed
222
    subroutine launch_extract_hh_tau_c_kernel_complex_double(hh, hh_tau, nb, n, is_zero) &
223
               bind(c)
224
225
      use iso_c_binding
      implicit none
226
227
      integer(kind=c_intptr_t), value :: hh
      integer(kind=c_intptr_t), value :: hh_tau
228
229
      integer(kind=c_int), value :: nb, n
      integer(kind=c_int), value :: is_zero
230
231
232
233
    end subroutine
  end interface

#ifdef WANT_SINGLE_PRECISION_COMPLEX
234
  interface
Andreas Marek's avatar
Andreas Marek committed
235
    subroutine launch_extract_hh_tau_c_kernel_complex_single(hh, hh_tau, nb, n, is_zero) &
236
               bind(c)
237
238
      use iso_c_binding
      implicit none
239
240
      integer(kind=c_intptr_t), value :: hh
      integer(kind=c_intptr_t), value :: hh_tau
241
242
      integer(kind=c_int), value :: nb, n
      integer(kind=c_int), value :: is_zero
243
244
    end subroutine
  end interface
245
246
#endif

247
  contains
248

249
    subroutine launch_compute_hh_trafo_gpu_kernel_real_double(q, hh, hh_tau, nev, nb, ldq, ncols)
250
251
      use iso_c_binding
      implicit none
252
253
254
      integer(kind=c_int) :: nev, nb, ldq, ncols
      integer(kind=c_intptr_t) :: q
      integer(c_intptr_t) :: hh_tau ,hh
255
#ifdef WITH_GPU_VERSION
256
      call launch_compute_hh_trafo_c_kernel_real_double(q, hh, hh_tau, nev, nb, ldq, ncols)
257
258
#endif
    end subroutine
259

260
#ifdef WANT_SINGLE_PRECISION_REAL
261
    subroutine launch_compute_hh_trafo_gpu_kernel_real_single(q, hh, hh_tau, nev, nb, ldq, ncols)
262
263
      use iso_c_binding
      implicit none
264
265
266
      integer(kind=c_int) :: nev, nb, ldq, ncols
      integer(kind=c_intptr_t) :: q
      integer(c_intptr_t) :: hh_tau ,hh
267
#ifdef WITH_GPU_VERSION
268
      call launch_compute_hh_trafo_c_kernel_real_single(q, hh, hh_tau, nev, nb, ldq, ncols)
269
270
271
272
#endif
    end subroutine
#endif

273
    subroutine launch_compute_hh_trafo_gpu_kernel_complex_double(q, hh, hh_tau, nev, nb, ldq, ncols)
274
275
      use iso_c_binding
      implicit none
276
      integer(kind=c_int) :: nev, nb, ldq, ncols
277
278
      integer(kind=c_intptr_t) :: q
      integer(kind=c_intptr_t) :: hh_tau ,hh
279
#ifdef WITH_GPU_VERSION
280
      call launch_compute_hh_trafo_c_kernel_complex_double(q, hh, hh_tau, nev, nb, ldq, ncols)
281
282
283
284
#endif
    end subroutine

#ifdef WANT_SINGLE_PRECISION_COMPLEX
285
    subroutine launch_compute_hh_trafo_gpu_kernel_complex_single(q, hh, hh_tau, nev, nb, ldq, ncols)
286
287
      use iso_c_binding
      implicit none
288
      integer(kind=c_int) :: nev, nb, ldq, ncols
289
290
      integer(kind=c_intptr_t) :: q
      integer(kind=c_intptr_t) :: hh_tau ,hh
291
#ifdef WITH_GPU_VERSION
292
      call launch_compute_hh_trafo_c_kernel_complex_single(q, hh, hh_tau, nev, nb, ldq, ncols)
293
294
#endif
    end subroutine
295
296
#endif

Andreas Marek's avatar
Andreas Marek committed
297
    subroutine launch_my_unpack_gpu_kernel_real_double(row_count, n_offset, max_idx,stripe_width, a_dim2, stripe_count, &
298
               l_nev,row_group_dev, a_dev)
299
300
      use iso_c_binding
      implicit none
301
302
303
      integer(kind=c_int) :: row_count
      integer(kind=c_int) :: n_offset, max_idx,stripe_width, a_dim2, stripe_count, l_nev
      integer(kind=c_intptr_t) :: a_dev, row_group_dev
304
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
305
      call launch_my_unpack_c_kernel_real_double(row_count, n_offset, max_idx,stripe_width, a_dim2, stripe_count, &
306
           l_nev,row_group_dev, a_dev)
307
308
309
#endif
    end subroutine

310
#ifdef WANT_SINGLE_PRECISION_REAL
Andreas Marek's avatar
Andreas Marek committed
311
    subroutine launch_my_unpack_gpu_kernel_real_single(row_count, n_offset, max_idx,stripe_width, a_dim2, stripe_count, &
312
               l_nev,row_group_dev, a_dev)
313
314
      use iso_c_binding
      implicit none
315
316
317
      integer(kind=c_int) :: row_count
      integer(kind=c_int) :: n_offset, max_idx,stripe_width, a_dim2, stripe_count, l_nev
      integer(kind=c_intptr_t) :: a_dev, row_group_dev
318
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
319
      call launch_my_unpack_c_kernel_real_single(row_count, n_offset, max_idx,stripe_width, a_dim2, stripe_count, &
320
           l_nev,row_group_dev, a_dev)
321
322
323
324
#endif
    end subroutine
#endif

Andreas Marek's avatar
Andreas Marek committed
325
    subroutine launch_my_pack_gpu_kernel_real_double(row_count, n_offset, max_idx,stripe_width, a_dim2, &
326
               stripe_count, l_nev, a_dev, row_group_dev)
327
328
      use iso_c_binding
      implicit none
329
      integer(kind=c_int) :: row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev
330
331
332
      integer(kind=c_intptr_t) :: a_dev
      integer(kind=c_intptr_t) :: row_group_dev
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
333
      call launch_my_pack_c_kernel_real_double(row_count, n_offset, max_idx,stripe_width, a_dim2, stripe_count, l_nev, a_dev, &
334
           row_group_dev)
335
336
337
#endif
    end subroutine

338
#ifdef WANT_SINGLE_PRECISION_REAL
Andreas Marek's avatar
Andreas Marek committed
339
    subroutine launch_my_pack_gpu_kernel_real_single(row_count, n_offset, max_idx,stripe_width, &
340
               a_dim2, stripe_count, l_nev, a_dev, row_group_dev)
341
342
      use iso_c_binding
      implicit none
343
      integer(kind=c_int) :: row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev
344
345
346
      integer(kind=c_intptr_t) :: a_dev
      integer(kind=c_intptr_t) :: row_group_dev
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
347
      call launch_my_pack_c_kernel_real_single(row_count, n_offset, max_idx,stripe_width, a_dim2, stripe_count, l_nev, a_dev, &
348
           row_group_dev)
349
350
351
352
#endif
    end subroutine
#endif

Andreas Marek's avatar
Andreas Marek committed
353
    subroutine launch_extract_hh_tau_gpu_kernel_real_double(hh, hh_tau, nb, n, is_zero)
354
355
      use iso_c_binding
      implicit none
356
357
      integer(kind=c_intptr_t) :: hh
      integer(kind=c_intptr_t) :: hh_tau
358
359
      integer(kind=c_int) :: nb, n
      integer(kind=c_int) :: is_zero
360
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
361
      call launch_extract_hh_tau_c_kernel_real_double(hh, hh_tau, nb, n, is_zero)
362
363
364
#endif
    end subroutine

365
#ifdef WANT_SINGLE_PRECISION_REAL
Andreas Marek's avatar
Andreas Marek committed
366
    subroutine launch_extract_hh_tau_gpu_kernel_real_single(hh, hh_tau, nb, n, is_zero)
367
368
      use iso_c_binding
      implicit none
369
370
      integer(kind=c_intptr_t) :: hh
      integer(kind=c_intptr_t) :: hh_tau
371
372
      integer(kind=c_int) :: nb, n
      integer(kind=c_int) :: is_zero
373
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
374
      call launch_extract_hh_tau_c_kernel_real_single(hh, hh_tau, nb, n, is_zero)
375
376
#endif
    end subroutine
377
378
#endif

Andreas Marek's avatar
Andreas Marek committed
379
    subroutine launch_my_unpack_gpu_kernel_complex_double(row_count, n_offset, max_idx, stripe_width, &
380
               a_dim2, stripe_count, l_nev, row_group_dev, a_dev)
381
382
      use iso_c_binding
      implicit none
383
384
385
      integer(kind=c_int) :: row_count
      integer(kind=c_int) :: n_offset, max_idx,stripe_width, a_dim2, stripe_count,l_nev
      integer(kind=c_intptr_t) :: a_dev, row_group_dev
386
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
387
      call launch_my_unpack_c_kernel_complex_double(row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev, &
388
           row_group_dev, a_dev)
389
390
391
#endif
    end subroutine

392
#ifdef WANT_SINGLE_PRECISION_COMPLEX
Andreas Marek's avatar
Andreas Marek committed
393
    subroutine launch_my_unpack_gpu_kernel_complex_single(row_count, n_offset, max_idx, stripe_width, &
394
               a_dim2, stripe_count, l_nev, row_group_dev, a_dev)
395
396
      use iso_c_binding
      implicit none
397
398
399
      integer(kind=c_int) :: row_count
      integer(kind=c_int) :: n_offset, max_idx,stripe_width, a_dim2, stripe_count,l_nev
      integer(kind=c_intptr_t) :: a_dev, row_group_dev
400
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
401
      call launch_my_unpack_c_kernel_complex_single(row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev, &
402
           row_group_dev, a_dev)
403
404
405
406
#endif
    end subroutine
#endif

Andreas Marek's avatar
Andreas Marek committed
407
    subroutine launch_my_pack_gpu_kernel_complex_double(row_count, n_offset, max_idx,stripe_width,a_dim2, &
408
               stripe_count, l_nev, a_dev, row_group_dev)
409
410
      use iso_c_binding
      implicit none
411
      integer(kind=c_int) :: row_count, n_offset, max_idx, stripe_width, a_dim2,stripe_count, l_nev
412
413
414
      integer(kind=c_intptr_t) :: a_dev
      integer(kind=c_intptr_t) :: row_group_dev
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
415
      call launch_my_pack_c_kernel_complex_double(row_count, n_offset, max_idx,stripe_width,a_dim2, stripe_count, l_nev, a_dev, &
416
           row_group_dev)
417
418
419
#endif
    end subroutine

420
#ifdef WANT_SINGLE_PRECISION_COMPLEX
Andreas Marek's avatar
Andreas Marek committed
421
    subroutine launch_my_pack_gpu_kernel_complex_single(row_count, n_offset, max_idx,stripe_width,a_dim2, &
422
               stripe_count, l_nev, a_dev, row_group_dev)
423
424
      use iso_c_binding
      implicit none
425
      integer(kind=c_int) :: row_count, n_offset, max_idx, stripe_width, a_dim2,stripe_count, l_nev
426
427
428
      integer(kind=c_intptr_t) :: a_dev
      integer(kind=c_intptr_t) :: row_group_dev
#ifdef WITH_GPU_VERSION
429
430
      call launch_my_pack_c_kernel_complex_single(row_count, n_offset, max_idx,stripe_width,a_dim2, stripe_count, l_nev, a_dev, &
           row_group_dev)
431
432
#endif
    end subroutine
433
#endif
434

Andreas Marek's avatar
Andreas Marek committed
435
    subroutine launch_extract_hh_tau_gpu_kernel_complex_double(hh, hh_tau, nb, n, is_zero)
436
437
      use iso_c_binding
      implicit none
438
439
      integer(kind=c_intptr_t) :: hh
      integer(kind=c_intptr_t) :: hh_tau
440
441
      integer(kind=c_int) :: nb, n
      integer(kind=c_int) :: is_zero
442
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
443
      call launch_extract_hh_tau_c_kernel_complex_double(hh, hh_tau, nb, n, is_zero)
444
445
446
#endif
    end subroutine

447
#ifdef WANT_SINGLE_PRECISION_COMPLEX
Andreas Marek's avatar
Andreas Marek committed
448
    subroutine launch_extract_hh_tau_gpu_kernel_complex_single(hh, hh_tau, nb, n, is_zero)
449
450
      use iso_c_binding
      implicit none
451
452
      integer(kind=c_intptr_t) :: hh
      integer(kind=c_intptr_t) :: hh_tau
453
454
      integer(kind=c_int) :: nb, n
      integer(kind=c_int) :: is_zero
455
#ifdef WITH_GPU_VERSION
Andreas Marek's avatar
Andreas Marek committed
456
      call launch_extract_hh_tau_c_kernel_complex_single(hh, hh_tau, nb, n, is_zero)
457
458
459
#endif
    end subroutine
#endif
460

461
end module cuda_c_kernel