!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".



! ELPA2 -- 2-stage solver for ELPA
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".


#include "config-f90.h"
#include <elpa/elpa_kernel_constants.h>

!> Kernel bookkeeping for the ELPA 2-stage solver.
!>
!> Declares the integer identifiers and printable names of the real and
!> complex kernels, the tables flagging which kernels this build provides,
!> and helper routines that report or select the kernel to use.
module ELPA2_utilities
  use ELPA_utilities
  use precision

  implicit none

  PRIVATE ! By default, all routines contained are private

  ! The following routines are public:

  public :: get_actual_real_kernel_name, get_actual_complex_kernel_name

  ! Identifiers of the real kernels
  public :: REAL_ELPA_KERNEL_GENERIC, REAL_ELPA_KERNEL_GENERIC_SIMPLE, &
            REAL_ELPA_KERNEL_BGP, REAL_ELPA_KERNEL_BGQ,                &
            REAL_ELPA_KERNEL_SSE, REAL_ELPA_KERNEL_SSE_BLOCK2,         &
            REAL_ELPA_KERNEL_SSE_BLOCK4, REAL_ELPA_KERNEL_SSE_BLOCK6,  &
            REAL_ELPA_KERNEL_AVX_BLOCK2,                               &
            REAL_ELPA_KERNEL_AVX_BLOCK4, REAL_ELPA_KERNEL_AVX_BLOCK6,  &
            REAL_ELPA_KERNEL_AVX2_BLOCK2,                              &
            REAL_ELPA_KERNEL_AVX2_BLOCK4, REAL_ELPA_KERNEL_AVX2_BLOCK6, &
            REAL_ELPA_KERNEL_GPU

  ! Identifiers of the complex kernels
  public :: COMPLEX_ELPA_KERNEL_GENERIC, COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE, &
            COMPLEX_ELPA_KERNEL_BGP, COMPLEX_ELPA_KERNEL_BGQ,                &
            COMPLEX_ELPA_KERNEL_SSE, COMPLEX_ELPA_KERNEL_SSE_BLOCK1,         &
            COMPLEX_ELPA_KERNEL_SSE_BLOCK2,                                  &
            COMPLEX_ELPA_KERNEL_AVX_BLOCK1,COMPLEX_ELPA_KERNEL_AVX_BLOCK2,   &
            COMPLEX_ELPA_KERNEL_AVX2_BLOCK1,COMPLEX_ELPA_KERNEL_AVX2_BLOCK2, &
            COMPLEX_ELPA_KERNEL_GPU

  public :: REAL_ELPA_KERNEL_NAMES, COMPLEX_ELPA_KERNEL_NAMES

  public :: get_actual_complex_kernel, get_actual_real_kernel

  public :: check_allowed_complex_kernels, check_allowed_real_kernels

  public :: AVAILABLE_COMPLEX_ELPA_KERNELS, AVAILABLE_REAL_ELPA_KERNELS

  public :: print_available_real_kernels, print_available_complex_kernels
  public :: query_available_real_kernels, query_available_complex_kernels

  public :: qr_decomposition_via_environment_variable

  ! Integer identifiers of the real kernels. The values are taken from the
  ! ELPA2_* preprocessor macros (presumably supplied by
  ! <elpa/elpa_kernel_constants.h> -- confirm). Within this module an
  ! identifier is also used as the 1-based index into REAL_ELPA_KERNEL_NAMES
  ! and AVAILABLE_REAL_ELPA_KERNELS.
  integer, parameter :: number_of_real_kernels           = ELPA2_NUMBER_OF_REAL_KERNELS
  integer, parameter :: REAL_ELPA_KERNEL_GENERIC         = ELPA2_REAL_KERNEL_GENERIC
  integer, parameter :: REAL_ELPA_KERNEL_GENERIC_SIMPLE  = ELPA2_REAL_KERNEL_GENERIC_SIMPLE
  integer, parameter :: REAL_ELPA_KERNEL_BGP             = ELPA2_REAL_KERNEL_BGP
  integer, parameter :: REAL_ELPA_KERNEL_BGQ             = ELPA2_REAL_KERNEL_BGQ
  integer, parameter :: REAL_ELPA_KERNEL_SSE             = ELPA2_REAL_KERNEL_SSE
  integer, parameter :: REAL_ELPA_KERNEL_SSE_BLOCK2      = ELPA2_REAL_KERNEL_SSE_BLOCK2
  integer, parameter :: REAL_ELPA_KERNEL_SSE_BLOCK4      = ELPA2_REAL_KERNEL_SSE_BLOCK4
  integer, parameter :: REAL_ELPA_KERNEL_SSE_BLOCK6      = ELPA2_REAL_KERNEL_SSE_BLOCK6
  integer, parameter :: REAL_ELPA_KERNEL_AVX_BLOCK2      = ELPA2_REAL_KERNEL_AVX_BLOCK2
  integer, parameter :: REAL_ELPA_KERNEL_AVX_BLOCK4      = ELPA2_REAL_KERNEL_AVX_BLOCK4
  integer, parameter :: REAL_ELPA_KERNEL_AVX_BLOCK6      = ELPA2_REAL_KERNEL_AVX_BLOCK6
  integer, parameter :: REAL_ELPA_KERNEL_AVX2_BLOCK2     = ELPA2_REAL_KERNEL_AVX2_BLOCK2
  integer, parameter :: REAL_ELPA_KERNEL_AVX2_BLOCK4     = ELPA2_REAL_KERNEL_AVX2_BLOCK4
  integer, parameter :: REAL_ELPA_KERNEL_AVX2_BLOCK6     = ELPA2_REAL_KERNEL_AVX2_BLOCK6
  integer(kind=ik), parameter :: REAL_ELPA_KERNEL_GPU    = ELPA2_REAL_KERNEL_GPU

! DEFAULT_REAL_ELPA_KERNEL: the kernel get_actual_real_kernel falls back to
! when the REAL_ELPA_KERNEL environment variable does not name a kernel.
!
! The selection used to be wrapped in "#if defined(WITH_REAL_AVX_BLOCK2_KERNEL)"
! with two identical arms; that wrapper had no effect and has been removed.
#ifndef WITH_ONE_SPECIFIC_REAL_KERNEL
  integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_GENERIC
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */

  ! Each macro that is defined contributes one definition, so more than one
  ! defined macro yields a duplicate declaration.
  ! NOTE(review): presumably the configure step defines exactly one -- confirm.
#ifdef WITH_REAL_GENERIC_KERNEL
  integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_GENERIC
#endif
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
  integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_REAL_SSE_KERNEL
  integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_SSE
#endif
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
  integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BLOCK2
#endif
#ifdef WITH_REAL_AVX_BLOCK4_KERNEL
  integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BLOCK4
#endif
#ifdef WITH_REAL_AVX_BLOCK6_KERNEL
  integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BLOCK6
#endif
  ! The BGP/BGQ identifiers declared in this module carry no AVX infix; the
  ! previously referenced REAL_ELPA_KERNEL_AVX_BGP/_AVX_BGQ are not declared.
#ifdef WITH_REAL_BGP_KERNEL
  integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_BGP
#endif
#ifdef WITH_REAL_BGQ_KERNEL
  integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_BGQ
#endif
#ifdef WITH_GPU_VERSION
  integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_GPU
#endif

#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */

  ! Printable names of the real kernels, in the same order as the
  ! availability table. These are also the strings the REAL_ELPA_KERNEL
  ! environment variable is compared against (after trimming blanks).
  character(35), parameter, dimension(number_of_real_kernels) :: &
  REAL_ELPA_KERNEL_NAMES =    (/"REAL_ELPA_KERNEL_GENERIC         ", &
                                "REAL_ELPA_KERNEL_GENERIC_SIMPLE  ", &
                                "REAL_ELPA_KERNEL_BGP             ", &
                                "REAL_ELPA_KERNEL_BGQ             ", &
                                "REAL_ELPA_KERNEL_SSE             ", &
                                "REAL_ELPA_KERNEL_SSE_BLOCK2      ", &
                                "REAL_ELPA_KERNEL_SSE_BLOCK4      ", &
                                "REAL_ELPA_KERNEL_SSE_BLOCK6      ", &
                                "REAL_ELPA_KERNEL_AVX_BLOCK2      ", &
                                "REAL_ELPA_KERNEL_AVX_BLOCK4      ", &
                                "REAL_ELPA_KERNEL_AVX_BLOCK6      ", &
                                "REAL_ELPA_KERNEL_AVX2_BLOCK2     ", &
                                "REAL_ELPA_KERNEL_AVX2_BLOCK4     ", &
                                "REAL_ELPA_KERNEL_AVX2_BLOCK6     ", &
                                "REAL_ELPA_KERNEL_GPU             "/)

  ! Integer identifiers of the complex kernels. The values are taken from the
  ! ELPA2_* preprocessor macros (presumably supplied by
  ! <elpa/elpa_kernel_constants.h> -- confirm). Within this module an
  ! identifier is also used as the 1-based index into
  ! COMPLEX_ELPA_KERNEL_NAMES and AVAILABLE_COMPLEX_ELPA_KERNELS.
  integer, parameter :: number_of_complex_kernels           = ELPA2_NUMBER_OF_COMPLEX_KERNELS
  integer, parameter :: COMPLEX_ELPA_KERNEL_GENERIC         = ELPA2_COMPLEX_KERNEL_GENERIC
  integer, parameter :: COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE  = ELPA2_COMPLEX_KERNEL_GENERIC_SIMPLE
  integer, parameter :: COMPLEX_ELPA_KERNEL_BGP             = ELPA2_COMPLEX_KERNEL_BGP
  integer, parameter :: COMPLEX_ELPA_KERNEL_BGQ             = ELPA2_COMPLEX_KERNEL_BGQ
  integer, parameter :: COMPLEX_ELPA_KERNEL_SSE             = ELPA2_COMPLEX_KERNEL_SSE
  integer, parameter :: COMPLEX_ELPA_KERNEL_SSE_BLOCK1      = ELPA2_COMPLEX_KERNEL_SSE_BLOCK1
  integer, parameter :: COMPLEX_ELPA_KERNEL_SSE_BLOCK2      = ELPA2_COMPLEX_KERNEL_SSE_BLOCK2
  integer, parameter :: COMPLEX_ELPA_KERNEL_AVX_BLOCK1      = ELPA2_COMPLEX_KERNEL_AVX_BLOCK1
  integer, parameter :: COMPLEX_ELPA_KERNEL_AVX_BLOCK2      = ELPA2_COMPLEX_KERNEL_AVX_BLOCK2
  integer, parameter :: COMPLEX_ELPA_KERNEL_AVX2_BLOCK1     = ELPA2_COMPLEX_KERNEL_AVX2_BLOCK1
  integer, parameter :: COMPLEX_ELPA_KERNEL_AVX2_BLOCK2     = ELPA2_COMPLEX_KERNEL_AVX2_BLOCK2
  integer(kind=ik), parameter :: COMPLEX_ELPA_KERNEL_GPU    = ELPA2_COMPLEX_KERNEL_GPU

! DEFAULT_COMPLEX_ELPA_KERNEL: the kernel get_actual_complex_kernel falls back
! to when the COMPLEX_ELPA_KERNEL environment variable does not name a kernel.
!
! The selection used to be wrapped in
! "#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL)" with two identical arms; that
! wrapper had no effect and has been removed.
#ifndef WITH_ONE_SPECIFIC_COMPLEX_KERNEL
  integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GENERIC
#else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */

  ! go through all kernels and set them
  ! Each macro that is defined contributes one definition, so more than one
  ! defined macro yields a duplicate declaration.
  ! NOTE(review): presumably the configure step defines exactly one -- confirm.
#ifdef WITH_COMPLEX_GENERIC_KERNEL
  integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GENERIC
#endif
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
  integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_COMPLEX_SSE_KERNEL
  integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_SSE
#endif
  ! NOTE(review): the availability table in this module tests
  ! WITH_COMPLEX_AVX_BLOCK1_KERNEL / WITH_COMPLEX_AVX_BLOCK2_KERNEL, while the
  ! two tests below use an AVX1 spelling. Confirm which macros config-f90.h
  ! really defines; the spelling is kept unchanged here.
#ifdef WITH_COMPLEX_AVX1_BLOCK1_KERNEL
  integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_AVX_BLOCK1
#endif
#ifdef WITH_COMPLEX_AVX1_BLOCK2_KERNEL
  integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_AVX_BLOCK2
#endif
#ifdef WITH_GPU_VERSION
  integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GPU
#endif

#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */

  ! Printable names of the complex kernels, in the same order as the
  ! availability table. These are also the strings the COMPLEX_ELPA_KERNEL
  ! environment variable is compared against (after trimming blanks).
  character(35), parameter, dimension(number_of_complex_kernels) :: &
  COMPLEX_ELPA_KERNEL_NAMES = (/"COMPLEX_ELPA_KERNEL_GENERIC         ", &
                                "COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE  ", &
                                "COMPLEX_ELPA_KERNEL_BGP             ", &
                                "COMPLEX_ELPA_KERNEL_BGQ             ", &
                                "COMPLEX_ELPA_KERNEL_SSE             ", &
                                "COMPLEX_ELPA_KERNEL_SSE_BLOCK1      ", &
                                "COMPLEX_ELPA_KERNEL_SSE_BLOCK2      ", &
                                "COMPLEX_ELPA_KERNEL_AVX_BLOCK1      ", &
                                "COMPLEX_ELPA_KERNEL_AVX_BLOCK2      ", &
                                "COMPLEX_ELPA_KERNEL_AVX2_BLOCK1     ", &
                                "COMPLEX_ELPA_KERNEL_AVX2_BLOCK2     ", &
                                "COMPLEX_ELPA_KERNEL_GPU             "/)

  ! Availability table of the real kernels. Entry i is 1 when the
  ! preprocessor macro of the i-th kernel is set for this build and 0
  ! otherwise; the entries follow the order of REAL_ELPA_KERNEL_NAMES.
  ! No comment lines are placed inside the constructor so that the
  ! continuation stays valid for every macro combination.
  integer(kind=ik), parameter ::                                       &
           AVAILABLE_REAL_ELPA_KERNELS(number_of_real_kernels) =       &
                                      (/                               &
#if WITH_REAL_GENERIC_KERNEL
                                        1                              &
#else
                                        0                              &
#endif
#if WITH_REAL_GENERIC_SIMPLE_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_BGP_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_BGQ_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_SSE_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_SSE_BLOCK2_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_SSE_BLOCK4_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_SSE_BLOCK6_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_AVX_BLOCK2_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_AVX_BLOCK4_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_AVX_BLOCK6_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_AVX2_BLOCK2_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_AVX2_BLOCK4_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#if WITH_REAL_AVX2_BLOCK6_KERNEL
                                        ,1                             &
#else
                                        ,0                             &
#endif
#ifdef WITH_GPU_VERSION
                                        ,1                             &
#else
                                        ,0                             &
#endif
                                      /)

  ! Availability table of the complex kernels. Entry i is 1 when the
  ! preprocessor macro of the i-th kernel is set for this build and 0
  ! otherwise; the entries follow the order of COMPLEX_ELPA_KERNEL_NAMES.
  ! No comment lines are placed inside the constructor so that the
  ! continuation stays valid for every macro combination.
  integer(kind=ik), parameter ::                                          &
           AVAILABLE_COMPLEX_ELPA_KERNELS(number_of_complex_kernels) =    &
                                      (/                                  &
#if WITH_COMPLEX_GENERIC_KERNEL
                                        1                                 &
#else
                                        0                                 &
#endif
#if WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
                                        ,1                                &
#else
                                        ,0                                &
#endif
#if WITH_COMPLEX_BGP_KERNEL
                                        ,1                                &
#else
                                        ,0                                &
#endif
#if WITH_COMPLEX_BGQ_KERNEL
                                        ,1                                &
#else
                                        ,0                                &
#endif
#if WITH_COMPLEX_SSE_KERNEL
                                        ,1                                &
#else
                                        ,0                                &
#endif
#if WITH_COMPLEX_SSE_BLOCK1_KERNEL
                                        ,1                                &
#else
                                        ,0                                &
#endif
#if WITH_COMPLEX_SSE_BLOCK2_KERNEL
                                        ,1                                &
#else
                                        ,0                                &
#endif
#if WITH_COMPLEX_AVX_BLOCK1_KERNEL
                                        ,1                                &
#else
                                        ,0                                &
#endif
#if WITH_COMPLEX_AVX_BLOCK2_KERNEL
                                        ,1                                &
#else
                                        ,0                                &
#endif
#if WITH_COMPLEX_AVX2_BLOCK1_KERNEL
                                        ,1                                &
#else
                                        ,0                                &
#endif
#if WITH_COMPLEX_AVX2_BLOCK2_KERNEL
                                        ,1                                &
#else
                                        ,0                                &
#endif
#ifdef WITH_GPU_VERSION
                                        ,1                                &
#else
                                        ,0                                &
#endif
                                      /)

!******
  contains
    !> List the real kernels on standard output.
    !>
    !> Writes the name of every kernel whose AVAILABLE_REAL_ELPA_KERNELS entry
    !> is 1, then the name returned by get_actual_real_kernel_name().
    subroutine print_available_real_kernels
#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none

      integer(kind=ik) :: i

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("print_available_real_kernels")
#endif

      do i = 1, number_of_real_kernels
        if (AVAILABLE_REAL_ELPA_KERNELS(i) .eq. 1) then
          write(*,*) REAL_ELPA_KERNEL_NAMES(i)
        endif
      enddo
      write(*,*) " "
      write(*,*) " At the moment the following kernel would be choosen:"
      write(*,*) get_actual_real_kernel_name()

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("print_available_real_kernels")
#endif

    end subroutine print_available_real_kernels

    !> List the real kernels on error_unit.
    !>
    !> Same report as print_available_real_kernels, but every line is written
    !> to error_unit instead of standard output.
    subroutine query_available_real_kernels
#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      implicit none

      integer :: i

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("query_available_real_kernels")
#endif

      do i = 1, number_of_real_kernels
        if (AVAILABLE_REAL_ELPA_KERNELS(i) .eq. 1) then
          write(error_unit,*) REAL_ELPA_KERNEL_NAMES(i)
        endif
      enddo
      write(error_unit,*) " "
      write(error_unit,*) " At the moment the following kernel would be choosen:"
      write(error_unit,*) get_actual_real_kernel_name()

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("query_available_real_kernels")
#endif

    end subroutine query_available_real_kernels

    !> List the complex kernels on standard output.
    !>
    !> Writes the name of every kernel whose AVAILABLE_COMPLEX_ELPA_KERNELS
    !> entry is 1, then the name returned by get_actual_complex_kernel_name().
    subroutine print_available_complex_kernels
#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none

      integer(kind=ik) :: i

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("print_available_complex_kernels")
#endif

      do i = 1, number_of_complex_kernels
        if (AVAILABLE_COMPLEX_ELPA_KERNELS(i) .eq. 1) then
          write(*,*) COMPLEX_ELPA_KERNEL_NAMES(i)
        endif
      enddo
      write(*,*) " "
      write(*,*) " At the moment the following kernel would be choosen:"
      write(*,*) get_actual_complex_kernel_name()

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("print_available_complex_kernels")
#endif

    end subroutine print_available_complex_kernels

    !> List the complex kernels on error_unit.
    !>
    !> Same report as print_available_complex_kernels, but every line is
    !> written to error_unit instead of standard output.
    subroutine query_available_complex_kernels
#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif

      implicit none

      integer :: i

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("query_available_complex_kernels")
#endif

      do i = 1, number_of_complex_kernels
        if (AVAILABLE_COMPLEX_ELPA_KERNELS(i) .eq. 1) then
          write(error_unit,*) COMPLEX_ELPA_KERNEL_NAMES(i)
        endif
      enddo
      write(error_unit,*) " "
      write(error_unit,*) " At the moment the following kernel would be choosen:"
      write(error_unit,*) get_actual_complex_kernel_name()

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("query_available_complex_kernels")
#endif

    end subroutine query_available_complex_kernels

    !> Determine the identifier of the real kernel to use.
    !>
    !> @return the kernel named by the REAL_ELPA_KERNEL environment variable
    !>         (as reported by real_kernel_via_environment_variable), or
    !>         DEFAULT_REAL_ELPA_KERNEL when that lookup yields 0.
    function get_actual_real_kernel() result(actual_kernel)
#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none

      integer(kind=ik) :: actual_kernel

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("get_actual_real_kernel")
#endif

      ! if kernel is not chosen via api
      ! check whether set by environment variable
      actual_kernel = real_kernel_via_environment_variable()

      ! Disabled: unconditional GPU override.
!#ifdef WITH_GPU_VERSION
!      actual_kernel = REAL_ELPA_KERNEL_GPU
!#endif

      ! 0 means the environment did not select a kernel: use the default
      if (actual_kernel .eq. 0) then
        actual_kernel = DEFAULT_REAL_ELPA_KERNEL
      endif

      ! Disabled: abort when a GPU build is asked for a non-GPU kernel.
!#ifdef WITH_GPU_VERSION
!      if (actual_kernel .ne. REAL_ELPA_KERNEL_GPU) then
!        print *,"if build with GPU you cannot choose another real kernel"
!        stop
!      endif
!#endif

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("get_actual_real_kernel")
#endif

    end function get_actual_real_kernel

    !> Name of the real kernel that get_actual_real_kernel() selects.
    !>
    !> @return the REAL_ELPA_KERNEL_NAMES entry for that kernel identifier
    function get_actual_real_kernel_name() result(actual_kernel_name)
#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none

      character(35)    :: actual_kernel_name
      integer(kind=ik) :: actual_kernel

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("get_actual_real_kernel_name")
#endif

      ! the kernel identifier doubles as the index into the name table
      actual_kernel = get_actual_real_kernel()
      actual_kernel_name = REAL_ELPA_KERNEL_NAMES(actual_kernel)

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("get_actual_real_kernel_name")
#endif

    end function get_actual_real_kernel_name

    !> Determine the identifier of the complex kernel to use.
    !>
    !> @return the kernel named by the COMPLEX_ELPA_KERNEL environment
    !>         variable (as reported by
    !>         complex_kernel_via_environment_variable), or
    !>         DEFAULT_COMPLEX_ELPA_KERNEL when that lookup yields 0.
    function get_actual_complex_kernel() result(actual_kernel)
#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none
      integer(kind=ik) :: actual_kernel

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("get_actual_complex_kernel")
#endif

      ! if kernel is not chosen via api
      ! check whether set by environment variable
      actual_kernel = complex_kernel_via_environment_variable()

      ! Disabled: unconditional GPU override.
!#ifdef WITH_GPU_VERSION
!     actual_kernel = COMPLEX_ELPA_KERNEL_GPU
!#endif

      ! 0 means the environment did not select a kernel: use the default
      if (actual_kernel .eq. 0) then
        actual_kernel = DEFAULT_COMPLEX_ELPA_KERNEL
      endif

      ! Disabled: abort when a GPU build is asked for a non-GPU kernel.
!#ifdef WITH_GPU_VERSION
!      if (actual_kernel .ne. COMPLEX_ELPA_KERNEL_GPU) then
!        print *,"if build with GPU you cannot choose another complex kernel"
!        stop
!      endif
!#endif

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("get_actual_complex_kernel")
#endif

    end function get_actual_complex_kernel

   !> Name of the complex kernel that get_actual_complex_kernel() selects.
   !>
   !> @return the COMPLEX_ELPA_KERNEL_NAMES entry for that kernel identifier
   function get_actual_complex_kernel_name() result(actual_kernel_name)
#ifdef HAVE_DETAILED_TIMINGS
     use timings
#endif
     use precision
     implicit none

     character(35)    :: actual_kernel_name
     integer(kind=ik) :: actual_kernel

#ifdef HAVE_DETAILED_TIMINGS
     call timer%start("get_actual_complex_kernel_name")
#endif

     ! the kernel identifier doubles as the index into the name table
     actual_kernel = get_actual_complex_kernel()
     actual_kernel_name = COMPLEX_ELPA_KERNEL_NAMES(actual_kernel)

#ifdef HAVE_DETAILED_TIMINGS
     call timer%stop("get_actual_complex_kernel_name")
#endif

   end function get_actual_complex_kernel_name

   !> Check whether a real kernel identifier may be used in this build.
   !>
   !> @param THIS_REAL_ELPA_KERNEL  kernel identifier to check
   !> @return err  .false. when the identifier lies in
   !>              1..number_of_real_kernels and its
   !>              AVAILABLE_REAL_ELPA_KERNELS entry is 1; .true. otherwise
   function check_allowed_real_kernels(THIS_REAL_ELPA_KERNEL) result(err)
#ifdef HAVE_DETAILED_TIMINGS
     use timings
#endif
     use precision
     implicit none

     integer(kind=ik), intent(in) :: THIS_REAL_ELPA_KERNEL
     logical                      :: err

#ifdef HAVE_DETAILED_TIMINGS
     call timer%start("check_allowed_real_kernels")
#endif

     if (THIS_REAL_ELPA_KERNEL .lt. 1 .or. &
         THIS_REAL_ELPA_KERNEL .gt. number_of_real_kernels) then
       ! An identifier outside the table cannot be looked up; report it as
       ! not allowed instead of indexing out of bounds.
       err = .true.
     else
       err = (AVAILABLE_REAL_ELPA_KERNELS(THIS_REAL_ELPA_KERNEL) .ne. 1)
     endif

#ifdef HAVE_DETAILED_TIMINGS
     call timer%stop("check_allowed_real_kernels")
#endif

   end function check_allowed_real_kernels

   !> Check whether a complex kernel identifier may be used in this build.
   !>
   !> @param THIS_COMPLEX_ELPA_KERNEL  kernel identifier to check
   !> @return err  .false. when the identifier lies in
   !>              1..number_of_complex_kernels and its
   !>              AVAILABLE_COMPLEX_ELPA_KERNELS entry is 1; .true. otherwise
   function check_allowed_complex_kernels(THIS_COMPLEX_ELPA_KERNEL) result(err)
#ifdef HAVE_DETAILED_TIMINGS
     use timings
#endif
     use precision
     implicit none

     integer(kind=ik), intent(in) :: THIS_COMPLEX_ELPA_KERNEL
     logical                      :: err

#ifdef HAVE_DETAILED_TIMINGS
     call timer%start("check_allowed_complex_kernels")
#endif

     if (THIS_COMPLEX_ELPA_KERNEL .lt. 1 .or. &
         THIS_COMPLEX_ELPA_KERNEL .gt. number_of_complex_kernels) then
       ! An identifier outside the table cannot be looked up; report it as
       ! not allowed instead of indexing out of bounds.
       err = .true.
     else
       err = (AVAILABLE_COMPLEX_ELPA_KERNELS(THIS_COMPLEX_ELPA_KERNEL) .ne. 1)
     endif

#ifdef HAVE_DETAILED_TIMINGS
     call timer%stop("check_allowed_complex_kernels")
#endif

   end function check_allowed_complex_kernels

   !> Read the ELPA_QR_DECOMPOSITION environment variable.
   !>
   !> The variable is only queried when HAVE_ENVIRONMENT_CHECKING is defined;
   !> otherwise it is treated as blank.
   !>
   !> @param useQR  .true. when the value is "yes"; .false. when it is "no"
   !>               or anything else (then isSet is .false.)
   !> @return isSet .true. only when the value is exactly "yes" or "no"
   function qr_decomposition_via_environment_variable(useQR) result(isSet)
#ifdef HAVE_DETAILED_TIMINGS
     use timings
#endif
     implicit none
     logical, intent(out) :: useQR
     logical              :: isSet
     CHARACTER(len=255)   :: ELPA_QR_DECOMPOSITION

#ifdef HAVE_DETAILED_TIMINGS
     call timer%start("qr_decomposition_via_environment_variable")
#endif

     ! Give both results a defined value on every path: useQR is intent(out)
     ! and was previously left undefined when the variable held neither
     ! "yes" nor "no".
     isSet = .false.
     useQR = .false.

     ! Blank the buffer so the comparisons below are well defined when the
     ! environment query is compiled out.
     ELPA_QR_DECOMPOSITION = " "

#if defined(HAVE_ENVIRONMENT_CHECKING)
     call get_environment_variable("ELPA_QR_DECOMPOSITION",ELPA_QR_DECOMPOSITION)
#endif

     select case (trim(ELPA_QR_DECOMPOSITION))
     case ("yes")
       useQR = .true.
       isSet = .true.
     case ("no")
       useQR = .false.
       isSet = .true.
     end select

#ifdef HAVE_DETAILED_TIMINGS
     call timer%stop("qr_decomposition_via_environment_variable")
#endif

   end function qr_decomposition_via_environment_variable

   !> Look up the real kernel requested through the REAL_ELPA_KERNEL
   !> environment variable.
   !>
   !> The variable is only queried when HAVE_ENVIRONMENT_CHECKING is defined;
   !> otherwise it is treated as blank.
   !>
   !> @return kernel  1-based position in REAL_ELPA_KERNEL_NAMES of the entry
   !>                 that equals the variable (both blank-trimmed), or 0 when
   !>                 no entry matches
   function real_kernel_via_environment_variable() result(kernel)
#ifdef HAVE_DETAILED_TIMINGS
     use timings
#endif
     use precision
     implicit none
     integer(kind=ik)   :: kernel
     CHARACTER(len=255) :: REAL_KERNEL_ENVIRONMENT
     integer(kind=ik)   :: i

#ifdef HAVE_DETAILED_TIMINGS
     call timer%start("real_kernel_via_environment_variable")
#endif

     ! 0 = nothing selected. Set before the search so the result is defined
     ! even if the loop body never runs.
     kernel = 0

     ! Blank the buffer so the comparison is well defined when the
     ! environment query is compiled out.
     REAL_KERNEL_ENVIRONMENT = " "

#if defined(HAVE_ENVIRONMENT_CHECKING)
     call get_environment_variable("REAL_ELPA_KERNEL",REAL_KERNEL_ENVIRONMENT)
#endif

     do i = 1, size(REAL_ELPA_KERNEL_NAMES)
       if (trim(REAL_KERNEL_ENVIRONMENT) .eq. trim(REAL_ELPA_KERNEL_NAMES(i))) then
         kernel = i
         exit
       endif
     enddo

#ifdef HAVE_DETAILED_TIMINGS
     call timer%stop("real_kernel_via_environment_variable")
#endif

   end function real_kernel_via_environment_variable

   !> Look up the complex kernel requested through the COMPLEX_ELPA_KERNEL
   !> environment variable.
   !>
   !> The variable is only queried when HAVE_ENVIRONMENT_CHECKING is defined;
   !> otherwise it is treated as blank.
   !>
   !> @return kernel  1-based position in COMPLEX_ELPA_KERNEL_NAMES of the
   !>                 entry that equals the variable (both blank-trimmed), or
   !>                 0 when no entry matches
   function complex_kernel_via_environment_variable() result(kernel)
#ifdef HAVE_DETAILED_TIMINGS
     use timings
#endif
     use precision
     implicit none
     ! same kind as the result of real_kernel_via_environment_variable
     integer(kind=ik)   :: kernel
     CHARACTER(len=255) :: COMPLEX_KERNEL_ENVIRONMENT
     integer(kind=ik)   :: i

#ifdef HAVE_DETAILED_TIMINGS
     call timer%start("complex_kernel_via_environment_variable")
#endif

     ! 0 = nothing selected. Set before the search so the result is defined
     ! even if the loop body never runs.
     kernel = 0

     ! Blank the buffer so the comparison is well defined when the
     ! environment query is compiled out.
     COMPLEX_KERNEL_ENVIRONMENT = " "

#if defined(HAVE_ENVIRONMENT_CHECKING)
     call get_environment_variable("COMPLEX_ELPA_KERNEL",COMPLEX_KERNEL_ENVIRONMENT)
#endif

     do i = 1, size(COMPLEX_ELPA_KERNEL_NAMES)
       if (trim(COMPLEX_ELPA_KERNEL_NAMES(i)) .eq. trim(COMPLEX_KERNEL_ENVIRONMENT)) then
         kernel = i
         exit
       endif
     enddo

#ifdef HAVE_DETAILED_TIMINGS
     call timer%stop("complex_kernel_via_environment_variable")
#endif

   end function complex_kernel_via_environment_variable
!-------------------------------------------------------------------------------

end module ELPA2_utilities