elpa1_template.F90 23.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif

55
#include "../general/sanity.F90"
56
#include "../general/error_checking.inc"
57

58
59
60
61
62
63
64
#ifdef ACTIVATE_SKEW
function elpa_solve_skew_evp_&
         &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
   &_impl (obj, &
#else
65
66
function elpa_solve_evp_&
         &MATH_DATATYPE&
Pavel Kus's avatar
Pavel Kus committed
67
68
   &_1stage_&
   &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
69
   &_impl (obj, &
70
#endif
Andreas Marek's avatar
Andreas Marek committed
71
72
73
74
75
76
77
78
79
80
81
#ifdef REDISTRIBUTE_MATRIX
   aExtern, &
#else
   a, &
#endif
   ev, &
#ifdef REDISTRIBUTE_MATRIX
   qExtern) result(success)
#else
   q) result(success)
#endif
82
83
   use precision
   use cuda_functions
84
85
   use hip_functions
   use elpa_gpu
86
   use mod_check_for_gpu
87
   use, intrinsic :: iso_c_binding
88
   use elpa_abstract_impl
89
90
   use elpa_mpi
   use elpa1_compute
91
   use elpa_omp
Andreas Marek's avatar
Andreas Marek committed
92
93
94
#ifdef REDISTRIBUTE_MATRIX
   use elpa_scalapack_interfaces
#endif
95
   use solve_tridi
96
   use thread_affinity
97
   implicit none
Pavel Kus's avatar
Pavel Kus committed
98
#include "../general/precision_kinds.F90"
Andreas Marek's avatar
Andreas Marek committed
99
100
101
102
   class(elpa_abstract_impl_t), intent(inout)                         :: obj
   real(kind=REAL_DATATYPE), intent(out)                              :: ev(obj%na)

#ifdef REDISTRIBUTE_MATRIX
Pavel Kus's avatar
Pavel Kus committed
103

104
#ifdef USE_ASSUMED_SIZE
Andreas Marek's avatar
Andreas Marek committed
105
106
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: aExtern(obj%local_nrows,*)
   MATH_DATATYPE(kind=rck), optional,target,intent(out)               :: qExtern(obj%local_nrows,*)
107
#else
Andreas Marek's avatar
Andreas Marek committed
108
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: aExtern(obj%local_nrows,obj%local_ncols)
109
#ifdef ACTIVATE_SKEW
Andreas Marek's avatar
Andreas Marek committed
110
111
112
113
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: qExtern(obj%local_nrows,2*obj%local_ncols)
#else
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: qExtern(obj%local_nrows,obj%local_ncols)
#endif
114
#endif /* USE_ASSUMED_SIZE */
Andreas Marek's avatar
Andreas Marek committed
115
116
117
118
119
120
121
122

#else /* REDISTRIBUTE_MATRIX */

#ifdef USE_ASSUMED_SIZE
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: a(obj%local_nrows,*)
   MATH_DATATYPE(kind=rck), optional,target,intent(out)               :: q(obj%local_nrows,*)
#else
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: a(obj%local_nrows,obj%local_ncols)
123
#ifdef ACTIVATE_SKEW
Carolin Penke's avatar
Carolin Penke committed
124
125
126
127
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,2*obj%local_ncols)
#else
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
#endif
128
129
#endif /* USE_ASSUMED_SIZE */

Andreas Marek's avatar
Andreas Marek committed
130
131
#endif /* REDISTRIBUTE_MATRIX */

132
#ifdef REDISTRIBUTE_MATRIX
Andreas Marek's avatar
Andreas Marek committed
133
134
    MATH_DATATYPE(kind=rck), pointer                                  :: a(:,:)
    MATH_DATATYPE(kind=rck), pointer                                  :: q(:,:)
135
#endif
Pavel Kus's avatar
Pavel Kus committed
136
137

#if REALCASE == 1
Andreas Marek's avatar
Andreas Marek committed
138
139
140
   real(kind=C_DATATYPE_KIND), allocatable           :: tau(:)
   real(kind=C_DATATYPE_KIND), allocatable, target   :: q_dummy(:,:)
   real(kind=C_DATATYPE_KIND), pointer               :: q_actual(:,:)
141
142
143
#endif /* REALCASE */

#if COMPLEXCASE == 1
Andreas Marek's avatar
Andreas Marek committed
144
145
   real(kind=REAL_DATATYPE), allocatable             :: q_real(:,:)
   complex(kind=C_DATATYPE_KIND), allocatable        :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
146
   complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
Andreas Marek's avatar
Andreas Marek committed
147
   complex(kind=C_DATATYPE_KIND), pointer            :: q_actual(:,:)
148
149
#endif /* COMPLEXCASE */

150

151
   integer(kind=c_int)                             :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
152
   integer(kind=MPI_KIND)                          :: np_rowsMPI, np_colsMPI
153

154
   logical                                         :: useGPU
Carolin Penke's avatar
Carolin Penke committed
155
156
   integer(kind=c_int)                             :: skewsymmetric
   logical                                         :: isSkewsymmetric
157
158
   logical                                         :: success

159
160
   logical                                         :: do_useGPU, do_useGPU_tridiag, &
                                                      do_useGPU_solve_tridi, do_useGPU_trans_ev
161
162
   integer(kind=ik)                                :: numberOfGPUDevices

163
164
   integer(kind=c_int)                             :: my_pe, n_pes, my_prow, my_pcol
   integer(kind=MPI_KIND)                          :: mpierr, my_peMPI, n_pesMPI, my_prowMPI, my_pcolMPI
165
166
   real(kind=C_DATATYPE_KIND), allocatable         :: e(:)
   logical                                         :: wantDebug
167
   integer(kind=c_int)                             :: istat, debug, gpu
168
   character(200)                                  :: errorMessage
Andreas Marek's avatar
Andreas Marek committed
169
   integer(kind=ik)                                :: na, nev, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
170
                                                      mpi_comm_rows, mpi_comm_cols,        &
Andreas Marek's avatar
Andreas Marek committed
171
                                                      mpi_comm_all, check_pd, i, error, matrixRows
172
   real(kind=C_DATATYPE_KIND)                      :: thres_pd
Andreas Marek's avatar
Andreas Marek committed
173
174
175
176

#ifdef REDISTRIBUTE_MATRIX
   integer(kind=ik)                                :: nblkInternal, matrixOrder
   character(len=1)                                :: layoutInternal, layoutExternal
177
178
   integer(kind=c_int)                             :: external_blacs_ctxt
   integer(kind=BLAS_KIND)                         :: external_blacs_ctxt_
Andreas Marek's avatar
Andreas Marek committed
179
180
181
182
183
184
185
186
187
188
189
   integer(kind=BLAS_KIND)                         :: np_rows_, np_cols_, my_prow_, my_pcol_
   integer(kind=BLAS_KIND)                         :: np_rows__, np_cols__, my_prow__, my_pcol__
   integer(kind=BLAS_KIND)                         :: sc_desc_(1:9), sc_desc(1:9)
   integer(kind=BLAS_KIND)                         :: na_rows_, na_cols_, info_, blacs_ctxt_
   integer(kind=ik)                                :: mpi_comm_rows_, mpi_comm_cols_
   integer(kind=MPI_KIND)                          :: mpi_comm_rowsMPI_, mpi_comm_colsMPI_
   character(len=1), parameter                     :: matrixLayouts(2) = [ 'C', 'R' ]

   MATH_DATATYPE(kind=rck), allocatable, target               :: aIntern(:,:)
   MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable, target   :: qIntern(:,:)
#endif
190
191
   integer(kind=c_int)                             :: pinningInfo

Pavel Kus's avatar
Pavel Kus committed
192
   logical                                         :: do_tridiag, do_solve, do_trans_ev
193
   integer(kind=ik)                                :: nrThreads, limitThreads
Carolin Penke's avatar
Carolin Penke committed
194
   integer(kind=ik)                                :: global_index
Andreas Marek's avatar
Andreas Marek committed
195
196

   logical                                         :: reDistributeMatrix, doRedistributeMatrix
197

198
199
200
#ifdef ACTIVATE_SKEW
   call obj%timer%start("elpa_solve_skew_evp_&
#else
201
   call obj%timer%start("elpa_solve_evp_&
202
#endif
203
204
205
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
206
   &")
207

Andreas Marek's avatar
Andreas Marek committed
208
209
210
211
   reDistributeMatrix = .false.

   matrixRows = obj%local_nrows
   matrixCols = obj%local_ncols
212

Andreas Marek's avatar
Andreas Marek committed
213
214
215
216
217
   call obj%get("mpi_comm_parent", mpi_comm_all, error)
   if (error .ne. ELPA_OK) then
     print *,"Problem getting option. Aborting..."
     stop
   endif
218

Andreas Marek's avatar
Andreas Marek committed
219
220
221
   call mpi_comm_rank(int(mpi_comm_all,kind=MPI_KIND), my_peMPI, mpierr)
   my_pe = int(my_peMPI,kind=c_int)

222
#ifdef WITH_OPENMP_TRADITIONAL
223
224
225
226
227
   ! store the number of OpenMP threads used in the calling function
   ! restore this at the end of ELPA 2
   omp_threads_caller = omp_get_max_threads()

   ! check the number of threads that ELPA should use internally
228
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
229
230
   call obj%get("limit_openmp_threads",limitThreads,error)
   if (limitThreads .eq. 0) then
231
#endif
232
233
     call obj%get("omp_threads",nrThreads,error)
     call omp_set_num_threads(nrThreads)
234
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
235
236
237
238
   else
     nrThreads = 1
     call omp_set_num_threads(nrThreads)
   endif
239
#endif
Andreas Marek's avatar
Andreas Marek committed
240
241
242
#else
   nrThreads = 1
#endif
243
244
245
#ifdef WITH_NVTX
   call nvtxRangePush("elpa1")
#endif
246
247
248
249
250
251
252
253
   call obj%get("output_pinning_information", pinningInfo, error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option for debug. Aborting..."
     stop
   endif
   
   if (pinningInfo .eq. 1) then
     call init_thread_affinity(nrThreads)
Andreas Marek's avatar
Andreas Marek committed
254

255
256
257
258
     call check_thread_affinity()
     if (my_pe .eq. 0) call print_thread_affinity(my_pe)
     call cleanup_thread_affinity()
   endif
Andreas Marek's avatar
Andreas Marek committed
259
   success = .true.
Andreas Marek's avatar
Andreas Marek committed
260

261
#ifdef REDISTRIBUTE_MATRIX
Andreas Marek's avatar
Andreas Marek committed
262
   if (present(qExtern)) then
263
#else
Andreas Marek's avatar
Andreas Marek committed
264
   if (present(q)) then
265
#endif
266
     obj%eigenvalues_only = .false.
Andreas Marek's avatar
Andreas Marek committed
267
   else
268
     obj%eigenvalues_only = .true.
Andreas Marek's avatar
Andreas Marek committed
269
270
   endif

271
272
   na         = obj%na
   nev        = obj%nev
Andreas Marek's avatar
Andreas Marek committed
273
   matrixRows = obj%local_nrows
274
275
276
   nblk       = obj%nblk
   matrixCols = obj%local_ncols

Andreas Marek's avatar
Andreas Marek committed
277
278
279
280
281
282
283
284
285
286
287
288
   call obj%get("mpi_comm_rows",mpi_comm_rows,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem getting option. Aborting..."
     stop
   endif
   call obj%get("mpi_comm_cols",mpi_comm_cols,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem getting option. Aborting..."
     stop
   endif

#ifdef REDISTRIBUTE_MATRIX
Andreas Marek's avatar
Andreas Marek committed
289
#include "../helpers/elpa_redistribute_template.F90"
Andreas Marek's avatar
Andreas Marek committed
290
291
#endif /* REDISTRIBUTE_MATRIX */

Andreas Marek's avatar
Andreas Marek committed
292
293
294
295
296
297
298
   ! special case na = 1
   if (na .eq. 1) then
#if REALCASE == 1
     ev(1) = a(1,1)
#endif
#if COMPLEXCASE == 1
     ev(1) = real(a(1,1))
Pavel Kus's avatar
Pavel Kus committed
299
#endif
Andreas Marek's avatar
Andreas Marek committed
300
     if (.not.(obj%eigenvalues_only)) then
Pavel Kus's avatar
Pavel Kus committed
301
       q(1,1) = ONE
Andreas Marek's avatar
Andreas Marek committed
302
     endif
303
304

     ! restore original OpenMP settings
305
#ifdef WITH_OPENMP_TRADITIONAL
306
307
     call omp_set_num_threads(omp_threads_caller)
#endif
308
309
310
#ifdef ACTIVATE_SKEW
     call obj%timer%stop("elpa_solve_skew_evp_&
#else
Andreas Marek's avatar
Andreas Marek committed
311
     call obj%timer%stop("elpa_solve_evp_&
312
#endif
Andreas Marek's avatar
Andreas Marek committed
313
314
315
316
317
318
319
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &")
     return
   endif

320
321
322
323
324
   if (nev == 0) then
     nev = 1
     obj%eigenvalues_only = .true.
   endif

325
   if (gpu_vendor() == NVIDIA_GPU) then
326
327
328
329
330
331
332
333
334
335
336
337
338
339
     call obj%get("gpu",gpu,error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option for GPU. Aborting..."
       stop
     endif
     if (gpu .eq. 1) then
       print *,"You still use the deprecated option 'gpu', consider switching to 'nvidia-gpu'. Will set the new keyword &
              & 'nvidia-gpu' now"
       call obj%set("nvidia-gpu",gpu,error)
       if (error .ne. ELPA_OK) then
         print *,"Problem setting option for NVIDIA GPU. Aborting..."
         stop
       endif
     endif
340
341
342
343
344
345
346
347
348
349
350
     call obj%get("nvidia-gpu",gpu,error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option for NVIDIA GPU. Aborting..."
       stop
     endif
   else if (gpu_vendor() == AMD_GPU) then
     call obj%get("amd-gpu",gpu,error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option for AMD GPU. Aborting..."
       stop
     endif
351
352
353
354
355
356
   else if (gpu_vendor() == INTEL_GPU) then
     call obj%get("intel-gpu",gpu,error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option for INTEL GPU. Aborting..."
       stop
     endif
357
358
   else
     gpu = 0
Andreas Marek's avatar
Andreas Marek committed
359
   endif
360

361
   if (gpu .eq. 1) then
362
363
364
365
     useGPU =.true.
   else
     useGPU = .false.
   endif
366

367
368
369
370
371
372
373
374
375
376
377
#ifdef ACTIVATE_SKEW
   !call obj%get("is_skewsymmetric",skewsymmetric,error)
   !if (error .ne. ELPA_OK) then
   !  print *,"Problem getting option for skewsymmetric. Aborting..."
   !  stop
   !endif
   !isSkewsymmetric = (skewsymmetric == 1)
   isSkewsymmetric = .true.
#else
   isSkewsymmetric = .false.
#endif
378

379
   call obj%timer%start("mpi_communication")
380

381
382
383
384
385
   call mpi_comm_rank(int(mpi_comm_rows,kind=MPI_KIND), my_prowMPI, mpierr)
   call mpi_comm_rank(int(mpi_comm_cols,kind=MPI_KIND), my_pcolMPI, mpierr)

   my_prow = int(my_prowMPI,kind=c_int)
   my_pcol = int(my_pcolMPI,kind=c_int)
386

387
388
   call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND), np_rowsMPI, mpierr)
   call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND), np_colsMPI, mpierr)
389
390
391

   np_rows = int(np_rowsMPI,kind=c_int)
   np_cols = int(np_colsMPI,kind=c_int)
392

393
   call obj%timer%stop("mpi_communication")
394

Andreas Marek's avatar
Andreas Marek committed
395
396
   call obj%get("debug", debug,error)
   if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
397
     print *,"Problem setting option for debug. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
398
399
     stop
   endif
400
   wantDebug = debug == 1
401
   do_useGPU = .false.
402

403

404
   if (useGPU) then
405
     call obj%timer%start("check_for_gpu")
406

407
     if (check_for_gpu(obj, my_pe, numberOfGPUDevices, wantDebug=wantDebug)) then
408
409
       do_useGPU = .true.
       ! set the neccessary parameters
410
       call set_gpu_parameters()
411
412
413
414
415
     else
       print *,"GPUs are requested but not detected! Aborting..."
       success = .false.
       return
     endif
416
417
418
419
420
421
422
423
#ifdef WITH_OPENMP_TRADITIONAL
     ! check the number of threads that ELPA should use internally
     ! in the GPU case at the moment only _1_ thread internally is allowed
     call obj%get("omp_threads", nrThreads, error)
     if (nrThreads .ne. 1) then
       print *,"Experimental feature: Using OpenMP with GPU code paths needs internal to ELPA _1_ OpenMP thread"
       print *,"setting 1 openmp thread now"
       call obj%set("omp_threads",1, error)
424
       nrThreads=1
425
426
427
       call omp_set_num_threads(nrThreads)
     endif
#endif
428
     call obj%timer%stop("check_for_gpu")
429
   endif
Andreas Marek's avatar
Andreas Marek committed
430

431

432
433
434
435
436
437
438
439
440
441
   do_useGPU_tridiag = do_useGPU
   do_useGPU_solve_tridi = do_useGPU
   do_useGPU_trans_ev = do_useGPU
   ! only if we want (and can) use GPU in general, look what are the
   ! requirements for individual routines. Implicitly they are all set to 1, so
   ! unles specified otherwise by the user, GPU versions of all individual
   ! routines should be used
   if(do_useGPU) then
     call obj%get("gpu_tridiag", gpu, error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
442
       print *,"Problem getting option for gpu_tridiag. Aborting..."
443
444
445
446
447
448
       stop
     endif
     do_useGPU_tridiag = (gpu == 1)

     call obj%get("gpu_solve_tridi", gpu, error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
449
       print *,"Problem getting option for gpu_solve_tridi. Aborting..."
450
451
452
453
454
455
       stop
     endif
     do_useGPU_solve_tridi = (gpu == 1)

     call obj%get("gpu_trans_ev", gpu, error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
456
       print *,"Problem getting option for gpu_trans_ev. Aborting..."
457
458
459
460
461
       stop
     endif
     do_useGPU_trans_ev = (gpu == 1)
   endif
   ! for elpa1 the easy thing is, that the individual phases of the algorithm
462
   ! do not share any data on the GPU.
463
464


Andreas Marek's avatar
Andreas Marek committed
465
   ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
466
   if (.not.(obj%eigenvalues_only)) then
Andreas Marek's avatar
Andreas Marek committed
467
     q_actual => q(1:matrixRows,1:matrixCols)
Andreas Marek's avatar
Andreas Marek committed
468
   else
469
470
     allocate(q_dummy(1:matrixRows,1:matrixCols), stat=istat, errmsg=errorMessage)
     check_allocate("elpa1_template: q_dummy", istat, errorMessage)
Andreas Marek's avatar
Andreas Marek committed
471
472
473
     q_actual => q_dummy
   endif

474
475
476
477
478
479
480
#if COMPLEXCASE == 1
   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
481
   check_allocate("elpa1_template: q_real", istat, errorMessage)
482
483
#endif
   allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
484
   check_allocate("elpa1_template: e, tau", istat, errorMessage)
485

486
487
   ! start the computations
   ! as default do all three steps (this might change at some point)
Pavel Kus's avatar
Pavel Kus committed
488
   do_tridiag  = .true.
489
490
491
   do_solve    = .true.
   do_trans_ev = .true.

Pavel Kus's avatar
Pavel Kus committed
492
   if (do_tridiag) then
493
     call obj%timer%start("forward")
Pavel Kus's avatar
Pavel Kus committed
494
495
496
#ifdef HAVE_LIKWID
     call likwid_markerStartRegion("tridi")
#endif
497
498
499
#ifdef WITH_NVTX
     call nvtxRangePush("tridi")
#endif
500
501
502
503
     call tridiag_&
     &MATH_DATATYPE&
     &_&
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
504
505
     & (obj, na, a, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU_tridiag, wantDebug, &
        nrThreads, isSkewsymmetric)
Pavel Kus's avatar
Pavel Kus committed
506

507
508
509
#ifdef WITH_NVTX
     call nvtxRangePop()
#endif
Pavel Kus's avatar
Pavel Kus committed
510
511
512
#ifdef HAVE_LIKWID
     call likwid_markerStopRegion("tridi")
#endif
513
     call obj%timer%stop("forward")
Pavel Kus's avatar
Pavel Kus committed
514
    endif  !do_tridiag
515
516
517

    if (do_solve) then
     call obj%timer%start("solve")
Pavel Kus's avatar
Pavel Kus committed
518
519
520
#ifdef HAVE_LIKWID
     call likwid_markerStartRegion("solve")
#endif
521
522
523
#ifdef WITH_NVTX
     call nvtxRangePush("solve")
#endif
524
525
526
     call solve_tridi_&
     &PRECISION&
     & (obj, na, nev, ev, e,  &
527
#if REALCASE == 1
Andreas Marek's avatar
Andreas Marek committed
528
        q_actual, matrixRows,          &
529
530
#endif
#if COMPLEXCASE == 1
531
        q_real, l_rows,  &
532
#endif
533
534
        nblk, matrixCols, mpi_comm_all, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, &
                success, nrThreads)
Pavel Kus's avatar
Pavel Kus committed
535

536
537
538
#ifdef WITH_NVTX
     call nvtxRangePop()
#endif
Pavel Kus's avatar
Pavel Kus committed
539
540
541
#ifdef HAVE_LIKWID
     call likwid_markerStopRegion("solve")
#endif
542
543
544
     call obj%timer%stop("solve")
     if (.not.(success)) return
   endif !do_solve
545

Andreas Marek's avatar
Andreas Marek committed
546
547
548
   if (obj%eigenvalues_only) then
     do_trans_ev = .false.
   else
Andreas Marek's avatar
Andreas Marek committed
549
550
     call obj%get("check_pd",check_pd,error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
551
       print *,"Problem setting option for check_pd. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
552
553
       stop
     endif
Andreas Marek's avatar
Andreas Marek committed
554
     if (check_pd .eq. 1) then
Wenzhe Yu's avatar
Wenzhe Yu committed
555
556
557
       call obj%get("thres_pd_&
       &PRECISION&
       &",thres_pd,error)
558
       if (error .ne. ELPA_OK) then
Wenzhe Yu's avatar
Wenzhe Yu committed
559
560
561
          print *,"Problem getting option for thres_pd_&
          &PRECISION&
          &. Aborting..."
562
563
564
          stop
       endif

Andreas Marek's avatar
Andreas Marek committed
565
566
       check_pd = 0
       do i = 1, na
567
         if (ev(i) .gt. thres_pd) then
Andreas Marek's avatar
Andreas Marek committed
568
569
570
571
572
573
574
575
576
577
578
579
           check_pd = check_pd + 1
         endif
       enddo
       if (check_pd .lt. na) then
         ! not positiv definite => eigenvectors needed
         do_trans_ev = .true.
       else
         do_trans_ev = .false.
       endif
     endif ! check_pd
   endif ! eigenvalues_only

580
   if (do_trans_ev) then
Andreas Marek's avatar
Andreas Marek committed
581
    ! q must be given thats why from here on we can use q and not q_actual
582
#if COMPLEXCASE == 1
583
     q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
584
#endif
Carolin Penke's avatar
Carolin Penke committed
585
586
     if (isSkewsymmetric) then
     ! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
587
     ! This makes the eigenvectors complex.
Carolin Penke's avatar
Carolin Penke committed
588
     ! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
Andreas Marek's avatar
Andreas Marek committed
589
590
       q(1:matrixRows, matrixCols+1:2*matrixCols) = 0.0
       do i = 1, matrixRows
Carolin Penke's avatar
Carolin Penke committed
591
592
593
594
595
596
!        global_index = indxl2g(i, nblk, my_prow, 0, np_rows)
         global_index = np_rows*nblk*((i-1)/nblk) + MOD(i-1,nblk) + MOD(np_rows+my_prow-0, np_rows)*nblk + 1
         if (mod(global_index-1,4) .eq. 0) then
            ! do nothing
         end if
         if (mod(global_index-1,4) .eq. 1) then
Andreas Marek's avatar
Andreas Marek committed
597
598
            q(i,matrixCols+1:2*matrixCols) = q(i,1:matrixCols)
            q(i,1:matrixCols) = 0
Carolin Penke's avatar
Carolin Penke committed
599
600
         end if
         if (mod(global_index-1,4) .eq. 2) then
Andreas Marek's avatar
Andreas Marek committed
601
            q(i,1:matrixCols) = -q(i,1:matrixCols)
Carolin Penke's avatar
Carolin Penke committed
602
603
         end if
         if (mod(global_index-1,4) .eq. 3) then
Andreas Marek's avatar
Andreas Marek committed
604
605
            q(i,matrixCols+1:2*matrixCols) = -q(i,1:matrixCols)
            q(i,1:matrixCols) = 0
Carolin Penke's avatar
Carolin Penke committed
606
         end if
607
       end do
Carolin Penke's avatar
Carolin Penke committed
608
     endif
609

Andreas Marek's avatar
Andreas Marek committed
610
     call obj%timer%start("back")
Pavel Kus's avatar
Pavel Kus committed
611
612
613
#ifdef HAVE_LIKWID
     call likwid_markerStartRegion("trans_ev")
#endif
614
615
616
#ifdef WITH_NVTX
     call nvtxRangePush("trans_ev")
#endif
Carolin Penke's avatar
Carolin Penke committed
617
     ! In the skew-symmetric case this transforms the real part
Andreas Marek's avatar
Andreas Marek committed
618
     call trans_ev_&
619
     &MATH_DATATYPE&
Andreas Marek's avatar
Andreas Marek committed
620
     &_&
621
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
622
     & (obj, na, nev, a, matrixRows, tau, q, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
Carolin Penke's avatar
Carolin Penke committed
623
624
625
626
     if (isSkewsymmetric) then
       ! Transform imaginary part
       ! Transformation of real and imaginary part could also be one call of trans_ev_tridi acting on the n x 2n matrix.
       call trans_ev_&
Andreas Marek's avatar
Andreas Marek committed
627
628
629
             &MATH_DATATYPE&
             &_&
             &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
630
             & (obj, na, nev, a, matrixRows, tau, q(1:matrixRows, matrixCols+1:2*matrixCols), matrixRows, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
631
                mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
Carolin Penke's avatar
Carolin Penke committed
632
       endif
Pavel Kus's avatar
Pavel Kus committed
633

634
635
636
#ifdef WITH_NVTX
     call nvtxRangePop()
#endif
Pavel Kus's avatar
Pavel Kus committed
637
638
639
#ifdef HAVE_LIKWID
     call likwid_markerStopRegion("trans_ev")
#endif
Andreas Marek's avatar
Andreas Marek committed
640
     call obj%timer%stop("back")
641
   endif ! do_trans_ev
Andreas Marek's avatar
Andreas Marek committed
642
643
644

#if COMPLEXCASE == 1
    deallocate(q_real, stat=istat, errmsg=errorMessage)
645
    check_deallocate("elpa1_template: q_real", istat, errorMessage)
646
#endif
Andreas Marek's avatar
Andreas Marek committed
647

648
   deallocate(e, tau, stat=istat, errmsg=errorMessage)
649
   check_deallocate("elpa1_template: e, tau", istat, errorMessage)
650

651
   if (obj%eigenvalues_only) then
Andreas Marek's avatar
Andreas Marek committed
652
     deallocate(q_dummy, stat=istat, errmsg=errorMessage)
653
     check_deallocate("elpa1_template: q_dummy", istat, errorMessage)
Andreas Marek's avatar
Andreas Marek committed
654
655
   endif

656
657
658
#ifdef WITH_NVTX
   call nvtxRangePop()
#endif
659
   ! restore original OpenMP settings
660
#ifdef WITH_OPENMP_TRADITIONAL
661
662
663
   call omp_set_num_threads(omp_threads_caller)
#endif

Andreas Marek's avatar
Andreas Marek committed
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
#ifdef REDISTRIBUTE_MATRIX
   ! redistribute back if necessary
   if (doRedistributeMatrix) then

     !if (layoutInternal /= layoutExternal) then
     !  ! maybe this can be skiped I now the process grid
     !  ! and np_rows and np_cols

     !  call obj%get("mpi_comm_rows",mpi_comm_rows,error)
     !  call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND), np_rowsMPI, mpierr)
     !  call obj%get("mpi_comm_cols",mpi_comm_cols,error)
     !  call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND), np_colsMPI, mpierr)

     !  np_rows = int(np_rowsMPI,kind=c_int)
     !  np_cols = int(np_colsMPI,kind=c_int)

     !  ! we get new blacs context and the local process grid coordinates
     !  call BLACS_Gridinit(external_blacs_ctxt, layoutInternal, int(np_rows,kind=BLAS_KIND), int(np_cols,kind=BLAS_KIND))
     !  call BLACS_Gridinfo(int(external_blacs_ctxt,KIND=BLAS_KIND), np_rows__, &
     !                      np_cols__, my_prow__, my_pcol__)

     !endif

     !call scal_PRECISION_GEMR2D &
     !(int(na,kind=BLAS_KIND), int(na,kind=BLAS_KIND), aIntern, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc_, aExtern, &
     !1_BLAS_KIND, 1_BLAS_KIND, sc_desc, external_blacs_ctxt)

     call scal_PRECISION_GEMR2D &
     (int(na,kind=BLAS_KIND), int(na,kind=BLAS_KIND), qIntern, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc_, qExtern, &
693
     1_BLAS_KIND, 1_BLAS_KIND, sc_desc, int(external_blacs_ctxt,kind=BLAS_KIND))
Andreas Marek's avatar
Andreas Marek committed
694
695
696
697
698
699
700
701
702


     !clean MPI communicators and blacs grid
     !of the internal re-distributed matrix
     call mpi_comm_free(mpi_comm_rowsMPI_, mpierr)
     call mpi_comm_free(mpi_comm_colsMPI_, mpierr)
     call blacs_gridexit(blacs_ctxt_)
   endif
#endif /* REDISTRIBUTE_MATRIX */
703
704
705
#ifdef ACTIVATE_SKEW
   call obj%timer%stop("elpa_solve_skew_evp_&
#else
706
   call obj%timer%stop("elpa_solve_evp_&
707
#endif
708
709
710
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
711
   &")
712
713
714
end function