elpa1_template.F90 21.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif

55
#include "../general/sanity.F90"
56
#include "../general/error_checking.inc"
57
58
59

function elpa_solve_evp_&
         &MATH_DATATYPE&
Pavel Kus's avatar
Pavel Kus committed
60
61
   &_1stage_&
   &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
62
63
64
65
66
67
68
69
70
71
72
73
   &_impl (obj, &
#ifdef REDISTRIBUTE_MATRIX
   aExtern, &
#else
   a, &
#endif
   ev, &
#ifdef REDISTRIBUTE_MATRIX
   qExtern) result(success)
#else
   q) result(success)
#endif
74
75
76
   use precision
   use cuda_functions
   use mod_check_for_gpu
77
   use, intrinsic :: iso_c_binding
78
   use elpa_abstract_impl
79
80
   use elpa_mpi
   use elpa1_compute
81
   use elpa_omp
Andreas Marek's avatar
Andreas Marek committed
82
83
84
#ifdef REDISTRIBUTE_MATRIX
   use elpa_scalapack_interfaces
#endif
85
   use solve_tridi
86
   use thread_affinity
87
   implicit none
Pavel Kus's avatar
Pavel Kus committed
88
#include "../general/precision_kinds.F90"
Andreas Marek's avatar
Andreas Marek committed
89
90
91
92
   class(elpa_abstract_impl_t), intent(inout)                         :: obj
   real(kind=REAL_DATATYPE), intent(out)                              :: ev(obj%na)

#ifdef REDISTRIBUTE_MATRIX
Pavel Kus's avatar
Pavel Kus committed
93

94
#ifdef USE_ASSUMED_SIZE
Andreas Marek's avatar
Andreas Marek committed
95
96
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: aExtern(obj%local_nrows,*)
   MATH_DATATYPE(kind=rck), optional,target,intent(out)               :: qExtern(obj%local_nrows,*)
97
#else
Andreas Marek's avatar
Andreas Marek committed
98
99
100
101
102
103
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: aExtern(obj%local_nrows,obj%local_ncols)
#ifdef HAVE_SKEWSYMMETRIC
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: qExtern(obj%local_nrows,2*obj%local_ncols)
#else
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: qExtern(obj%local_nrows,obj%local_ncols)
#endif
104
#endif /* USE_ASSUMED_SIZE */
Andreas Marek's avatar
Andreas Marek committed
105
106
107
108
109
110
111
112

#else /* REDISTRIBUTE_MATRIX */

#ifdef USE_ASSUMED_SIZE
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: a(obj%local_nrows,*)
   MATH_DATATYPE(kind=rck), optional,target,intent(out)               :: q(obj%local_nrows,*)
#else
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: a(obj%local_nrows,obj%local_ncols)
113
#ifdef HAVE_SKEWSYMMETRIC
Carolin Penke's avatar
Carolin Penke committed
114
115
116
117
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,2*obj%local_ncols)
#else
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
#endif
118
119
#endif /* USE_ASSUMED_SIZE */

Andreas Marek's avatar
Andreas Marek committed
120
121
#endif /* REDISTRIBUTE_MATRIX */

122
#ifdef REDISTRIBUTE_MATRIX
Andreas Marek's avatar
Andreas Marek committed
123
124
    MATH_DATATYPE(kind=rck), pointer                                  :: a(:,:)
    MATH_DATATYPE(kind=rck), pointer                                  :: q(:,:)
125
#endif
Pavel Kus's avatar
Pavel Kus committed
126
127

#if REALCASE == 1
Andreas Marek's avatar
Andreas Marek committed
128
129
130
   real(kind=C_DATATYPE_KIND), allocatable           :: tau(:)
   real(kind=C_DATATYPE_KIND), allocatable, target   :: q_dummy(:,:)
   real(kind=C_DATATYPE_KIND), pointer               :: q_actual(:,:)
131
132
133
#endif /* REALCASE */

#if COMPLEXCASE == 1
Andreas Marek's avatar
Andreas Marek committed
134
135
   real(kind=REAL_DATATYPE), allocatable             :: q_real(:,:)
   complex(kind=C_DATATYPE_KIND), allocatable        :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
136
   complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
Andreas Marek's avatar
Andreas Marek committed
137
   complex(kind=C_DATATYPE_KIND), pointer            :: q_actual(:,:)
138
139
#endif /* COMPLEXCASE */

140

141
   integer(kind=c_int)                             :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
142
   integer(kind=MPI_KIND)                          :: np_rowsMPI, np_colsMPI
143

144
   logical                                         :: useGPU
Carolin Penke's avatar
Carolin Penke committed
145
146
   integer(kind=c_int)                             :: skewsymmetric
   logical                                         :: isSkewsymmetric
147
148
   logical                                         :: success

149
150
   logical                                         :: do_useGPU, do_useGPU_tridiag, &
                                                      do_useGPU_solve_tridi, do_useGPU_trans_ev
151
152
   integer(kind=ik)                                :: numberOfGPUDevices

153
154
   integer(kind=c_int)                             :: my_pe, n_pes, my_prow, my_pcol
   integer(kind=MPI_KIND)                          :: mpierr, my_peMPI, n_pesMPI, my_prowMPI, my_pcolMPI
155
156
   real(kind=C_DATATYPE_KIND), allocatable         :: e(:)
   logical                                         :: wantDebug
157
   integer(kind=c_int)                             :: istat, debug, gpu
158
   character(200)                                  :: errorMessage
Andreas Marek's avatar
Andreas Marek committed
159
   integer(kind=ik)                                :: na, nev, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
160
                                                      mpi_comm_rows, mpi_comm_cols,        &
Andreas Marek's avatar
Andreas Marek committed
161
                                                      mpi_comm_all, check_pd, i, error, matrixRows
162
   real(kind=C_DATATYPE_KIND)                      :: thres_pd
Andreas Marek's avatar
Andreas Marek committed
163
164
165
166

#ifdef REDISTRIBUTE_MATRIX
   integer(kind=ik)                                :: nblkInternal, matrixOrder
   character(len=1)                                :: layoutInternal, layoutExternal
167
168
   integer(kind=c_int)                             :: external_blacs_ctxt
   integer(kind=BLAS_KIND)                         :: external_blacs_ctxt_
Andreas Marek's avatar
Andreas Marek committed
169
170
171
172
173
174
175
176
177
178
179
   integer(kind=BLAS_KIND)                         :: np_rows_, np_cols_, my_prow_, my_pcol_
   integer(kind=BLAS_KIND)                         :: np_rows__, np_cols__, my_prow__, my_pcol__
   integer(kind=BLAS_KIND)                         :: sc_desc_(1:9), sc_desc(1:9)
   integer(kind=BLAS_KIND)                         :: na_rows_, na_cols_, info_, blacs_ctxt_
   integer(kind=ik)                                :: mpi_comm_rows_, mpi_comm_cols_
   integer(kind=MPI_KIND)                          :: mpi_comm_rowsMPI_, mpi_comm_colsMPI_
   character(len=1), parameter                     :: matrixLayouts(2) = [ 'C', 'R' ]

   MATH_DATATYPE(kind=rck), allocatable, target               :: aIntern(:,:)
   MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable, target   :: qIntern(:,:)
#endif
180
181
   integer(kind=c_int)                             :: pinningInfo

Pavel Kus's avatar
Pavel Kus committed
182
   logical                                         :: do_tridiag, do_solve, do_trans_ev
183
   integer(kind=ik)                                :: nrThreads
Carolin Penke's avatar
Carolin Penke committed
184
   integer(kind=ik)                                :: global_index
Andreas Marek's avatar
Andreas Marek committed
185
186

   logical                                         :: reDistributeMatrix, doRedistributeMatrix
187

188
   call obj%timer%start("elpa_solve_evp_&
189
190
191
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
192
   &")
193

Andreas Marek's avatar
Andreas Marek committed
194
195
196
197
   reDistributeMatrix = .false.

   matrixRows = obj%local_nrows
   matrixCols = obj%local_ncols
198

Andreas Marek's avatar
Andreas Marek committed
199
200
201
202
203
   call obj%get("mpi_comm_parent", mpi_comm_all, error)
   if (error .ne. ELPA_OK) then
     print *,"Problem getting option. Aborting..."
     stop
   endif
204

Andreas Marek's avatar
Andreas Marek committed
205
206
207
   call mpi_comm_rank(int(mpi_comm_all,kind=MPI_KIND), my_peMPI, mpierr)
   my_pe = int(my_peMPI,kind=c_int)

208
#ifdef WITH_OPENMP_TRADITIONAL
209
210
211
212
213
   ! store the number of OpenMP threads used in the calling function
   ! restore this at the end of ELPA 2
   omp_threads_caller = omp_get_max_threads()

   ! check the number of threads that ELPA should use internally
Andreas Marek's avatar
Andreas Marek committed
214
215
   call obj%get("omp_threads",nrThreads,error)
   call omp_set_num_threads(nrThreads)
Andreas Marek's avatar
Andreas Marek committed
216
217
218
#else
   nrThreads = 1
#endif
219
220
221
#ifdef WITH_NVTX
   call nvtxRangePush("elpa1")
#endif
222
223
224
225
226
227
228
229
   call obj%get("output_pinning_information", pinningInfo, error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option for debug. Aborting..."
     stop
   endif
   
   if (pinningInfo .eq. 1) then
     call init_thread_affinity(nrThreads)
Andreas Marek's avatar
Andreas Marek committed
230

231
232
233
234
     call check_thread_affinity()
     if (my_pe .eq. 0) call print_thread_affinity(my_pe)
     call cleanup_thread_affinity()
   endif
Andreas Marek's avatar
Andreas Marek committed
235
   success = .true.
Andreas Marek's avatar
Andreas Marek committed
236

237
#ifdef REDISTRIBUTE_MATRIX
Andreas Marek's avatar
Andreas Marek committed
238
   if (present(qExtern)) then
239
#else
Andreas Marek's avatar
Andreas Marek committed
240
   if (present(q)) then
241
#endif
242
     obj%eigenvalues_only = .false.
Andreas Marek's avatar
Andreas Marek committed
243
   else
244
     obj%eigenvalues_only = .true.
Andreas Marek's avatar
Andreas Marek committed
245
246
   endif

247
248
   na         = obj%na
   nev        = obj%nev
Andreas Marek's avatar
Andreas Marek committed
249
   matrixRows = obj%local_nrows
250
251
252
   nblk       = obj%nblk
   matrixCols = obj%local_ncols

Andreas Marek's avatar
Andreas Marek committed
253
254
255
256
257
258
259
260
261
262
263
264
   call obj%get("mpi_comm_rows",mpi_comm_rows,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem getting option. Aborting..."
     stop
   endif
   call obj%get("mpi_comm_cols",mpi_comm_cols,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem getting option. Aborting..."
     stop
   endif

#ifdef REDISTRIBUTE_MATRIX
Andreas Marek's avatar
Andreas Marek committed
265
#include "../helpers/elpa_redistribute_template.F90"
Andreas Marek's avatar
Andreas Marek committed
266
267
#endif /* REDISTRIBUTE_MATRIX */

Andreas Marek's avatar
Andreas Marek committed
268
269
270
271
272
273
274
   ! special case na = 1
   if (na .eq. 1) then
#if REALCASE == 1
     ev(1) = a(1,1)
#endif
#if COMPLEXCASE == 1
     ev(1) = real(a(1,1))
Pavel Kus's avatar
Pavel Kus committed
275
#endif
Andreas Marek's avatar
Andreas Marek committed
276
     if (.not.(obj%eigenvalues_only)) then
Pavel Kus's avatar
Pavel Kus committed
277
       q(1,1) = ONE
Andreas Marek's avatar
Andreas Marek committed
278
     endif
279
280

     ! restore original OpenMP settings
281
#ifdef WITH_OPENMP_TRADITIONAL
282
283
284
285
     ! store the number of OpenMP threads used in the calling function
     ! restore this at the end of ELPA 2
     call omp_set_num_threads(omp_threads_caller)
#endif
Andreas Marek's avatar
Andreas Marek committed
286
287
288
289
290
291
292
293
     call obj%timer%stop("elpa_solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &")
     return
   endif

294
295
296
297
298
   if (nev == 0) then
     nev = 1
     obj%eigenvalues_only = .true.
   endif

Andreas Marek's avatar
Andreas Marek committed
299
300
   call obj%get("gpu",gpu,error)
   if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
301
     print *,"Problem getting option for gpu. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
302
303
     stop
   endif
304
   if (gpu .eq. 1) then
305
306
307
308
     useGPU =.true.
   else
     useGPU = .false.
   endif
309

Carolin Penke's avatar
Carolin Penke committed
310
311
   call obj%get("is_skewsymmetric",skewsymmetric,error)
   if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
312
     print *,"Problem getting option for skewsymmetric. Aborting..."
Carolin Penke's avatar
Carolin Penke committed
313
314
     stop
   endif
315

Carolin Penke's avatar
Carolin Penke committed
316
   isSkewsymmetric = (skewsymmetric == 1)
317

318
   call obj%timer%start("mpi_communication")
319

320
321
322
323
324
   call mpi_comm_rank(int(mpi_comm_rows,kind=MPI_KIND), my_prowMPI, mpierr)
   call mpi_comm_rank(int(mpi_comm_cols,kind=MPI_KIND), my_pcolMPI, mpierr)

   my_prow = int(my_prowMPI,kind=c_int)
   my_pcol = int(my_pcolMPI,kind=c_int)
325

326
327
   call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND), np_rowsMPI, mpierr)
   call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND), np_colsMPI, mpierr)
328
329
330

   np_rows = int(np_rowsMPI,kind=c_int)
   np_cols = int(np_colsMPI,kind=c_int)
331

332
   call obj%timer%stop("mpi_communication")
333

Andreas Marek's avatar
Andreas Marek committed
334
335
   call obj%get("debug", debug,error)
   if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
336
     print *,"Problem setting option for debug. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
337
338
     stop
   endif
339
   wantDebug = debug == 1
340
   do_useGPU = .false.
341

342

343
   if (useGPU) then
344
     call obj%timer%start("check_for_gpu")
345

346
     if (check_for_gpu(obj, my_pe, numberOfGPUDevices, wantDebug=wantDebug)) then
347
348
349
350
351
352
353
354
355
356
357
358
       do_useGPU = .true.
       ! set the neccessary parameters
       cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
       cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
       cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
       cudaHostRegisterPortable = cuda_hostRegisterPortable()
       cudaHostRegisterMapped   = cuda_hostRegisterMapped()
     else
       print *,"GPUs are requested but not detected! Aborting..."
       success = .false.
       return
     endif
359
     call obj%timer%stop("check_for_gpu")
360
   endif
Andreas Marek's avatar
Andreas Marek committed
361

362

363
364
365
366
367
368
369
370
371
372
   do_useGPU_tridiag = do_useGPU
   do_useGPU_solve_tridi = do_useGPU
   do_useGPU_trans_ev = do_useGPU
   ! only if we want (and can) use GPU in general, look what are the
   ! requirements for individual routines. Implicitly they are all set to 1, so
   ! unles specified otherwise by the user, GPU versions of all individual
   ! routines should be used
   if(do_useGPU) then
     call obj%get("gpu_tridiag", gpu, error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
373
       print *,"Problem getting option for gpu_tridiag. Aborting..."
374
375
376
377
378
379
       stop
     endif
     do_useGPU_tridiag = (gpu == 1)

     call obj%get("gpu_solve_tridi", gpu, error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
380
       print *,"Problem getting option for gpu_solve_tridi. Aborting..."
381
382
383
384
385
386
       stop
     endif
     do_useGPU_solve_tridi = (gpu == 1)

     call obj%get("gpu_trans_ev", gpu, error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
387
       print *,"Problem getting option for gpu_trans_ev. Aborting..."
388
389
390
391
392
       stop
     endif
     do_useGPU_trans_ev = (gpu == 1)
   endif
   ! for elpa1 the easy thing is, that the individual phases of the algorithm
393
   ! do not share any data on the GPU.
394
395


Andreas Marek's avatar
Andreas Marek committed
396
   ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
397
   if (.not.(obj%eigenvalues_only)) then
Andreas Marek's avatar
Andreas Marek committed
398
     q_actual => q(1:matrixRows,1:matrixCols)
Andreas Marek's avatar
Andreas Marek committed
399
   else
400
401
     allocate(q_dummy(1:matrixRows,1:matrixCols), stat=istat, errmsg=errorMessage)
     check_allocate("elpa1_template: q_dummy", istat, errorMessage)
Andreas Marek's avatar
Andreas Marek committed
402
403
404
     q_actual => q_dummy
   endif

405
406
407
408
409
410
411
#if COMPLEXCASE == 1
   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
412
   check_allocate("elpa1_template: q_real", istat, errorMessage)
413
414
#endif
   allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
415
   check_allocate("elpa1_template: e, tau", istat, errorMessage)
416

417
418
   ! start the computations
   ! as default do all three steps (this might change at some point)
Pavel Kus's avatar
Pavel Kus committed
419
   do_tridiag  = .true.
420
421
422
   do_solve    = .true.
   do_trans_ev = .true.

Pavel Kus's avatar
Pavel Kus committed
423
   if (do_tridiag) then
424
     call obj%timer%start("forward")
Pavel Kus's avatar
Pavel Kus committed
425
426
427
#ifdef HAVE_LIKWID
     call likwid_markerStartRegion("tridi")
#endif
428
429
430
#ifdef WITH_NVTX
     call nvtxRangePush("tridi")
#endif
Pavel Kus's avatar
Pavel Kus committed
431

432
433
434
435
     call tridiag_&
     &MATH_DATATYPE&
     &_&
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
436
     & (obj, na, a, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU_tridiag, wantDebug, nrThreads)
Pavel Kus's avatar
Pavel Kus committed
437

438
439
440
#ifdef WITH_NVTX
     call nvtxRangePop()
#endif
Pavel Kus's avatar
Pavel Kus committed
441
442
443
#ifdef HAVE_LIKWID
     call likwid_markerStopRegion("tridi")
#endif
444
     call obj%timer%stop("forward")
Pavel Kus's avatar
Pavel Kus committed
445
    endif  !do_tridiag
446
447
448

    if (do_solve) then
     call obj%timer%start("solve")
Pavel Kus's avatar
Pavel Kus committed
449
450
451
#ifdef HAVE_LIKWID
     call likwid_markerStartRegion("solve")
#endif
452
453
454
#ifdef WITH_NVTX
     call nvtxRangePush("solve")
#endif
455
456
457
     call solve_tridi_&
     &PRECISION&
     & (obj, na, nev, ev, e,  &
458
#if REALCASE == 1
Andreas Marek's avatar
Andreas Marek committed
459
        q_actual, matrixRows,          &
460
461
#endif
#if COMPLEXCASE == 1
462
        q_real, l_rows,  &
463
#endif
464
465
        nblk, matrixCols, mpi_comm_all, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, &
                success, nrThreads)
Pavel Kus's avatar
Pavel Kus committed
466

467
468
469
#ifdef WITH_NVTX
     call nvtxRangePop()
#endif
Pavel Kus's avatar
Pavel Kus committed
470
471
472
#ifdef HAVE_LIKWID
     call likwid_markerStopRegion("solve")
#endif
473
474
475
     call obj%timer%stop("solve")
     if (.not.(success)) return
   endif !do_solve
476

Andreas Marek's avatar
Andreas Marek committed
477
478
479
   if (obj%eigenvalues_only) then
     do_trans_ev = .false.
   else
Andreas Marek's avatar
Andreas Marek committed
480
481
     call obj%get("check_pd",check_pd,error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
482
       print *,"Problem setting option for check_pd. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
483
484
       stop
     endif
Andreas Marek's avatar
Andreas Marek committed
485
     if (check_pd .eq. 1) then
Wenzhe Yu's avatar
Wenzhe Yu committed
486
487
488
       call obj%get("thres_pd_&
       &PRECISION&
       &",thres_pd,error)
489
       if (error .ne. ELPA_OK) then
Wenzhe Yu's avatar
Wenzhe Yu committed
490
491
492
          print *,"Problem getting option for thres_pd_&
          &PRECISION&
          &. Aborting..."
493
494
495
          stop
       endif

Andreas Marek's avatar
Andreas Marek committed
496
497
       check_pd = 0
       do i = 1, na
498
         if (ev(i) .gt. thres_pd) then
Andreas Marek's avatar
Andreas Marek committed
499
500
501
502
503
504
505
506
507
508
509
510
           check_pd = check_pd + 1
         endif
       enddo
       if (check_pd .lt. na) then
         ! not positiv definite => eigenvectors needed
         do_trans_ev = .true.
       else
         do_trans_ev = .false.
       endif
     endif ! check_pd
   endif ! eigenvalues_only

511
   if (do_trans_ev) then
Andreas Marek's avatar
Andreas Marek committed
512
    ! q must be given thats why from here on we can use q and not q_actual
513
#if COMPLEXCASE == 1
514
     q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
515
#endif
Carolin Penke's avatar
Carolin Penke committed
516
517
     if (isSkewsymmetric) then
     ! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
518
     ! This makes the eigenvectors complex.
Carolin Penke's avatar
Carolin Penke committed
519
     ! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
Andreas Marek's avatar
Andreas Marek committed
520
521
       q(1:matrixRows, matrixCols+1:2*matrixCols) = 0.0
       do i = 1, matrixRows
Carolin Penke's avatar
Carolin Penke committed
522
523
524
525
526
527
!        global_index = indxl2g(i, nblk, my_prow, 0, np_rows)
         global_index = np_rows*nblk*((i-1)/nblk) + MOD(i-1,nblk) + MOD(np_rows+my_prow-0, np_rows)*nblk + 1
         if (mod(global_index-1,4) .eq. 0) then
            ! do nothing
         end if
         if (mod(global_index-1,4) .eq. 1) then
Andreas Marek's avatar
Andreas Marek committed
528
529
            q(i,matrixCols+1:2*matrixCols) = q(i,1:matrixCols)
            q(i,1:matrixCols) = 0
Carolin Penke's avatar
Carolin Penke committed
530
531
         end if
         if (mod(global_index-1,4) .eq. 2) then
Andreas Marek's avatar
Andreas Marek committed
532
            q(i,1:matrixCols) = -q(i,1:matrixCols)
Carolin Penke's avatar
Carolin Penke committed
533
534
         end if
         if (mod(global_index-1,4) .eq. 3) then
Andreas Marek's avatar
Andreas Marek committed
535
536
            q(i,matrixCols+1:2*matrixCols) = -q(i,1:matrixCols)
            q(i,1:matrixCols) = 0
Carolin Penke's avatar
Carolin Penke committed
537
         end if
538
       end do
Carolin Penke's avatar
Carolin Penke committed
539
     endif
540

Andreas Marek's avatar
Andreas Marek committed
541
     call obj%timer%start("back")
Pavel Kus's avatar
Pavel Kus committed
542
543
544
#ifdef HAVE_LIKWID
     call likwid_markerStartRegion("trans_ev")
#endif
545
546
547
#ifdef WITH_NVTX
     call nvtxRangePush("trans_ev")
#endif
Carolin Penke's avatar
Carolin Penke committed
548
     ! In the skew-symmetric case this transforms the real part
Andreas Marek's avatar
Andreas Marek committed
549
     call trans_ev_&
550
     &MATH_DATATYPE&
Andreas Marek's avatar
Andreas Marek committed
551
     &_&
552
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
553
     & (obj, na, nev, a, matrixRows, tau, q, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
Carolin Penke's avatar
Carolin Penke committed
554
555
556
557
     if (isSkewsymmetric) then
       ! Transform imaginary part
       ! Transformation of real and imaginary part could also be one call of trans_ev_tridi acting on the n x 2n matrix.
       call trans_ev_&
Andreas Marek's avatar
Andreas Marek committed
558
559
560
             &MATH_DATATYPE&
             &_&
             &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
561
             & (obj, na, nev, a, matrixRows, tau, q(1:matrixRows, matrixCols+1:2*matrixCols), matrixRows, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
562
                mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
Carolin Penke's avatar
Carolin Penke committed
563
       endif
Pavel Kus's avatar
Pavel Kus committed
564

565
566
567
#ifdef WITH_NVTX
     call nvtxRangePop()
#endif
Pavel Kus's avatar
Pavel Kus committed
568
569
570
#ifdef HAVE_LIKWID
     call likwid_markerStopRegion("trans_ev")
#endif
Andreas Marek's avatar
Andreas Marek committed
571
     call obj%timer%stop("back")
572
   endif ! do_trans_ev
Andreas Marek's avatar
Andreas Marek committed
573
574
575

#if COMPLEXCASE == 1
    deallocate(q_real, stat=istat, errmsg=errorMessage)
576
    check_deallocate("elpa1_template: q_real", istat, errorMessage)
577
#endif
Andreas Marek's avatar
Andreas Marek committed
578

579
   deallocate(e, tau, stat=istat, errmsg=errorMessage)
580
   check_deallocate("elpa1_template: e, tau", istat, errorMessage)
581

582
   if (obj%eigenvalues_only) then
Andreas Marek's avatar
Andreas Marek committed
583
     deallocate(q_dummy, stat=istat, errmsg=errorMessage)
584
     check_deallocate("elpa1_template: q_dummy", istat, errorMessage)
Andreas Marek's avatar
Andreas Marek committed
585
586
   endif

587
588
589
#ifdef WITH_NVTX
   call nvtxRangePop()
#endif
590
   ! restore original OpenMP settings
591
#ifdef WITH_OPENMP_TRADITIONAL
592
593
594
595
596
   ! store the number of OpenMP threads used in the calling function
   ! restore this at the end of ELPA 2
   call omp_set_num_threads(omp_threads_caller)
#endif

Andreas Marek's avatar
Andreas Marek committed
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
#ifdef REDISTRIBUTE_MATRIX
   ! redistribute back if necessary
   if (doRedistributeMatrix) then

     !if (layoutInternal /= layoutExternal) then
     !  ! maybe this can be skiped I now the process grid
     !  ! and np_rows and np_cols

     !  call obj%get("mpi_comm_rows",mpi_comm_rows,error)
     !  call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND), np_rowsMPI, mpierr)
     !  call obj%get("mpi_comm_cols",mpi_comm_cols,error)
     !  call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND), np_colsMPI, mpierr)

     !  np_rows = int(np_rowsMPI,kind=c_int)
     !  np_cols = int(np_colsMPI,kind=c_int)

     !  ! we get new blacs context and the local process grid coordinates
     !  call BLACS_Gridinit(external_blacs_ctxt, layoutInternal, int(np_rows,kind=BLAS_KIND), int(np_cols,kind=BLAS_KIND))
     !  call BLACS_Gridinfo(int(external_blacs_ctxt,KIND=BLAS_KIND), np_rows__, &
     !                      np_cols__, my_prow__, my_pcol__)

     !endif

     !call scal_PRECISION_GEMR2D &
     !(int(na,kind=BLAS_KIND), int(na,kind=BLAS_KIND), aIntern, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc_, aExtern, &
     !1_BLAS_KIND, 1_BLAS_KIND, sc_desc, external_blacs_ctxt)

     call scal_PRECISION_GEMR2D &
     (int(na,kind=BLAS_KIND), int(na,kind=BLAS_KIND), qIntern, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc_, qExtern, &
626
     1_BLAS_KIND, 1_BLAS_KIND, sc_desc, int(external_blacs_ctxt,kind=BLAS_KIND))
Andreas Marek's avatar
Andreas Marek committed
627
628
629
630
631
632
633
634
635


     !clean MPI communicators and blacs grid
     !of the internal re-distributed matrix
     call mpi_comm_free(mpi_comm_rowsMPI_, mpierr)
     call mpi_comm_free(mpi_comm_colsMPI_, mpierr)
     call blacs_gridexit(blacs_ctxt_)
   endif
#endif /* REDISTRIBUTE_MATRIX */
636
   call obj%timer%stop("elpa_solve_evp_&
637
638
639
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
640
   &")
641
642
643
end function