elpa1_template.F90 21 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif

55
#include "../general/sanity.F90"
56
#include "../general/error_checking.inc"
57 58 59

function elpa_solve_evp_&
         &MATH_DATATYPE&
Pavel Kus's avatar
Pavel Kus committed
60 61
   &_1stage_&
   &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
62 63 64 65 66 67 68 69 70 71 72 73
   &_impl (obj, &
#ifdef REDISTRIBUTE_MATRIX
   aExtern, &
#else
   a, &
#endif
   ev, &
#ifdef REDISTRIBUTE_MATRIX
   qExtern) result(success)
#else
   q) result(success)
#endif
74 75 76 77
   use precision
   use cuda_functions
   use mod_check_for_gpu
   use iso_c_binding
78
   use elpa_abstract_impl
79 80
   use elpa_mpi
   use elpa1_compute
81
   use elpa_omp
Andreas Marek's avatar
Andreas Marek committed
82 83 84
#ifdef REDISTRIBUTE_MATRIX
   use elpa_scalapack_interfaces
#endif
85

86
   implicit none
Pavel Kus's avatar
Pavel Kus committed
87
#include "../general/precision_kinds.F90"
Andreas Marek's avatar
Andreas Marek committed
88 89 90 91
   class(elpa_abstract_impl_t), intent(inout)                         :: obj
   real(kind=REAL_DATATYPE), intent(out)                              :: ev(obj%na)

#ifdef REDISTRIBUTE_MATRIX
Pavel Kus's avatar
Pavel Kus committed
92

93
#ifdef USE_ASSUMED_SIZE
Andreas Marek's avatar
Andreas Marek committed
94 95
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: aExtern(obj%local_nrows,*)
   MATH_DATATYPE(kind=rck), optional,target,intent(out)               :: qExtern(obj%local_nrows,*)
96
#else
Andreas Marek's avatar
Andreas Marek committed
97 98 99 100 101 102
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: aExtern(obj%local_nrows,obj%local_ncols)
#ifdef HAVE_SKEWSYMMETRIC
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: qExtern(obj%local_nrows,2*obj%local_ncols)
#else
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: qExtern(obj%local_nrows,obj%local_ncols)
#endif
103
#endif /* USE_ASSUMED_SIZE */
Andreas Marek's avatar
Andreas Marek committed
104 105 106 107 108 109 110 111

#else /* REDISTRIBUTE_MATRIX */

#ifdef USE_ASSUMED_SIZE
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: a(obj%local_nrows,*)
   MATH_DATATYPE(kind=rck), optional,target,intent(out)               :: q(obj%local_nrows,*)
#else
   MATH_DATATYPE(kind=rck), intent(inout), target                     :: a(obj%local_nrows,obj%local_ncols)
112
#ifdef HAVE_SKEWSYMMETRIC
Carolin Penke's avatar
Carolin Penke committed
113 114 115 116
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,2*obj%local_ncols)
#else
   MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
#endif
117 118
#endif /* USE_ASSUMED_SIZE */

Andreas Marek's avatar
Andreas Marek committed
119 120
#endif /* REDISTRIBUTE_MATRIX */

121
#ifdef REDISTRIBUTE_MATRIX
Andreas Marek's avatar
Andreas Marek committed
122 123
    MATH_DATATYPE(kind=rck), pointer                                  :: a(:,:)
    MATH_DATATYPE(kind=rck), pointer                                  :: q(:,:)
124
#endif
Pavel Kus's avatar
Pavel Kus committed
125 126

#if REALCASE == 1
Andreas Marek's avatar
Andreas Marek committed
127 128 129
   real(kind=C_DATATYPE_KIND), allocatable           :: tau(:)
   real(kind=C_DATATYPE_KIND), allocatable, target   :: q_dummy(:,:)
   real(kind=C_DATATYPE_KIND), pointer               :: q_actual(:,:)
130 131 132
#endif /* REALCASE */

#if COMPLEXCASE == 1
Andreas Marek's avatar
Andreas Marek committed
133 134
   real(kind=REAL_DATATYPE), allocatable             :: q_real(:,:)
   complex(kind=C_DATATYPE_KIND), allocatable        :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
135
   complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
Andreas Marek's avatar
Andreas Marek committed
136
   complex(kind=C_DATATYPE_KIND), pointer            :: q_actual(:,:)
137 138
#endif /* COMPLEXCASE */

139

140
   integer(kind=c_int)                             :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
141
   integer(kind=MPI_KIND)                          :: np_rowsMPI, np_colsMPI
142

143
   logical                                         :: useGPU
Carolin Penke's avatar
Carolin Penke committed
144 145
   integer(kind=c_int)                             :: skewsymmetric
   logical                                         :: isSkewsymmetric
146 147
   logical                                         :: success

148 149
   logical                                         :: do_useGPU, do_useGPU_tridiag, &
                                                      do_useGPU_solve_tridi, do_useGPU_trans_ev
150 151
   integer(kind=ik)                                :: numberOfGPUDevices

152 153
   integer(kind=c_int)                             :: my_pe, n_pes, my_prow, my_pcol
   integer(kind=MPI_KIND)                          :: mpierr, my_peMPI, n_pesMPI, my_prowMPI, my_pcolMPI
154 155
   real(kind=C_DATATYPE_KIND), allocatable         :: e(:)
   logical                                         :: wantDebug
156
   integer(kind=c_int)                             :: istat, debug, gpu
157
   character(200)                                  :: errorMessage
Andreas Marek's avatar
Andreas Marek committed
158
   integer(kind=ik)                                :: na, nev, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
159
                                                      mpi_comm_rows, mpi_comm_cols,        &
Andreas Marek's avatar
Andreas Marek committed
160 161 162 163 164
                                                      mpi_comm_all, check_pd, i, error, matrixRows

#ifdef REDISTRIBUTE_MATRIX
   integer(kind=ik)                                :: nblkInternal, matrixOrder
   character(len=1)                                :: layoutInternal, layoutExternal
165 166
   integer(kind=c_int)                             :: external_blacs_ctxt
   integer(kind=BLAS_KIND)                         :: external_blacs_ctxt_
Andreas Marek's avatar
Andreas Marek committed
167 168 169 170 171 172 173 174 175 176 177
   integer(kind=BLAS_KIND)                         :: np_rows_, np_cols_, my_prow_, my_pcol_
   integer(kind=BLAS_KIND)                         :: np_rows__, np_cols__, my_prow__, my_pcol__
   integer(kind=BLAS_KIND)                         :: sc_desc_(1:9), sc_desc(1:9)
   integer(kind=BLAS_KIND)                         :: na_rows_, na_cols_, info_, blacs_ctxt_
   integer(kind=ik)                                :: mpi_comm_rows_, mpi_comm_cols_
   integer(kind=MPI_KIND)                          :: mpi_comm_rowsMPI_, mpi_comm_colsMPI_
   character(len=1), parameter                     :: matrixLayouts(2) = [ 'C', 'R' ]

   MATH_DATATYPE(kind=rck), allocatable, target               :: aIntern(:,:)
   MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable, target   :: qIntern(:,:)
#endif
Pavel Kus's avatar
Pavel Kus committed
178
   logical                                         :: do_tridiag, do_solve, do_trans_ev
179
   integer(kind=ik)                                :: nrThreads
Carolin Penke's avatar
Carolin Penke committed
180
   integer(kind=ik)                                :: global_index
Andreas Marek's avatar
Andreas Marek committed
181 182

   logical                                         :: reDistributeMatrix, doRedistributeMatrix
Carolin Penke's avatar
Carolin Penke committed
183
   
184
   call obj%timer%start("elpa_solve_evp_&
185 186 187
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
188
   &")
189

Andreas Marek's avatar
Andreas Marek committed
190 191 192 193 194 195 196 197 198 199
   reDistributeMatrix = .false.

   matrixRows = obj%local_nrows
   matrixCols = obj%local_ncols
   
   call obj%get("mpi_comm_parent", mpi_comm_all, error)
   if (error .ne. ELPA_OK) then
     print *,"Problem getting option. Aborting..."
     stop
   endif
200

Andreas Marek's avatar
Andreas Marek committed
201 202 203
   call mpi_comm_rank(int(mpi_comm_all,kind=MPI_KIND), my_peMPI, mpierr)
   my_pe = int(my_peMPI,kind=c_int)

Andreas Marek's avatar
Andreas Marek committed
204
#ifdef WITH_OPENMP
205 206 207 208 209
   ! store the number of OpenMP threads used in the calling function
   ! restore this at the end of ELPA 2
   omp_threads_caller = omp_get_max_threads()

   ! check the number of threads that ELPA should use internally
Andreas Marek's avatar
Andreas Marek committed
210 211
   call obj%get("omp_threads",nrThreads,error)
   call omp_set_num_threads(nrThreads)
Andreas Marek's avatar
Andreas Marek committed
212 213 214
#else
   nrThreads = 1
#endif
215 216 217
#ifdef WITH_NVTX
   call nvtxRangePush("elpa1")
#endif
Andreas Marek's avatar
Andreas Marek committed
218 219


Andreas Marek's avatar
Andreas Marek committed
220
   success = .true.
Andreas Marek's avatar
Andreas Marek committed
221

222
#ifdef REDISTRIBUTE_MATRIX
Andreas Marek's avatar
Andreas Marek committed
223
   if (present(qExtern)) then
224
#else
Andreas Marek's avatar
Andreas Marek committed
225
   if (present(q)) then
226
#endif
227
     obj%eigenvalues_only = .false.
Andreas Marek's avatar
Andreas Marek committed
228
   else
229
     obj%eigenvalues_only = .true.
Andreas Marek's avatar
Andreas Marek committed
230 231
   endif

232 233
   na         = obj%na
   nev        = obj%nev
Andreas Marek's avatar
Andreas Marek committed
234
   matrixRows = obj%local_nrows
235 236 237
   nblk       = obj%nblk
   matrixCols = obj%local_ncols

Andreas Marek's avatar
Andreas Marek committed
238 239 240 241 242 243 244 245 246 247 248 249
   call obj%get("mpi_comm_rows",mpi_comm_rows,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem getting option. Aborting..."
     stop
   endif
   call obj%get("mpi_comm_cols",mpi_comm_cols,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem getting option. Aborting..."
     stop
   endif

#ifdef REDISTRIBUTE_MATRIX
Andreas Marek's avatar
Andreas Marek committed
250
#include "../helpers/elpa_redistribute_template.F90"
Andreas Marek's avatar
Andreas Marek committed
251 252
#endif /* REDISTRIBUTE_MATRIX */

Andreas Marek's avatar
Andreas Marek committed
253 254 255 256 257 258 259
   ! special case na = 1
   if (na .eq. 1) then
#if REALCASE == 1
     ev(1) = a(1,1)
#endif
#if COMPLEXCASE == 1
     ev(1) = real(a(1,1))
Pavel Kus's avatar
Pavel Kus committed
260
#endif
Andreas Marek's avatar
Andreas Marek committed
261
     if (.not.(obj%eigenvalues_only)) then
Pavel Kus's avatar
Pavel Kus committed
262
       q(1,1) = ONE
Andreas Marek's avatar
Andreas Marek committed
263
     endif
264 265 266 267 268 269 270

     ! restore original OpenMP settings
#ifdef WITH_OPENMP
     ! store the number of OpenMP threads used in the calling function
     ! restore this at the end of ELPA 2
     call omp_set_num_threads(omp_threads_caller)
#endif
Andreas Marek's avatar
Andreas Marek committed
271 272 273 274 275 276 277 278
     call obj%timer%stop("elpa_solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &")
     return
   endif

279 280 281 282 283
   if (nev == 0) then
     nev = 1
     obj%eigenvalues_only = .true.
   endif

Andreas Marek's avatar
Andreas Marek committed
284 285
   call obj%get("gpu",gpu,error)
   if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
286
     print *,"Problem getting option for gpu. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
287 288
     stop
   endif
289
   if (gpu .eq. 1) then
290 291 292 293
     useGPU =.true.
   else
     useGPU = .false.
   endif
Carolin Penke's avatar
Carolin Penke committed
294 295 296
   
   call obj%get("is_skewsymmetric",skewsymmetric,error)
   if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
297
     print *,"Problem getting option for skewsymmetric. Aborting..."
Carolin Penke's avatar
Carolin Penke committed
298 299 300 301 302
     stop
   endif
    
   isSkewsymmetric = (skewsymmetric == 1)
   
303
   call obj%timer%start("mpi_communication")
304

305 306 307 308 309
   call mpi_comm_rank(int(mpi_comm_rows,kind=MPI_KIND), my_prowMPI, mpierr)
   call mpi_comm_rank(int(mpi_comm_cols,kind=MPI_KIND), my_pcolMPI, mpierr)

   my_prow = int(my_prowMPI,kind=c_int)
   my_pcol = int(my_pcolMPI,kind=c_int)
310

311 312
   call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND), np_rowsMPI, mpierr)
   call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND), np_colsMPI, mpierr)
313 314 315

   np_rows = int(np_rowsMPI,kind=c_int)
   np_cols = int(np_colsMPI,kind=c_int)
316

317
   call obj%timer%stop("mpi_communication")
318

Andreas Marek's avatar
Andreas Marek committed
319 320
   call obj%get("debug", debug,error)
   if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
321
     print *,"Problem setting option for debug. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
322 323
     stop
   endif
324
   wantDebug = debug == 1
325
   do_useGPU = .false.
326

327
   
328
   if (useGPU) then
329
     call obj%timer%start("check_for_gpu")
330

331 332 333 334 335 336 337 338 339 340 341 342 343
     if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
       do_useGPU = .true.
       ! set the neccessary parameters
       cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
       cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
       cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
       cudaHostRegisterPortable = cuda_hostRegisterPortable()
       cudaHostRegisterMapped   = cuda_hostRegisterMapped()
     else
       print *,"GPUs are requested but not detected! Aborting..."
       success = .false.
       return
     endif
344
     call obj%timer%stop("check_for_gpu")
345
   endif
Andreas Marek's avatar
Andreas Marek committed
346

347

348 349 350 351 352 353 354 355 356 357
   do_useGPU_tridiag = do_useGPU
   do_useGPU_solve_tridi = do_useGPU
   do_useGPU_trans_ev = do_useGPU
   ! only if we want (and can) use GPU in general, look what are the
   ! requirements for individual routines. Implicitly they are all set to 1, so
   ! unles specified otherwise by the user, GPU versions of all individual
   ! routines should be used
   if(do_useGPU) then
     call obj%get("gpu_tridiag", gpu, error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
358
       print *,"Problem getting option for gpu_tridiag. Aborting..."
359 360 361 362 363 364
       stop
     endif
     do_useGPU_tridiag = (gpu == 1)

     call obj%get("gpu_solve_tridi", gpu, error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
365
       print *,"Problem getting option for gpu_solve_tridi. Aborting..."
366 367 368 369 370 371
       stop
     endif
     do_useGPU_solve_tridi = (gpu == 1)

     call obj%get("gpu_trans_ev", gpu, error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
372
       print *,"Problem getting option for gpu_trans_ev. Aborting..."
373 374 375 376 377 378 379 380
       stop
     endif
     do_useGPU_trans_ev = (gpu == 1)
   endif
   ! for elpa1 the easy thing is, that the individual phases of the algorithm
   ! do not share any data on the GPU. 


Andreas Marek's avatar
Andreas Marek committed
381
   ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
382
   if (.not.(obj%eigenvalues_only)) then
Andreas Marek's avatar
Andreas Marek committed
383
     q_actual => q(1:matrixRows,1:matrixCols)
Andreas Marek's avatar
Andreas Marek committed
384
   else
385 386
     allocate(q_dummy(1:matrixRows,1:matrixCols), stat=istat, errmsg=errorMessage)
     check_allocate("elpa1_template: q_dummy", istat, errorMessage)
Andreas Marek's avatar
Andreas Marek committed
387 388 389
     q_actual => q_dummy
   endif

Andreas Marek's avatar
Andreas Marek committed
390 391 392 393
   ! test only
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q
   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q

394 395 396 397 398 399 400
#if COMPLEXCASE == 1
   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
401
   check_allocate("elpa1_template: q_real", istat, errorMessage)
402 403
#endif
   allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
404
   check_allocate("elpa1_template: e, tau", istat, errorMessage)
405

406 407
   ! start the computations
   ! as default do all three steps (this might change at some point)
Pavel Kus's avatar
Pavel Kus committed
408
   do_tridiag  = .true.
409 410 411
   do_solve    = .true.
   do_trans_ev = .true.

Pavel Kus's avatar
Pavel Kus committed
412
   if (do_tridiag) then
413
     call obj%timer%start("forward")
Pavel Kus's avatar
Pavel Kus committed
414 415 416
#ifdef HAVE_LIKWID
     call likwid_markerStartRegion("tridi")
#endif
417 418 419
#ifdef WITH_NVTX
     call nvtxRangePush("tridi")
#endif
Pavel Kus's avatar
Pavel Kus committed
420

421 422 423 424
     call tridiag_&
     &MATH_DATATYPE&
     &_&
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
425
     & (obj, na, a, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU_tridiag, wantDebug, nrThreads)
Pavel Kus's avatar
Pavel Kus committed
426

427 428 429
#ifdef WITH_NVTX
     call nvtxRangePop()
#endif
Pavel Kus's avatar
Pavel Kus committed
430 431 432
#ifdef HAVE_LIKWID
     call likwid_markerStopRegion("tridi")
#endif
433
     call obj%timer%stop("forward")
Pavel Kus's avatar
Pavel Kus committed
434
    endif  !do_tridiag
435 436 437

    if (do_solve) then
     call obj%timer%start("solve")
Pavel Kus's avatar
Pavel Kus committed
438 439 440
#ifdef HAVE_LIKWID
     call likwid_markerStartRegion("solve")
#endif
441 442 443
#ifdef WITH_NVTX
     call nvtxRangePush("solve")
#endif
444 445 446
     call solve_tridi_&
     &PRECISION&
     & (obj, na, nev, ev, e,  &
447
#if REALCASE == 1
Andreas Marek's avatar
Andreas Marek committed
448
        q_actual, matrixRows,          &
449 450
#endif
#if COMPLEXCASE == 1
451
        q_real, l_rows,  &
452
#endif
Andreas Marek's avatar
Andreas Marek committed
453
        nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, success, nrThreads)
Pavel Kus's avatar
Pavel Kus committed
454

455 456 457
#ifdef WITH_NVTX
     call nvtxRangePop()
#endif
Pavel Kus's avatar
Pavel Kus committed
458 459 460
#ifdef HAVE_LIKWID
     call likwid_markerStopRegion("solve")
#endif
461 462 463
     call obj%timer%stop("solve")
     if (.not.(success)) return
   endif !do_solve
464

Andreas Marek's avatar
Andreas Marek committed
465 466 467
   if (obj%eigenvalues_only) then
     do_trans_ev = .false.
   else
Andreas Marek's avatar
Andreas Marek committed
468 469
     call obj%get("check_pd",check_pd,error)
     if (error .ne. ELPA_OK) then
Andreas Marek's avatar
Andreas Marek committed
470
       print *,"Problem setting option for check_pd. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
471 472
       stop
     endif
Andreas Marek's avatar
Andreas Marek committed
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
     if (check_pd .eq. 1) then
       check_pd = 0
       do i = 1, na
         if (ev(i) .gt. THRESHOLD) then
           check_pd = check_pd + 1
         endif
       enddo
       if (check_pd .lt. na) then
         ! not positiv definite => eigenvectors needed
         do_trans_ev = .true.
       else
         do_trans_ev = .false.
       endif
     endif ! check_pd
   endif ! eigenvalues_only

489
   if (do_trans_ev) then
Andreas Marek's avatar
Andreas Marek committed
490
    ! q must be given thats why from here on we can use q and not q_actual
491
#if COMPLEXCASE == 1
492
     q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
493
#endif
Carolin Penke's avatar
Carolin Penke committed
494 495 496 497
     if (isSkewsymmetric) then
     ! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
     ! This makes the eigenvectors complex. 
     ! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
Andreas Marek's avatar
Andreas Marek committed
498 499
       q(1:matrixRows, matrixCols+1:2*matrixCols) = 0.0
       do i = 1, matrixRows
Carolin Penke's avatar
Carolin Penke committed
500 501 502 503 504 505
!        global_index = indxl2g(i, nblk, my_prow, 0, np_rows)
         global_index = np_rows*nblk*((i-1)/nblk) + MOD(i-1,nblk) + MOD(np_rows+my_prow-0, np_rows)*nblk + 1
         if (mod(global_index-1,4) .eq. 0) then
            ! do nothing
         end if
         if (mod(global_index-1,4) .eq. 1) then
Andreas Marek's avatar
Andreas Marek committed
506 507
            q(i,matrixCols+1:2*matrixCols) = q(i,1:matrixCols)
            q(i,1:matrixCols) = 0
Carolin Penke's avatar
Carolin Penke committed
508 509
         end if
         if (mod(global_index-1,4) .eq. 2) then
Andreas Marek's avatar
Andreas Marek committed
510
            q(i,1:matrixCols) = -q(i,1:matrixCols)
Carolin Penke's avatar
Carolin Penke committed
511 512
         end if
         if (mod(global_index-1,4) .eq. 3) then
Andreas Marek's avatar
Andreas Marek committed
513 514
            q(i,matrixCols+1:2*matrixCols) = -q(i,1:matrixCols)
            q(i,1:matrixCols) = 0
Carolin Penke's avatar
Carolin Penke committed
515 516 517
         end if
       end do       
     endif
518

Andreas Marek's avatar
Andreas Marek committed
519
     call obj%timer%start("back")
Pavel Kus's avatar
Pavel Kus committed
520 521 522
#ifdef HAVE_LIKWID
     call likwid_markerStartRegion("trans_ev")
#endif
523 524 525
#ifdef WITH_NVTX
     call nvtxRangePush("trans_ev")
#endif
Carolin Penke's avatar
Carolin Penke committed
526
     ! In the skew-symmetric case this transforms the real part
Andreas Marek's avatar
Andreas Marek committed
527
     call trans_ev_&
528
     &MATH_DATATYPE&
Andreas Marek's avatar
Andreas Marek committed
529
     &_&
530
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
531
     & (obj, na, nev, a, matrixRows, tau, q, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
Carolin Penke's avatar
Carolin Penke committed
532 533 534 535
     if (isSkewsymmetric) then
       ! Transform imaginary part
       ! Transformation of real and imaginary part could also be one call of trans_ev_tridi acting on the n x 2n matrix.
       call trans_ev_&
Andreas Marek's avatar
Andreas Marek committed
536 537 538
             &MATH_DATATYPE&
             &_&
             &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
539
             & (obj, na, nev, a, matrixRows, tau, q(1:matrixRows, matrixCols+1:2*matrixCols), matrixRows, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
540
                mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
Carolin Penke's avatar
Carolin Penke committed
541
       endif
Pavel Kus's avatar
Pavel Kus committed
542

543 544 545
#ifdef WITH_NVTX
     call nvtxRangePop()
#endif
Pavel Kus's avatar
Pavel Kus committed
546 547 548
#ifdef HAVE_LIKWID
     call likwid_markerStopRegion("trans_ev")
#endif
Andreas Marek's avatar
Andreas Marek committed
549
     call obj%timer%stop("back")
550
   endif ! do_trans_ev
Andreas Marek's avatar
Andreas Marek committed
551 552 553

#if COMPLEXCASE == 1
    deallocate(q_real, stat=istat, errmsg=errorMessage)
554
    check_deallocate("elpa1_template: q_real", istat, errorMessage)
555
#endif
Andreas Marek's avatar
Andreas Marek committed
556

557
   deallocate(e, tau, stat=istat, errmsg=errorMessage)
558
   check_deallocate("elpa1_template: e, tau", istat, errorMessage)
559

560
   if (obj%eigenvalues_only) then
Andreas Marek's avatar
Andreas Marek committed
561
     deallocate(q_dummy, stat=istat, errmsg=errorMessage)
562
     check_deallocate("elpa1_template: q_dummy", istat, errorMessage)
Andreas Marek's avatar
Andreas Marek committed
563 564
   endif

565 566 567
#ifdef WITH_NVTX
   call nvtxRangePop()
#endif
568 569 570 571 572 573 574
   ! restore original OpenMP settings
#ifdef WITH_OPENMP
   ! store the number of OpenMP threads used in the calling function
   ! restore this at the end of ELPA 2
   call omp_set_num_threads(omp_threads_caller)
#endif

Andreas Marek's avatar
Andreas Marek committed
575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603
#ifdef REDISTRIBUTE_MATRIX
   ! redistribute back if necessary
   if (doRedistributeMatrix) then

     !if (layoutInternal /= layoutExternal) then
     !  ! maybe this can be skiped I now the process grid
     !  ! and np_rows and np_cols

     !  call obj%get("mpi_comm_rows",mpi_comm_rows,error)
     !  call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND), np_rowsMPI, mpierr)
     !  call obj%get("mpi_comm_cols",mpi_comm_cols,error)
     !  call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND), np_colsMPI, mpierr)

     !  np_rows = int(np_rowsMPI,kind=c_int)
     !  np_cols = int(np_colsMPI,kind=c_int)

     !  ! we get new blacs context and the local process grid coordinates
     !  call BLACS_Gridinit(external_blacs_ctxt, layoutInternal, int(np_rows,kind=BLAS_KIND), int(np_cols,kind=BLAS_KIND))
     !  call BLACS_Gridinfo(int(external_blacs_ctxt,KIND=BLAS_KIND), np_rows__, &
     !                      np_cols__, my_prow__, my_pcol__)

     !endif

     !call scal_PRECISION_GEMR2D &
     !(int(na,kind=BLAS_KIND), int(na,kind=BLAS_KIND), aIntern, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc_, aExtern, &
     !1_BLAS_KIND, 1_BLAS_KIND, sc_desc, external_blacs_ctxt)

     call scal_PRECISION_GEMR2D &
     (int(na,kind=BLAS_KIND), int(na,kind=BLAS_KIND), qIntern, 1_BLAS_KIND, 1_BLAS_KIND, sc_desc_, qExtern, &
604
     1_BLAS_KIND, 1_BLAS_KIND, sc_desc, int(external_blacs_ctxt,kind=BLAS_KIND))
Andreas Marek's avatar
Andreas Marek committed
605 606 607 608 609 610 611 612 613 614


     !clean MPI communicators and blacs grid
     !of the internal re-distributed matrix
     call mpi_comm_free(mpi_comm_rowsMPI_, mpierr)
     call mpi_comm_free(mpi_comm_colsMPI_, mpierr)
     call blacs_gridexit(blacs_ctxt_)
   endif
#endif /* REDISTRIBUTE_MATRIX */

615
   call obj%timer%stop("elpa_solve_evp_&
616 617 618
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
619
   &")
620 621 622
end function