elpa1_template.F90 12.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif

55
#include "../general/sanity.F90"
56 57 58

function elpa_solve_evp_&
         &MATH_DATATYPE&
Pavel Kus's avatar
Pavel Kus committed
59 60 61
   &_1stage_&
   &PRECISION&
   &_impl (obj, a, ev, q) result(success)
62 63 64 65
   use precision
   use cuda_functions
   use mod_check_for_gpu
   use iso_c_binding
66
   use elpa_abstract_impl
67 68
   use elpa_mpi
   use elpa1_compute
69 70 71
#ifdef WITH_OPENMP
   use omp_lib
#endif
72
   implicit none
Pavel Kus's avatar
Pavel Kus committed
73
#include "../general/precision_kinds.F90"
74
   class(elpa_abstract_impl_t), intent(inout) :: obj
75
   real(kind=REAL_DATATYPE), intent(out)           :: ev(obj%na)
Pavel Kus's avatar
Pavel Kus committed
76

77
#ifdef USE_ASSUMED_SIZE
Pavel Kus's avatar
Pavel Kus committed
78 79
   MATH_DATATYPE(kind=rck), intent(inout)       :: a(obj%local_nrows,*)
   MATH_DATATYPE(kind=rck), optional,target,intent(out)  :: q(obj%local_nrows,*)
80
#else
Pavel Kus's avatar
Pavel Kus committed
81 82
   MATH_DATATYPE(kind=rck), intent(inout)       :: a(obj%local_nrows,obj%local_ncols)
   MATH_DATATYPE(kind=rck), optional, target, intent(out)  :: q(obj%local_nrows,obj%local_ncols)
83
#endif
Pavel Kus's avatar
Pavel Kus committed
84 85

#if REALCASE == 1
86
   real(kind=C_DATATYPE_KIND), allocatable         :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
87 88
   real(kind=C_DATATYPE_KIND), allocatable, target         :: q_dummy(:,:)
   real(kind=C_DATATYPE_KIND), pointer             :: q_actual(:,:)
89 90 91 92 93
#endif /* REALCASE */

#if COMPLEXCASE == 1
   real(kind=REAL_DATATYPE), allocatable           :: q_real(:,:)
   complex(kind=C_DATATYPE_KIND), allocatable      :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
94 95
   complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
   complex(kind=C_DATATYPE_KIND), pointer          :: q_actual(:,:)
96 97 98
   integer(kind=c_int)                             :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
#endif /* COMPLEXCASE */

99
   logical                                         :: useGPU
100 101
   logical                                         :: success

102 103
   logical                                         :: do_useGPU, do_useGPU_tridiag, &
                                                      do_useGPU_solve_tridi, do_useGPU_trans_ev
104 105 106 107 108
   integer(kind=ik)                                :: numberOfGPUDevices

   integer(kind=c_int)                             :: my_pe, n_pes, my_prow, my_pcol, mpierr
   real(kind=C_DATATYPE_KIND), allocatable         :: e(:)
   logical                                         :: wantDebug
109
   integer(kind=c_int)                             :: istat, debug, gpu
110
   character(200)                                  :: errorMessage
111
   integer(kind=ik)                                :: na, nev, lda, ldq, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
112
                                                      mpi_comm_rows, mpi_comm_cols,        &
Andreas Marek's avatar
Andreas Marek committed
113
                                                      mpi_comm_all, check_pd, i, error
114

Pavel Kus's avatar
Pavel Kus committed
115
   logical                                         :: do_tridiag, do_solve, do_trans_ev
116
   integer(kind=ik)                                :: nrThreads
117

118
   call obj%timer%start("elpa_solve_evp_&
119 120 121
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
122
   &")
123

Andreas Marek's avatar
Andreas Marek committed
124
#ifdef WITH_OPENMP
Andreas Marek's avatar
Andreas Marek committed
125 126 127
   !nrThreads = omp_get_max_threads()
   call obj%get("omp_threads",nrThreads,error)
   call omp_set_num_threads(nrThreads)
Andreas Marek's avatar
Andreas Marek committed
128 129 130 131 132
#else
   nrThreads = 1
#endif


Andreas Marek's avatar
Andreas Marek committed
133
   success = .true.
Andreas Marek's avatar
Andreas Marek committed
134 135

   if (present(q)) then
136
     obj%eigenvalues_only = .false.
Andreas Marek's avatar
Andreas Marek committed
137
   else
138
     obj%eigenvalues_only = .true.
Andreas Marek's avatar
Andreas Marek committed
139 140
   endif

141 142 143 144 145 146 147
   na         = obj%na
   nev        = obj%nev
   lda        = obj%local_nrows
   ldq        = obj%local_nrows
   nblk       = obj%nblk
   matrixCols = obj%local_ncols

Andreas Marek's avatar
Andreas Marek committed
148 149 150 151 152 153 154
   ! special case na = 1
   if (na .eq. 1) then
#if REALCASE == 1
     ev(1) = a(1,1)
#endif
#if COMPLEXCASE == 1
     ev(1) = real(a(1,1))
Pavel Kus's avatar
Pavel Kus committed
155
#endif
Andreas Marek's avatar
Andreas Marek committed
156
     if (.not.(obj%eigenvalues_only)) then
Pavel Kus's avatar
Pavel Kus committed
157
       q(1,1) = ONE
Andreas Marek's avatar
Andreas Marek committed
158 159 160 161 162 163 164 165 166
     endif
     call obj%timer%stop("elpa_solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &")
     return
   endif

167 168 169 170 171 172
   if (nev == 0) then
     nev = 1
     obj%eigenvalues_only = .true.
   endif


Andreas Marek's avatar
Andreas Marek committed
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
   call obj%get("mpi_comm_rows",mpi_comm_rows,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
   call obj%get("mpi_comm_cols",mpi_comm_cols,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
   call obj%get("mpi_comm_parent", mpi_comm_all,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
188

Andreas Marek's avatar
Andreas Marek committed
189 190
   call obj%get("gpu",gpu,error)
   if (error .ne. ELPA_OK) then
191
     print *,"Problem getting option. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
192 193
     stop
   endif
194
   if (gpu .eq. 1) then
195 196 197 198 199
     useGPU =.true.
   else
     useGPU = .false.
   endif

200
   call obj%timer%start("mpi_communication")
201 202 203 204 205 206 207 208 209 210 211 212

   call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
   call mpi_comm_size(mpi_comm_all,n_pes,mpierr)

   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)

#if COMPLEXCASE == 1
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#endif

213
   call obj%timer%stop("mpi_communication")
214

Andreas Marek's avatar
Andreas Marek committed
215 216 217 218 219
   call obj%get("debug", debug,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
220
   wantDebug = debug == 1
221
   do_useGPU = .false.
222

223
   
224
   if (useGPU) then
225
     call obj%timer%start("check_for_gpu")
226 227 228 229 230 231 232 233 234 235 236 237 238
     if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
       do_useGPU = .true.
       ! set the neccessary parameters
       cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
       cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
       cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
       cudaHostRegisterPortable = cuda_hostRegisterPortable()
       cudaHostRegisterMapped   = cuda_hostRegisterMapped()
     else
       print *,"GPUs are requested but not detected! Aborting..."
       success = .false.
       return
     endif
239
     call obj%timer%stop("check_for_gpu")
240
   endif
Andreas Marek's avatar
Andreas Marek committed
241

242

243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
   do_useGPU_tridiag = do_useGPU
   do_useGPU_solve_tridi = do_useGPU
   do_useGPU_trans_ev = do_useGPU
   ! only if we want (and can) use GPU in general, look what are the
   ! requirements for individual routines. Implicitly they are all set to 1, so
   ! unles specified otherwise by the user, GPU versions of all individual
   ! routines should be used
   if(do_useGPU) then
     call obj%get("gpu_tridiag", gpu, error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option. Aborting..."
       stop
     endif
     do_useGPU_tridiag = (gpu == 1)

     call obj%get("gpu_solve_tridi", gpu, error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option. Aborting..."
       stop
     endif
     do_useGPU_solve_tridi = (gpu == 1)

     call obj%get("gpu_trans_ev", gpu, error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option. Aborting..."
       stop
     endif
     do_useGPU_trans_ev = (gpu == 1)
   endif
   ! for elpa1 the easy thing is, that the individual phases of the algorithm
   ! do not share any data on the GPU. 


Andreas Marek's avatar
Andreas Marek committed
276
   ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
277
   if (.not.(obj%eigenvalues_only)) then
Andreas Marek's avatar
Andreas Marek committed
278 279 280 281 282 283
     q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
   else
     allocate(q_dummy(obj%local_nrows,obj%local_ncols))
     q_actual => q_dummy
   endif

284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
#if COMPLEXCASE == 1
   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating q_real "//errorMessage
     stop 1
   endif
#endif
   allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating e, tau "//errorMessage
     stop 1
   endif


311 312
   ! start the computations
   ! as default do all three steps (this might change at some point)
Pavel Kus's avatar
Pavel Kus committed
313
   do_tridiag  = .true.
314 315 316
   do_solve    = .true.
   do_trans_ev = .true.

Pavel Kus's avatar
Pavel Kus committed
317
   if (do_tridiag) then
318 319 320 321 322
     call obj%timer%start("forward")
     call tridiag_&
     &MATH_DATATYPE&
     &_&
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
323
     & (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU_tridiag, wantDebug, nrThreads)
324
     call obj%timer%stop("forward")
Pavel Kus's avatar
Pavel Kus committed
325
    endif  !do_tridiag
326 327 328 329 330 331

    if (do_solve) then
     call obj%timer%start("solve")
     call solve_tridi_&
     &PRECISION&
     & (obj, na, nev, ev, e,  &
332
#if REALCASE == 1
333
        q_actual, ldq,          &
334 335
#endif
#if COMPLEXCASE == 1
336
        q_real, l_rows,  &
337
#endif
Andreas Marek's avatar
Andreas Marek committed
338
        nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, success, nrThreads)
339 340 341
     call obj%timer%stop("solve")
     if (.not.(success)) return
   endif !do_solve
342

Andreas Marek's avatar
Andreas Marek committed
343 344 345
   if (obj%eigenvalues_only) then
     do_trans_ev = .false.
   else
Andreas Marek's avatar
Andreas Marek committed
346 347 348 349 350
     call obj%get("check_pd",check_pd,error)
     if (error .ne. ELPA_OK) then
       print *,"Problem setting option. Aborting..."
       stop
     endif
Andreas Marek's avatar
Andreas Marek committed
351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
     if (check_pd .eq. 1) then
       check_pd = 0
       do i = 1, na
         if (ev(i) .gt. THRESHOLD) then
           check_pd = check_pd + 1
         endif
       enddo
       if (check_pd .lt. na) then
         ! not positiv definite => eigenvectors needed
         do_trans_ev = .true.
       else
         do_trans_ev = .false.
       endif
     endif ! check_pd
   endif ! eigenvalues_only

367
   if (do_trans_ev) then
Andreas Marek's avatar
Andreas Marek committed
368
    ! q must be given thats why from here on we can use q and not q_actual
369
#if COMPLEXCASE == 1
370
     q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
371
#endif
372

Andreas Marek's avatar
Andreas Marek committed
373 374
     call obj%timer%start("back")
     call trans_ev_&
375
     &MATH_DATATYPE&
Andreas Marek's avatar
Andreas Marek committed
376
     &_&
377
     &PRECISION&
378
     & (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
Andreas Marek's avatar
Andreas Marek committed
379
     call obj%timer%stop("back")
380
   endif ! do_trans_ev
Andreas Marek's avatar
Andreas Marek committed
381 382 383 384 385 386 387 388 389 390 391

#if COMPLEXCASE == 1
    deallocate(q_real, stat=istat, errmsg=errorMessage)
    if (istat .ne. 0) then
      print *,"solve_evp_&
      &MATH_DATATYPE&
      &_1stage_&
      &PRECISION&
      &" // ": error when deallocating q_real "//errorMessage
      stop 1
    endif
392
#endif
Andreas Marek's avatar
Andreas Marek committed
393

394 395 396 397 398 399 400 401 402 403
   deallocate(e, tau, stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when deallocating e, tau "//errorMessage
     stop 1
   endif

404
   if (obj%eigenvalues_only) then
Andreas Marek's avatar
Andreas Marek committed
405 406 407 408 409 410 411 412 413 414 415
     deallocate(q_dummy, stat=istat, errmsg=errorMessage)
     if (istat .ne. 0) then
       print *,"solve_evp_&
       &MATH_DATATYPE&
       &_1stage_&
       &PRECISION&
       &" // ": error when deallocating q_dummy "//errorMessage
       stop 1
     endif
   endif

416
   call obj%timer%stop("elpa_solve_evp_&
417 418 419
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
420
   &")
421 422 423
end function