elpa1_template.F90 12.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif

55
#include "../general/sanity.F90"
56 57 58

function elpa_solve_evp_&
         &MATH_DATATYPE&
Pavel Kus's avatar
Pavel Kus committed
59 60 61
   &_1stage_&
   &PRECISION&
   &_impl (obj, a, ev, q) result(success)
62 63 64 65
   use precision
   use cuda_functions
   use mod_check_for_gpu
   use iso_c_binding
66
   use elpa_abstract_impl
67 68 69
   use elpa_mpi
   use elpa1_compute
   implicit none
Pavel Kus's avatar
Pavel Kus committed
70
#include "../general/precision_kinds.F90"
71
   class(elpa_abstract_impl_t), intent(inout) :: obj
72
   real(kind=REAL_DATATYPE), intent(out)           :: ev(obj%na)
Pavel Kus's avatar
Pavel Kus committed
73

74
#ifdef USE_ASSUMED_SIZE
Pavel Kus's avatar
Pavel Kus committed
75 76
   MATH_DATATYPE(kind=rck), intent(inout)       :: a(obj%local_nrows,*)
   MATH_DATATYPE(kind=rck), optional,target,intent(out)  :: q(obj%local_nrows,*)
77
#else
Pavel Kus's avatar
Pavel Kus committed
78 79
   MATH_DATATYPE(kind=rck), intent(inout)       :: a(obj%local_nrows,obj%local_ncols)
   MATH_DATATYPE(kind=rck), optional, target, intent(out)  :: q(obj%local_nrows,obj%local_ncols)
80
#endif
Pavel Kus's avatar
Pavel Kus committed
81 82

#if REALCASE == 1
83
   real(kind=C_DATATYPE_KIND), allocatable         :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
84 85
   real(kind=C_DATATYPE_KIND), allocatable, target         :: q_dummy(:,:)
   real(kind=C_DATATYPE_KIND), pointer             :: q_actual(:,:)
86 87 88 89 90
#endif /* REALCASE */

#if COMPLEXCASE == 1
   real(kind=REAL_DATATYPE), allocatable           :: q_real(:,:)
   complex(kind=C_DATATYPE_KIND), allocatable      :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
91 92
   complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
   complex(kind=C_DATATYPE_KIND), pointer          :: q_actual(:,:)
93 94 95
   integer(kind=c_int)                             :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
#endif /* COMPLEXCASE */

96
   logical                                         :: useGPU
97 98 99 100 101 102 103 104
   logical                                         :: success

   logical                                         :: do_useGPU
   integer(kind=ik)                                :: numberOfGPUDevices

   integer(kind=c_int)                             :: my_pe, n_pes, my_prow, my_pcol, mpierr
   real(kind=C_DATATYPE_KIND), allocatable         :: e(:)
   logical                                         :: wantDebug
105
   integer(kind=c_int)                             :: istat, debug, gpu
106
   character(200)                                  :: errorMessage
107
   integer(kind=ik)                                :: na, nev, lda, ldq, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
108
                                                      mpi_comm_rows, mpi_comm_cols,        &
Andreas Marek's avatar
Andreas Marek committed
109
                                                      mpi_comm_all, check_pd, i, error
110

111 112
   logical                                         :: do_bandred, do_solve, do_trans_ev

113
   call obj%timer%start("elpa_solve_evp_&
114 115 116
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
117
   &")
118

Andreas Marek's avatar
Andreas Marek committed
119
   success = .true.
Andreas Marek's avatar
Andreas Marek committed
120 121

   if (present(q)) then
122
     obj%eigenvalues_only = .false.
Andreas Marek's avatar
Andreas Marek committed
123
   else
124
     obj%eigenvalues_only = .true.
Andreas Marek's avatar
Andreas Marek committed
125 126
   endif

127 128 129 130 131 132 133
   na         = obj%na
   nev        = obj%nev
   lda        = obj%local_nrows
   ldq        = obj%local_nrows
   nblk       = obj%nblk
   matrixCols = obj%local_ncols

Andreas Marek's avatar
Andreas Marek committed
134 135 136 137 138 139 140
   ! special case na = 1
   if (na .eq. 1) then
#if REALCASE == 1
     ev(1) = a(1,1)
#endif
#if COMPLEXCASE == 1
     ev(1) = real(a(1,1))
Pavel Kus's avatar
Pavel Kus committed
141
#endif
Andreas Marek's avatar
Andreas Marek committed
142
     if (.not.(obj%eigenvalues_only)) then
Pavel Kus's avatar
Pavel Kus committed
143
       q(1,1) = ONE
Andreas Marek's avatar
Andreas Marek committed
144 145 146 147 148 149 150 151 152
     endif
     call obj%timer%stop("elpa_solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &")
     return
   endif

153 154 155 156 157 158
   if (nev == 0) then
     nev = 1
     obj%eigenvalues_only = .true.
   endif


Andreas Marek's avatar
Andreas Marek committed
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
   call obj%get("mpi_comm_rows",mpi_comm_rows,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
   call obj%get("mpi_comm_cols",mpi_comm_cols,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
   call obj%get("mpi_comm_parent", mpi_comm_all,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
174

Andreas Marek's avatar
Andreas Marek committed
175 176 177 178 179
   call obj%get("gpu",gpu,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
180
   if (gpu .eq. 1) then
181 182 183 184 185
     useGPU =.true.
   else
     useGPU = .false.
   endif

186
   call obj%timer%start("mpi_communication")
187 188 189 190 191 192 193 194 195 196 197 198

   call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
   call mpi_comm_size(mpi_comm_all,n_pes,mpierr)

   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)

#if COMPLEXCASE == 1
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#endif

199
   call obj%timer%stop("mpi_communication")
200

Andreas Marek's avatar
Andreas Marek committed
201 202 203 204 205
   call obj%get("debug", debug,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
206
   wantDebug = debug == 1
207
   do_useGPU = .false.
208

209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226

   if (useGPU) then
     if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then

       do_useGPU = .true.
       ! set the neccessary parameters
       cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
       cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
       cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
       cudaHostRegisterPortable = cuda_hostRegisterPortable()
       cudaHostRegisterMapped   = cuda_hostRegisterMapped()
     else
       print *,"GPUs are requested but not detected! Aborting..."
       success = .false.
       return
     endif
   else
     ! check whether set by environment variable
Andreas Marek's avatar
Andreas Marek committed
227 228 229 230 231
     call obj%get("gpu", gpu,error)
     if (error .ne. ELPA_OK) then
       print *,"Problem setting option. Aborting..."
       stop
     endif
232
     do_useGPU = gpu == 1
233 234

     if (do_useGPU) then
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
       if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then

         ! set the neccessary parameters
         cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
         cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
         cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
         cudaHostRegisterPortable = cuda_hostRegisterPortable()
         cudaHostRegisterMapped   = cuda_hostRegisterMapped()
       else
         print *,"GPUs are requested but not detected! Aborting..."
         success = .false.
         return
       endif
     endif
   endif
Andreas Marek's avatar
Andreas Marek committed
250 251

   ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
252
   if (.not.(obj%eigenvalues_only)) then
Andreas Marek's avatar
Andreas Marek committed
253 254 255 256 257 258
     q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
   else
     allocate(q_dummy(obj%local_nrows,obj%local_ncols))
     q_actual => q_dummy
   endif

259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
#if COMPLEXCASE == 1
   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating q_real "//errorMessage
     stop 1
   endif
#endif
   allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating e, tau "//errorMessage
     stop 1
   endif


286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
   ! start the computations
   ! as default do all three steps (this might change at some point)
   do_bandred  = .true.
   do_solve    = .true.
   do_trans_ev = .true.

   if (obj%eigenvalues_only) then
     do_trans_ev = .true.
   endif

   if (do_bandred) then
     call obj%timer%start("forward")
     call tridiag_&
     &MATH_DATATYPE&
     &_&
     &PRECISION&
302
     & (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU, wantDebug)
303 304 305 306 307 308 309 310
     call obj%timer%stop("forward")
    endif  !do_bandred

    if (do_solve) then
     call obj%timer%start("solve")
     call solve_tridi_&
     &PRECISION&
     & (obj, na, nev, ev, e,  &
311
#if REALCASE == 1
312
        q_actual, ldq,          &
313 314
#endif
#if COMPLEXCASE == 1
315
        q_real, l_rows,  &
316
#endif
317
        nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU, wantDebug, success)
318 319 320
     call obj%timer%stop("solve")
     if (.not.(success)) return
   endif !do_solve
321

Andreas Marek's avatar
Andreas Marek committed
322 323 324
   if (obj%eigenvalues_only) then
     do_trans_ev = .false.
   else
Andreas Marek's avatar
Andreas Marek committed
325 326 327 328 329
     call obj%get("check_pd",check_pd,error)
     if (error .ne. ELPA_OK) then
       print *,"Problem setting option. Aborting..."
       stop
     endif
Andreas Marek's avatar
Andreas Marek committed
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
     if (check_pd .eq. 1) then
       check_pd = 0
       do i = 1, na
         if (ev(i) .gt. THRESHOLD) then
           check_pd = check_pd + 1
         endif
       enddo
       if (check_pd .lt. na) then
         ! not positiv definite => eigenvectors needed
         do_trans_ev = .true.
       else
         do_trans_ev = .false.
       endif
     endif ! check_pd
   endif ! eigenvalues_only

346
   if (do_trans_ev) then
Andreas Marek's avatar
Andreas Marek committed
347
    ! q must be given thats why from here on we can use q and not q_actual
348
#if COMPLEXCASE == 1
349
     q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
350
#endif
351

Andreas Marek's avatar
Andreas Marek committed
352 353
     call obj%timer%start("back")
     call trans_ev_&
354
     &MATH_DATATYPE&
Andreas Marek's avatar
Andreas Marek committed
355
     &_&
356
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
357 358
     & (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
     call obj%timer%stop("back")
359
   endif ! do_trans_ev
Andreas Marek's avatar
Andreas Marek committed
360 361 362 363 364 365 366 367 368 369 370

#if COMPLEXCASE == 1
    deallocate(q_real, stat=istat, errmsg=errorMessage)
    if (istat .ne. 0) then
      print *,"solve_evp_&
      &MATH_DATATYPE&
      &_1stage_&
      &PRECISION&
      &" // ": error when deallocating q_real "//errorMessage
      stop 1
    endif
371
#endif
Andreas Marek's avatar
Andreas Marek committed
372

373 374 375 376 377 378 379 380 381 382
   deallocate(e, tau, stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when deallocating e, tau "//errorMessage
     stop 1
   endif

383
   if (obj%eigenvalues_only) then
Andreas Marek's avatar
Andreas Marek committed
384 385 386 387 388 389 390 391 392 393 394
     deallocate(q_dummy, stat=istat, errmsg=errorMessage)
     if (istat .ne. 0) then
       print *,"solve_evp_&
       &MATH_DATATYPE&
       &_1stage_&
       &PRECISION&
       &" // ": error when deallocating q_dummy "//errorMessage
       stop 1
     endif
   endif

395
   call obj%timer%stop("elpa_solve_evp_&
396 397 398
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
399
   &")
400 401 402
end function