elpa1_template.F90 13.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif

55
#include "../general/sanity.F90"
56
57
58

function elpa_solve_evp_&
         &MATH_DATATYPE&
Pavel Kus's avatar
Pavel Kus committed
59
60
61
   &_1stage_&
   &PRECISION&
   &_impl (obj, a, ev, q) result(success)
62
63
64
65
   use precision
   use cuda_functions
   use mod_check_for_gpu
   use iso_c_binding
66
   use elpa_abstract_impl
67
68
   use elpa_mpi
   use elpa1_compute
69
70
   use elpa_omp

71
   implicit none
Pavel Kus's avatar
Pavel Kus committed
72
#include "../general/precision_kinds.F90"
73
   class(elpa_abstract_impl_t), intent(inout) :: obj
74
   real(kind=REAL_DATATYPE), intent(out)           :: ev(obj%na)
Pavel Kus's avatar
Pavel Kus committed
75

76
#ifdef USE_ASSUMED_SIZE
Pavel Kus's avatar
Pavel Kus committed
77
78
   MATH_DATATYPE(kind=rck), intent(inout)       :: a(obj%local_nrows,*)
   MATH_DATATYPE(kind=rck), optional,target,intent(out)  :: q(obj%local_nrows,*)
79
#else
Pavel Kus's avatar
Pavel Kus committed
80
81
   MATH_DATATYPE(kind=rck), intent(inout)       :: a(obj%local_nrows,obj%local_ncols)
   MATH_DATATYPE(kind=rck), optional, target, intent(out)  :: q(obj%local_nrows,obj%local_ncols)
82
#endif
Pavel Kus's avatar
Pavel Kus committed
83
84

#if REALCASE == 1
85
   real(kind=C_DATATYPE_KIND), allocatable         :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
86
87
   real(kind=C_DATATYPE_KIND), allocatable, target         :: q_dummy(:,:)
   real(kind=C_DATATYPE_KIND), pointer             :: q_actual(:,:)
88
89
90
91
92
#endif /* REALCASE */

#if COMPLEXCASE == 1
   real(kind=REAL_DATATYPE), allocatable           :: q_real(:,:)
   complex(kind=C_DATATYPE_KIND), allocatable      :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
93
94
   complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
   complex(kind=C_DATATYPE_KIND), pointer          :: q_actual(:,:)
95
96
97
   integer(kind=c_int)                             :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
#endif /* COMPLEXCASE */

98
   logical                                         :: useGPU
99
100
   logical                                         :: success

101
102
   logical                                         :: do_useGPU, do_useGPU_tridiag, &
                                                      do_useGPU_solve_tridi, do_useGPU_trans_ev
103
104
105
106
107
   integer(kind=ik)                                :: numberOfGPUDevices

   integer(kind=c_int)                             :: my_pe, n_pes, my_prow, my_pcol, mpierr
   real(kind=C_DATATYPE_KIND), allocatable         :: e(:)
   logical                                         :: wantDebug
108
   integer(kind=c_int)                             :: istat, debug, gpu
109
   character(200)                                  :: errorMessage
110
   integer(kind=ik)                                :: na, nev, lda, ldq, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
111
                                                      mpi_comm_rows, mpi_comm_cols,        &
Andreas Marek's avatar
Andreas Marek committed
112
                                                      mpi_comm_all, check_pd, i, error
113

Pavel Kus's avatar
Pavel Kus committed
114
   logical                                         :: do_tridiag, do_solve, do_trans_ev
115
   integer(kind=ik)                                :: nrThreads
116

117
   call obj%timer%start("elpa_solve_evp_&
118
119
120
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
121
   &")
122

Andreas Marek's avatar
Andreas Marek committed
123
#ifdef WITH_OPENMP
124
125
126
127
128
   ! store the number of OpenMP threads used in the calling function
   ! restore this at the end of ELPA 2
   omp_threads_caller = omp_get_max_threads()

   ! check the number of threads that ELPA should use internally
Andreas Marek's avatar
Andreas Marek committed
129
130
   call obj%get("omp_threads",nrThreads,error)
   call omp_set_num_threads(nrThreads)
Andreas Marek's avatar
Andreas Marek committed
131
132
133
134
135
#else
   nrThreads = 1
#endif


Andreas Marek's avatar
Andreas Marek committed
136
   success = .true.
Andreas Marek's avatar
Andreas Marek committed
137
138

   if (present(q)) then
139
     obj%eigenvalues_only = .false.
Andreas Marek's avatar
Andreas Marek committed
140
   else
141
     obj%eigenvalues_only = .true.
Andreas Marek's avatar
Andreas Marek committed
142
143
   endif

144
145
146
147
148
149
150
   na         = obj%na
   nev        = obj%nev
   lda        = obj%local_nrows
   ldq        = obj%local_nrows
   nblk       = obj%nblk
   matrixCols = obj%local_ncols

Andreas Marek's avatar
Andreas Marek committed
151
152
153
154
155
156
157
   ! special case na = 1
   if (na .eq. 1) then
#if REALCASE == 1
     ev(1) = a(1,1)
#endif
#if COMPLEXCASE == 1
     ev(1) = real(a(1,1))
Pavel Kus's avatar
Pavel Kus committed
158
#endif
Andreas Marek's avatar
Andreas Marek committed
159
     if (.not.(obj%eigenvalues_only)) then
Pavel Kus's avatar
Pavel Kus committed
160
       q(1,1) = ONE
Andreas Marek's avatar
Andreas Marek committed
161
     endif
162
163
164
165
166
167
168

     ! restore original OpenMP settings
#ifdef WITH_OPENMP
     ! store the number of OpenMP threads used in the calling function
     ! restore this at the end of ELPA 2
     call omp_set_num_threads(omp_threads_caller)
#endif
Andreas Marek's avatar
Andreas Marek committed
169
170
171
172
173
174
175
176
     call obj%timer%stop("elpa_solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &")
     return
   endif

177
178
179
180
181
182
   if (nev == 0) then
     nev = 1
     obj%eigenvalues_only = .true.
   endif


Andreas Marek's avatar
Andreas Marek committed
183
184
   call obj%get("mpi_comm_rows",mpi_comm_rows,error)
   if (error .ne. ELPA_OK) then
185
     print *,"Problem getting option. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
186
187
188
189
     stop
   endif
   call obj%get("mpi_comm_cols",mpi_comm_cols,error)
   if (error .ne. ELPA_OK) then
190
     print *,"Problem getting option. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
191
192
     stop
   endif
193

Andreas Marek's avatar
Andreas Marek committed
194
195
   call obj%get("gpu",gpu,error)
   if (error .ne. ELPA_OK) then
196
     print *,"Problem getting option. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
197
198
     stop
   endif
199
   if (gpu .eq. 1) then
200
201
202
203
204
     useGPU =.true.
   else
     useGPU = .false.
   endif

205
   call obj%timer%start("mpi_communication")
206
207
208
209
210
211
212
213
214

   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)

#if COMPLEXCASE == 1
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#endif

215
   call obj%timer%stop("mpi_communication")
216

Andreas Marek's avatar
Andreas Marek committed
217
218
219
220
221
   call obj%get("debug", debug,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
222
   wantDebug = debug == 1
223
   do_useGPU = .false.
224

225
   
226
   if (useGPU) then
227
     call obj%timer%start("check_for_gpu")
228
229
230
231
232
233
     call obj%get("mpi_comm_parent", mpi_comm_all,error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option. Aborting..."
       stop
     endif
     call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
234
235
236
237
238
239
240
241
242
243
244
245
246
     if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
       do_useGPU = .true.
       ! set the neccessary parameters
       cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
       cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
       cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
       cudaHostRegisterPortable = cuda_hostRegisterPortable()
       cudaHostRegisterMapped   = cuda_hostRegisterMapped()
     else
       print *,"GPUs are requested but not detected! Aborting..."
       success = .false.
       return
     endif
247
     call obj%timer%stop("check_for_gpu")
248
   endif
Andreas Marek's avatar
Andreas Marek committed
249

250

251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
   do_useGPU_tridiag = do_useGPU
   do_useGPU_solve_tridi = do_useGPU
   do_useGPU_trans_ev = do_useGPU
   ! only if we want (and can) use GPU in general, look what are the
   ! requirements for individual routines. Implicitly they are all set to 1, so
   ! unles specified otherwise by the user, GPU versions of all individual
   ! routines should be used
   if(do_useGPU) then
     call obj%get("gpu_tridiag", gpu, error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option. Aborting..."
       stop
     endif
     do_useGPU_tridiag = (gpu == 1)

     call obj%get("gpu_solve_tridi", gpu, error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option. Aborting..."
       stop
     endif
     do_useGPU_solve_tridi = (gpu == 1)

     call obj%get("gpu_trans_ev", gpu, error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option. Aborting..."
       stop
     endif
     do_useGPU_trans_ev = (gpu == 1)
   endif
   ! for elpa1 the easy thing is, that the individual phases of the algorithm
   ! do not share any data on the GPU. 


Andreas Marek's avatar
Andreas Marek committed
284
   ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
285
   if (.not.(obj%eigenvalues_only)) then
Andreas Marek's avatar
Andreas Marek committed
286
287
288
289
290
291
     q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
   else
     allocate(q_dummy(obj%local_nrows,obj%local_ncols))
     q_actual => q_dummy
   endif

292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#if COMPLEXCASE == 1
   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating q_real "//errorMessage
     stop 1
   endif
#endif
   allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating e, tau "//errorMessage
     stop 1
   endif


319
320
   ! start the computations
   ! as default do all three steps (this might change at some point)
Pavel Kus's avatar
Pavel Kus committed
321
   do_tridiag  = .true.
322
323
324
   do_solve    = .true.
   do_trans_ev = .true.

Pavel Kus's avatar
Pavel Kus committed
325
   if (do_tridiag) then
326
327
328
329
330
     call obj%timer%start("forward")
     call tridiag_&
     &MATH_DATATYPE&
     &_&
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
331
     & (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU_tridiag, wantDebug, nrThreads)
332
     call obj%timer%stop("forward")
Pavel Kus's avatar
Pavel Kus committed
333
    endif  !do_tridiag
334
335
336
337
338
339

    if (do_solve) then
     call obj%timer%start("solve")
     call solve_tridi_&
     &PRECISION&
     & (obj, na, nev, ev, e,  &
340
#if REALCASE == 1
341
        q_actual, ldq,          &
342
343
#endif
#if COMPLEXCASE == 1
344
        q_real, l_rows,  &
345
#endif
Andreas Marek's avatar
Andreas Marek committed
346
        nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, success, nrThreads)
347
348
349
     call obj%timer%stop("solve")
     if (.not.(success)) return
   endif !do_solve
350

Andreas Marek's avatar
Andreas Marek committed
351
352
353
   if (obj%eigenvalues_only) then
     do_trans_ev = .false.
   else
Andreas Marek's avatar
Andreas Marek committed
354
355
356
357
358
     call obj%get("check_pd",check_pd,error)
     if (error .ne. ELPA_OK) then
       print *,"Problem setting option. Aborting..."
       stop
     endif
Andreas Marek's avatar
Andreas Marek committed
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
     if (check_pd .eq. 1) then
       check_pd = 0
       do i = 1, na
         if (ev(i) .gt. THRESHOLD) then
           check_pd = check_pd + 1
         endif
       enddo
       if (check_pd .lt. na) then
         ! not positiv definite => eigenvectors needed
         do_trans_ev = .true.
       else
         do_trans_ev = .false.
       endif
     endif ! check_pd
   endif ! eigenvalues_only

375
   if (do_trans_ev) then
Andreas Marek's avatar
Andreas Marek committed
376
    ! q must be given thats why from here on we can use q and not q_actual
377
#if COMPLEXCASE == 1
378
     q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
379
#endif
380

Andreas Marek's avatar
Andreas Marek committed
381
382
     call obj%timer%start("back")
     call trans_ev_&
383
     &MATH_DATATYPE&
Andreas Marek's avatar
Andreas Marek committed
384
     &_&
385
     &PRECISION&
386
     & (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
Andreas Marek's avatar
Andreas Marek committed
387
     call obj%timer%stop("back")
388
   endif ! do_trans_ev
Andreas Marek's avatar
Andreas Marek committed
389
390
391
392
393
394
395
396
397
398
399

#if COMPLEXCASE == 1
    deallocate(q_real, stat=istat, errmsg=errorMessage)
    if (istat .ne. 0) then
      print *,"solve_evp_&
      &MATH_DATATYPE&
      &_1stage_&
      &PRECISION&
      &" // ": error when deallocating q_real "//errorMessage
      stop 1
    endif
400
#endif
Andreas Marek's avatar
Andreas Marek committed
401

402
403
404
405
406
407
408
409
410
411
   deallocate(e, tau, stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when deallocating e, tau "//errorMessage
     stop 1
   endif

412
   if (obj%eigenvalues_only) then
Andreas Marek's avatar
Andreas Marek committed
413
414
415
416
417
418
419
420
421
422
423
     deallocate(q_dummy, stat=istat, errmsg=errorMessage)
     if (istat .ne. 0) then
       print *,"solve_evp_&
       &MATH_DATATYPE&
       &_1stage_&
       &PRECISION&
       &" // ": error when deallocating q_dummy "//errorMessage
       stop 1
     endif
   endif

424
425
426
427
428
429
430
   ! restore original OpenMP settings
#ifdef WITH_OPENMP
   ! store the number of OpenMP threads used in the calling function
   ! restore this at the end of ELPA 2
   call omp_set_num_threads(omp_threads_caller)
#endif

431
   call obj%timer%stop("elpa_solve_evp_&
432
433
434
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
435
   &")
436
437
438
end function