elpa1_template.F90 12.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif

55
#include "../general/sanity.F90"
56
57
58

function elpa_solve_evp_&
         &MATH_DATATYPE&
Pavel Kus's avatar
Pavel Kus committed
59
60
61
   &_1stage_&
   &PRECISION&
   &_impl (obj, a, ev, q) result(success)
62
63
64
65
   use precision
   use cuda_functions
   use mod_check_for_gpu
   use iso_c_binding
66
   use elpa_abstract_impl
67
68
   use elpa_mpi
   use elpa1_compute
69
70
71
#ifdef WITH_OPENMP
   use omp_lib
#endif
72
   implicit none
Pavel Kus's avatar
Pavel Kus committed
73
#include "../general/precision_kinds.F90"
74
   class(elpa_abstract_impl_t), intent(inout) :: obj
75
   real(kind=REAL_DATATYPE), intent(out)           :: ev(obj%na)
Pavel Kus's avatar
Pavel Kus committed
76

77
#ifdef USE_ASSUMED_SIZE
Pavel Kus's avatar
Pavel Kus committed
78
79
   MATH_DATATYPE(kind=rck), intent(inout)       :: a(obj%local_nrows,*)
   MATH_DATATYPE(kind=rck), optional,target,intent(out)  :: q(obj%local_nrows,*)
80
#else
Pavel Kus's avatar
Pavel Kus committed
81
82
   MATH_DATATYPE(kind=rck), intent(inout)       :: a(obj%local_nrows,obj%local_ncols)
   MATH_DATATYPE(kind=rck), optional, target, intent(out)  :: q(obj%local_nrows,obj%local_ncols)
83
#endif
Pavel Kus's avatar
Pavel Kus committed
84
85

#if REALCASE == 1
86
   real(kind=C_DATATYPE_KIND), allocatable         :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
87
88
   real(kind=C_DATATYPE_KIND), allocatable, target         :: q_dummy(:,:)
   real(kind=C_DATATYPE_KIND), pointer             :: q_actual(:,:)
89
90
91
92
93
#endif /* REALCASE */

#if COMPLEXCASE == 1
   real(kind=REAL_DATATYPE), allocatable           :: q_real(:,:)
   complex(kind=C_DATATYPE_KIND), allocatable      :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
94
95
   complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
   complex(kind=C_DATATYPE_KIND), pointer          :: q_actual(:,:)
96
97
98
   integer(kind=c_int)                             :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
#endif /* COMPLEXCASE */

99
   logical                                         :: useGPU
100
101
   logical                                         :: success

102
103
   logical                                         :: do_useGPU, do_useGPU_tridiag, &
                                                      do_useGPU_solve_tridi, do_useGPU_trans_ev
104
105
106
107
108
   integer(kind=ik)                                :: numberOfGPUDevices

   integer(kind=c_int)                             :: my_pe, n_pes, my_prow, my_pcol, mpierr
   real(kind=C_DATATYPE_KIND), allocatable         :: e(:)
   logical                                         :: wantDebug
109
   integer(kind=c_int)                             :: istat, debug, gpu
110
   character(200)                                  :: errorMessage
111
   integer(kind=ik)                                :: na, nev, lda, ldq, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
112
                                                      mpi_comm_rows, mpi_comm_cols,        &
Andreas Marek's avatar
Andreas Marek committed
113
                                                      mpi_comm_all, check_pd, i, error
114

Pavel Kus's avatar
Pavel Kus committed
115
   logical                                         :: do_tridiag, do_solve, do_trans_ev
116
   integer(kind=ik)                                :: nrThreads
117

118
   call obj%timer%start("elpa_solve_evp_&
119
120
121
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
122
   &")
123

Andreas Marek's avatar
Andreas Marek committed
124
#ifdef WITH_OPENMP
Andreas Marek's avatar
Andreas Marek committed
125
126
127
   !nrThreads = omp_get_max_threads()
   call obj%get("omp_threads",nrThreads,error)
   call omp_set_num_threads(nrThreads)
Andreas Marek's avatar
Andreas Marek committed
128
129
130
131
132
#else
   nrThreads = 1
#endif


Andreas Marek's avatar
Andreas Marek committed
133
   success = .true.
Andreas Marek's avatar
Andreas Marek committed
134
135

   if (present(q)) then
136
     obj%eigenvalues_only = .false.
Andreas Marek's avatar
Andreas Marek committed
137
   else
138
     obj%eigenvalues_only = .true.
Andreas Marek's avatar
Andreas Marek committed
139
140
   endif

141
142
143
144
145
146
147
   na         = obj%na
   nev        = obj%nev
   lda        = obj%local_nrows
   ldq        = obj%local_nrows
   nblk       = obj%nblk
   matrixCols = obj%local_ncols

Andreas Marek's avatar
Andreas Marek committed
148
149
150
151
152
153
154
   ! special case na = 1
   if (na .eq. 1) then
#if REALCASE == 1
     ev(1) = a(1,1)
#endif
#if COMPLEXCASE == 1
     ev(1) = real(a(1,1))
Pavel Kus's avatar
Pavel Kus committed
155
#endif
Andreas Marek's avatar
Andreas Marek committed
156
     if (.not.(obj%eigenvalues_only)) then
Pavel Kus's avatar
Pavel Kus committed
157
       q(1,1) = ONE
Andreas Marek's avatar
Andreas Marek committed
158
159
160
161
162
163
164
165
166
     endif
     call obj%timer%stop("elpa_solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &")
     return
   endif

167
168
169
170
171
172
   if (nev == 0) then
     nev = 1
     obj%eigenvalues_only = .true.
   endif


Andreas Marek's avatar
Andreas Marek committed
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
   call obj%get("mpi_comm_rows",mpi_comm_rows,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
   call obj%get("mpi_comm_cols",mpi_comm_cols,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
   call obj%get("mpi_comm_parent", mpi_comm_all,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
188

Andreas Marek's avatar
Andreas Marek committed
189
190
   call obj%get("gpu",gpu,error)
   if (error .ne. ELPA_OK) then
191
     print *,"Problem getting option. Aborting..."
Andreas Marek's avatar
Andreas Marek committed
192
193
     stop
   endif
194
   if (gpu .eq. 1) then
195
196
197
198
199
     useGPU =.true.
   else
     useGPU = .false.
   endif

200
   call obj%timer%start("mpi_communication")
201
202
203
204
205
206
207
208
209
210
211
212

   call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
   call mpi_comm_size(mpi_comm_all,n_pes,mpierr)

   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)

#if COMPLEXCASE == 1
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#endif

213
   call obj%timer%stop("mpi_communication")
214

Andreas Marek's avatar
Andreas Marek committed
215
216
217
218
219
   call obj%get("debug", debug,error)
   if (error .ne. ELPA_OK) then
     print *,"Problem setting option. Aborting..."
     stop
   endif
220
   wantDebug = debug == 1
221
   do_useGPU = .false.
222

223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238

   if (useGPU) then
     if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then

       do_useGPU = .true.
       ! set the neccessary parameters
       cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
       cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
       cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
       cudaHostRegisterPortable = cuda_hostRegisterPortable()
       cudaHostRegisterMapped   = cuda_hostRegisterMapped()
     else
       print *,"GPUs are requested but not detected! Aborting..."
       success = .false.
       return
     endif
239
   endif
Andreas Marek's avatar
Andreas Marek committed
240

241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
   do_useGPU_tridiag = do_useGPU
   do_useGPU_solve_tridi = do_useGPU
   do_useGPU_trans_ev = do_useGPU
   ! only if we want (and can) use GPU in general, look what are the
   ! requirements for individual routines. Implicitly they are all set to 1, so
   ! unles specified otherwise by the user, GPU versions of all individual
   ! routines should be used
   if(do_useGPU) then
     call obj%get("gpu_tridiag", gpu, error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option. Aborting..."
       stop
     endif
     do_useGPU_tridiag = (gpu == 1)

     call obj%get("gpu_solve_tridi", gpu, error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option. Aborting..."
       stop
     endif
     do_useGPU_solve_tridi = (gpu == 1)

     call obj%get("gpu_trans_ev", gpu, error)
     if (error .ne. ELPA_OK) then
       print *,"Problem getting option. Aborting..."
       stop
     endif
     do_useGPU_trans_ev = (gpu == 1)
   endif
   ! for elpa1 the easy thing is, that the individual phases of the algorithm
   ! do not share any data on the GPU. 


Andreas Marek's avatar
Andreas Marek committed
274
   ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
275
   if (.not.(obj%eigenvalues_only)) then
Andreas Marek's avatar
Andreas Marek committed
276
277
278
279
280
281
     q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
   else
     allocate(q_dummy(obj%local_nrows,obj%local_ncols))
     q_actual => q_dummy
   endif

282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
#if COMPLEXCASE == 1
   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating q_real "//errorMessage
     stop 1
   endif
#endif
   allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating e, tau "//errorMessage
     stop 1
   endif


309
310
   ! start the computations
   ! as default do all three steps (this might change at some point)
Pavel Kus's avatar
Pavel Kus committed
311
   do_tridiag  = .true.
312
313
314
   do_solve    = .true.
   do_trans_ev = .true.

Pavel Kus's avatar
Pavel Kus committed
315
   if (do_tridiag) then
316
317
318
319
320
     call obj%timer%start("forward")
     call tridiag_&
     &MATH_DATATYPE&
     &_&
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
321
     & (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU_tridiag, wantDebug, nrThreads)
322
     call obj%timer%stop("forward")
Pavel Kus's avatar
Pavel Kus committed
323
    endif  !do_tridiag
324
325
326
327
328
329

    if (do_solve) then
     call obj%timer%start("solve")
     call solve_tridi_&
     &PRECISION&
     & (obj, na, nev, ev, e,  &
330
#if REALCASE == 1
331
        q_actual, ldq,          &
332
333
#endif
#if COMPLEXCASE == 1
334
        q_real, l_rows,  &
335
#endif
Andreas Marek's avatar
Andreas Marek committed
336
        nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, success, nrThreads)
337
338
339
     call obj%timer%stop("solve")
     if (.not.(success)) return
   endif !do_solve
340

Andreas Marek's avatar
Andreas Marek committed
341
342
343
   if (obj%eigenvalues_only) then
     do_trans_ev = .false.
   else
Andreas Marek's avatar
Andreas Marek committed
344
345
346
347
348
     call obj%get("check_pd",check_pd,error)
     if (error .ne. ELPA_OK) then
       print *,"Problem setting option. Aborting..."
       stop
     endif
Andreas Marek's avatar
Andreas Marek committed
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
     if (check_pd .eq. 1) then
       check_pd = 0
       do i = 1, na
         if (ev(i) .gt. THRESHOLD) then
           check_pd = check_pd + 1
         endif
       enddo
       if (check_pd .lt. na) then
         ! not positiv definite => eigenvectors needed
         do_trans_ev = .true.
       else
         do_trans_ev = .false.
       endif
     endif ! check_pd
   endif ! eigenvalues_only

365
   if (do_trans_ev) then
Andreas Marek's avatar
Andreas Marek committed
366
    ! q must be given thats why from here on we can use q and not q_actual
367
#if COMPLEXCASE == 1
368
     q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
369
#endif
370

Andreas Marek's avatar
Andreas Marek committed
371
372
     call obj%timer%start("back")
     call trans_ev_&
373
     &MATH_DATATYPE&
Andreas Marek's avatar
Andreas Marek committed
374
     &_&
375
     &PRECISION&
376
     & (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
Andreas Marek's avatar
Andreas Marek committed
377
     call obj%timer%stop("back")
378
   endif ! do_trans_ev
Andreas Marek's avatar
Andreas Marek committed
379
380
381
382
383
384
385
386
387
388
389

#if COMPLEXCASE == 1
    deallocate(q_real, stat=istat, errmsg=errorMessage)
    if (istat .ne. 0) then
      print *,"solve_evp_&
      &MATH_DATATYPE&
      &_1stage_&
      &PRECISION&
      &" // ": error when deallocating q_real "//errorMessage
      stop 1
    endif
390
#endif
Andreas Marek's avatar
Andreas Marek committed
391

392
393
394
395
396
397
398
399
400
401
   deallocate(e, tau, stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when deallocating e, tau "//errorMessage
     stop 1
   endif

402
   if (obj%eigenvalues_only) then
Andreas Marek's avatar
Andreas Marek committed
403
404
405
406
407
408
409
410
411
412
413
     deallocate(q_dummy, stat=istat, errmsg=errorMessage)
     if (istat .ne. 0) then
       print *,"solve_evp_&
       &MATH_DATATYPE&
       &_1stage_&
       &PRECISION&
       &" // ": error when deallocating q_dummy "//errorMessage
       stop 1
     endif
   endif

414
   call obj%timer%stop("elpa_solve_evp_&
415
416
417
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
418
   &")
419
420
421
end function