elpa1_template.F90 11.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif

55
#include "../general/sanity.F90"
56
57
58
59
60

function elpa_solve_evp_&
         &MATH_DATATYPE&
	 &_1stage_&
	 &PRECISION&
61
	 &_impl (obj, a, ev, q) result(success)
62
63
64
65
   use precision
   use cuda_functions
   use mod_check_for_gpu
   use iso_c_binding
66
   use elpa_abstract_impl
67
68
69
70
   use elpa_mpi
   use elpa1_compute
   implicit none

71
   class(elpa_abstract_impl_t), intent(inout) :: obj
72
   real(kind=REAL_DATATYPE), intent(out)           :: ev(obj%na)
73
74
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
75
   real(kind=C_DATATYPE_KIND), intent(inout)       :: a(obj%local_nrows,*)
Andreas Marek's avatar
Andreas Marek committed
76
   real(kind=C_DATATYPE_KIND), optional,target,intent(out)  :: q(obj%local_nrows,*)
77
#else
Andreas Marek's avatar
Andreas Marek committed
78
   real(kind=C_DATATYPE_KIND), intent(inout)       :: a(obj%local_nrows,obj%local_ncols)
Andreas Marek's avatar
Andreas Marek committed
79
   real(kind=C_DATATYPE_KIND), optional, target, intent(out)  :: q(obj%local_nrows,obj%local_ncols)
80
81
#endif
   real(kind=C_DATATYPE_KIND), allocatable         :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
82
83
   real(kind=C_DATATYPE_KIND), allocatable, target         :: q_dummy(:,:)
   real(kind=C_DATATYPE_KIND), pointer             :: q_actual(:,:)
84
85
86
87
#endif /* REALCASE */

#if COMPLEXCASE == 1
#ifdef USE_ASSUMED_SIZE
88
   complex(kind=C_DATATYPE_KIND), intent(inout)    :: a(obj%local_nrows,*)
Andreas Marek's avatar
Andreas Marek committed
89
   complex(kind=C_DATATYPE_KIND), optional, target, intent(out)      :: q(obj%local_nrows,*)
90
#else
Andreas Marek's avatar
Andreas Marek committed
91
   complex(kind=C_DATATYPE_KIND), intent(inout)    :: a(obj%local_nrows,obj%local_ncols)
Andreas Marek's avatar
Andreas Marek committed
92
   complex(kind=C_DATATYPE_KIND), optional, target, intent(out)      :: q(obj%local_nrows,obj%local_ncols)
93
94
95
96
#endif

   real(kind=REAL_DATATYPE), allocatable           :: q_real(:,:)
   complex(kind=C_DATATYPE_KIND), allocatable      :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
97
98
   complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
   complex(kind=C_DATATYPE_KIND), pointer          :: q_actual(:,:)
99
100
101
   integer(kind=c_int)                             :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
#endif /* COMPLEXCASE */

102
   logical                                         :: useGPU
103
104
105
106
107
108
109
110
   logical                                         :: success

   logical                                         :: do_useGPU
   integer(kind=ik)                                :: numberOfGPUDevices

   integer(kind=c_int)                             :: my_pe, n_pes, my_prow, my_pcol, mpierr
   real(kind=C_DATATYPE_KIND), allocatable         :: e(:)
   logical                                         :: wantDebug
111
   integer(kind=c_int)                             :: istat, debug, gpu
112
   character(200)                                  :: errorMessage
113
   integer(kind=ik)                                :: na, nev, lda, ldq, nblk, matrixCols, &
Andreas Marek's avatar
Andreas Marek committed
114
115
                                                      mpi_comm_rows, mpi_comm_cols,        &
						      mpi_comm_all, check_pd, i
116

117
118
   logical                                         :: do_bandred, do_solve, do_trans_ev

119
   call obj%timer%start("elpa_solve_evp_&
120
121
122
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
123
   &")
124

Andreas Marek's avatar
Andreas Marek committed
125
   success = .true.
Andreas Marek's avatar
Andreas Marek committed
126
127

   if (present(q)) then
128
     obj%eigenvalues_only = .false.
Andreas Marek's avatar
Andreas Marek committed
129
   else
130
     obj%eigenvalues_only = .true.
Andreas Marek's avatar
Andreas Marek committed
131
132
   endif

133
134
135
136
137
138
139
   na         = obj%na
   nev        = obj%nev
   lda        = obj%local_nrows
   ldq        = obj%local_nrows
   nblk       = obj%nblk
   matrixCols = obj%local_ncols

Andreas Marek's avatar
Andreas Marek committed
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
   ! special case na = 1
   if (na .eq. 1) then
#if REALCASE == 1
     ev(1) = a(1,1)
     if (.not.(obj%eigenvalues_only)) then
       q(1,1) = CONST_REAL_1_0
     endif
#endif
#if COMPLEXCASE == 1
     ev(1) = real(a(1,1))
     if (.not.(obj%eigenvalues_only)) then
       q(1,1) = CONST_COMPLEX_PAIR_1_0
     endif
#endif
     call obj%timer%stop("elpa_solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &")
     return
   endif

162
163
164
165
166
167
   if (nev == 0) then
     nev = 1
     obj%eigenvalues_only = .true.
   endif


168
169
170
   call obj%get("mpi_comm_rows",mpi_comm_rows)
   call obj%get("mpi_comm_cols",mpi_comm_cols)
   call obj%get("mpi_comm_parent", mpi_comm_all)
171

172
173
   call obj%get("gpu",gpu)
   if (gpu .eq. 1) then
174
175
176
177
178
     useGPU =.true.
   else
     useGPU = .false.
   endif

179
   call obj%timer%start("mpi_communication")
180
181
182
183
184
185
186
187
188
189
190
191

   call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
   call mpi_comm_size(mpi_comm_all,n_pes,mpierr)

   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)

#if COMPLEXCASE == 1
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#endif

192
   call obj%timer%stop("mpi_communication")
193

194
195
   call obj%get("debug", debug)
   wantDebug = debug == 1
196
   do_useGPU = .false.
197

198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215

   if (useGPU) then
     if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then

       do_useGPU = .true.
       ! set the neccessary parameters
       cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
       cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
       cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
       cudaHostRegisterPortable = cuda_hostRegisterPortable()
       cudaHostRegisterMapped   = cuda_hostRegisterMapped()
     else
       print *,"GPUs are requested but not detected! Aborting..."
       success = .false.
       return
     endif
   else
     ! check whether set by environment variable
216
217
     call obj%get("gpu", gpu)
     do_useGPU = gpu == 1
218
219

     if (do_useGPU) then
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
       if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then

         ! set the neccessary parameters
         cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
         cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
         cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
         cudaHostRegisterPortable = cuda_hostRegisterPortable()
         cudaHostRegisterMapped   = cuda_hostRegisterMapped()
       else
         print *,"GPUs are requested but not detected! Aborting..."
         success = .false.
         return
       endif
     endif
   endif
Andreas Marek's avatar
Andreas Marek committed
235
236

   ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
237
   if (.not.(obj%eigenvalues_only)) then
Andreas Marek's avatar
Andreas Marek committed
238
239
240
241
242
243
     q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
   else
     allocate(q_dummy(obj%local_nrows,obj%local_ncols))
     q_actual => q_dummy
   endif

244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
#if COMPLEXCASE == 1
   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating q_real "//errorMessage
     stop 1
   endif
#endif
   allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating e, tau "//errorMessage
     stop 1
   endif


271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
   ! start the computations
   ! as default do all three steps (this might change at some point)
   do_bandred  = .true.
   do_solve    = .true.
   do_trans_ev = .true.

   if (obj%eigenvalues_only) then
     do_trans_ev = .true.
   endif

   if (do_bandred) then
     call obj%timer%start("forward")
     call tridiag_&
     &MATH_DATATYPE&
     &_&
     &PRECISION&
287
     & (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU, wantDebug)
288
289
290
291
292
293
294
295
     call obj%timer%stop("forward")
    endif  !do_bandred

    if (do_solve) then
     call obj%timer%start("solve")
     call solve_tridi_&
     &PRECISION&
     & (obj, na, nev, ev, e,  &
296
#if REALCASE == 1
297
        q_actual, ldq,          &
298
299
#endif
#if COMPLEXCASE == 1
300
        q_real, l_rows,  &
301
#endif
302
303
304
305
        nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, wantDebug, success)
     call obj%timer%stop("solve")
     if (.not.(success)) return
   endif !do_solve
306

Andreas Marek's avatar
Andreas Marek committed
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
   if (obj%eigenvalues_only) then
     do_trans_ev = .false.
   else
     call obj%get("check_pd",check_pd)
     if (check_pd .eq. 1) then
       check_pd = 0
       do i = 1, na
         if (ev(i) .gt. THRESHOLD) then
           check_pd = check_pd + 1
         endif
       enddo
       if (check_pd .lt. na) then
         ! not positiv definite => eigenvectors needed
         do_trans_ev = .true.
       else
         do_trans_ev = .false.
       endif
     endif ! check_pd
   endif ! eigenvalues_only

327
   if (do_trans_ev) then
Andreas Marek's avatar
Andreas Marek committed
328
    ! q must be given thats why from here on we can use q and not q_actual
329
#if COMPLEXCASE == 1
330
     q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
331
#endif
332

Andreas Marek's avatar
Andreas Marek committed
333
334
     call obj%timer%start("back")
     call trans_ev_&
335
     &MATH_DATATYPE&
Andreas Marek's avatar
Andreas Marek committed
336
     &_&
337
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
338
339
     & (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
     call obj%timer%stop("back")
340
   endif ! do_trans_ev
Andreas Marek's avatar
Andreas Marek committed
341
342
343
344
345
346
347
348
349
350
351

#if COMPLEXCASE == 1
    deallocate(q_real, stat=istat, errmsg=errorMessage)
    if (istat .ne. 0) then
      print *,"solve_evp_&
      &MATH_DATATYPE&
      &_1stage_&
      &PRECISION&
      &" // ": error when deallocating q_real "//errorMessage
      stop 1
    endif
352
#endif
Andreas Marek's avatar
Andreas Marek committed
353

354
355
356
357
358
359
360
361
362
363
   deallocate(e, tau, stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when deallocating e, tau "//errorMessage
     stop 1
   endif

364
   if (obj%eigenvalues_only) then
Andreas Marek's avatar
Andreas Marek committed
365
366
367
368
369
370
371
372
373
374
375
     deallocate(q_dummy, stat=istat, errmsg=errorMessage)
     if (istat .ne. 0) then
       print *,"solve_evp_&
       &MATH_DATATYPE&
       &_1stage_&
       &PRECISION&
       &" // ": error when deallocating q_dummy "//errorMessage
       stop 1
     endif
   endif

376
   call obj%timer%stop("elpa_solve_evp_&
377
378
379
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
380
   &")
381
382
383
end function