elpa1_template.X90 10.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif

55
#include "../general/sanity.X90"
56
57
58
59
60

function elpa_solve_evp_&
         &MATH_DATATYPE&
	 &_1stage_&
	 &PRECISION&
61
	 &_impl (obj, a, ev, q) result(success)
62
63
64
65
   use precision
   use cuda_functions
   use mod_check_for_gpu
   use iso_c_binding
66
   use elpa_abstract_impl
67
68
69
70
   use elpa_mpi
   use elpa1_compute
   implicit none

71
   class(elpa_abstract_impl_t), intent(inout) :: obj
72
   real(kind=REAL_DATATYPE), intent(out)           :: ev(obj%na)
73
74
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
75
   real(kind=C_DATATYPE_KIND), intent(inout)       :: a(obj%local_nrows,*)
Andreas Marek's avatar
Andreas Marek committed
76
   real(kind=C_DATATYPE_KIND), optional,target,intent(out)  :: q(obj%local_nrows,*)
77
#else
Andreas Marek's avatar
Andreas Marek committed
78
   real(kind=C_DATATYPE_KIND), intent(inout)       :: a(obj%local_nrows,obj%local_ncols)
Andreas Marek's avatar
Andreas Marek committed
79
   real(kind=C_DATATYPE_KIND), optional, target, intent(out)  :: q(obj%local_nrows,obj%local_ncols)
80
81
#endif
   real(kind=C_DATATYPE_KIND), allocatable         :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
82
83
   real(kind=C_DATATYPE_KIND), allocatable, target         :: q_dummy(:,:)
   real(kind=C_DATATYPE_KIND), pointer             :: q_actual(:,:)
84
85
86
87
#endif /* REALCASE */

#if COMPLEXCASE == 1
#ifdef USE_ASSUMED_SIZE
88
   complex(kind=C_DATATYPE_KIND), intent(inout)    :: a(obj%local_nrows,*)
Andreas Marek's avatar
Andreas Marek committed
89
   complex(kind=C_DATATYPE_KIND), optional, target, intent(out)      :: q(obj%local_nrows,*)
90
#else
Andreas Marek's avatar
Andreas Marek committed
91
   complex(kind=C_DATATYPE_KIND), intent(inout)    :: a(obj%local_nrows,obj%local_ncols)
Andreas Marek's avatar
Andreas Marek committed
92
   complex(kind=C_DATATYPE_KIND), optional, target, intent(out)      :: q(obj%local_nrows,obj%local_ncols)
93
94
95
96
#endif

   real(kind=REAL_DATATYPE), allocatable           :: q_real(:,:)
   complex(kind=C_DATATYPE_KIND), allocatable      :: tau(:)
Andreas Marek's avatar
Andreas Marek committed
97
98
   complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
   complex(kind=C_DATATYPE_KIND), pointer          :: q_actual(:,:)
99
100
101
   integer(kind=c_int)                             :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
#endif /* COMPLEXCASE */

102
   logical                                         :: useGPU
103
104
105
106
107
108
109
110
   logical                                         :: success

   logical                                         :: do_useGPU
   integer(kind=ik)                                :: numberOfGPUDevices

   integer(kind=c_int)                             :: my_pe, n_pes, my_prow, my_pcol, mpierr
   real(kind=C_DATATYPE_KIND), allocatable         :: e(:)
   logical                                         :: wantDebug
111
   integer(kind=c_int)                             :: istat, debug, gpu
112
   character(200)                                  :: errorMessage
113
114
   integer(kind=ik)                                :: na, nev, lda, ldq, nblk, matrixCols, &
                                                      mpi_comm_rows, mpi_comm_cols, mpi_comm_all
115

116
   call obj%timer%start("elpa_solve_evp_&
117
118
119
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
120
   &")
121

Andreas Marek's avatar
Andreas Marek committed
122
123

   if (present(q)) then
124
     obj%eigenvalues_only = .false.
Andreas Marek's avatar
Andreas Marek committed
125
   else
126
     obj%eigenvalues_only = .true.
Andreas Marek's avatar
Andreas Marek committed
127
128
   endif

129
130
131
132
133
134
135
   na         = obj%na
   nev        = obj%nev
   lda        = obj%local_nrows
   ldq        = obj%local_nrows
   nblk       = obj%nblk
   matrixCols = obj%local_ncols

136
137
138
   call obj%get("mpi_comm_rows",mpi_comm_rows)
   call obj%get("mpi_comm_cols",mpi_comm_cols)
   call obj%get("mpi_comm_parent", mpi_comm_all)
139

140
141
   call obj%get("gpu",gpu)
   if (gpu .eq. 1) then
142
143
144
145
146
     useGPU =.true.
   else
     useGPU = .false.
   endif

147
   call obj%timer%start("mpi_communication")
148
149
150
151
152
153
154
155
156
157
158
159

   call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
   call mpi_comm_size(mpi_comm_all,n_pes,mpierr)

   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)

#if COMPLEXCASE == 1
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#endif

160
   call obj%timer%stop("mpi_communication")
161
162
   success = .true.

163
164
   call obj%get("debug", debug)
   wantDebug = debug == 1
165
   do_useGPU = .false.
166

167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184

   if (useGPU) then
     if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then

       do_useGPU = .true.
       ! set the neccessary parameters
       cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
       cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
       cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
       cudaHostRegisterPortable = cuda_hostRegisterPortable()
       cudaHostRegisterMapped   = cuda_hostRegisterMapped()
     else
       print *,"GPUs are requested but not detected! Aborting..."
       success = .false.
       return
     endif
   else
     ! check whether set by environment variable
185
186
     call obj%get("gpu", gpu)
     do_useGPU = gpu == 1
187
188

     if (do_useGPU) then
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
       if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then

         ! set the neccessary parameters
         cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
         cudaMemcpyDeviceToHost   = cuda_memcpyDeviceToHost()
         cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
         cudaHostRegisterPortable = cuda_hostRegisterPortable()
         cudaHostRegisterMapped   = cuda_hostRegisterMapped()
       else
         print *,"GPUs are requested but not detected! Aborting..."
         success = .false.
         return
       endif
     endif
   endif
Andreas Marek's avatar
Andreas Marek committed
204
205

   ! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
206
   if (.not.(obj%eigenvalues_only)) then
Andreas Marek's avatar
Andreas Marek committed
207
208
209
210
211
212
     q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
   else
     allocate(q_dummy(obj%local_nrows,obj%local_ncols))
     q_actual => q_dummy
   endif

213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#if COMPLEXCASE == 1
   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating q_real "//errorMessage
     stop 1
   endif
#endif
   allocate(e(na), tau(na), stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when allocating e, tau "//errorMessage
     stop 1
   endif

239
   call obj%timer%start("forward")
240
241
242
243
   call tridiag_&
   &MATH_DATATYPE&
   &_&
   &PRECISION&
244
   & (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU)
245
   call obj%timer%stop("forward")
246

247
   call obj%timer%start("solve")
248
249
   call solve_tridi_&
   &PRECISION&
250
   & (obj, na, nev, ev, e,  &
251
#if REALCASE == 1
Andreas Marek's avatar
Andreas Marek committed
252
      q_actual, ldq,          &
253
254
255
256
257
#endif
#if COMPLEXCASE == 1
      q_real, l_rows,  &
#endif
      nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, wantDebug, success)
258
   call obj%timer%stop("solve")
259
260
   if (.not.(success)) return

261
   if (obj%eigenvalues_only) then
262
263
264
     return
   endif

265
   if (.not.(obj%eigenvalues_only) ) then
Andreas Marek's avatar
Andreas Marek committed
266
    ! q must be given thats why from here on we can use q and not q_actual
267
#if COMPLEXCASE == 1
268
     q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
269
#endif
270

Andreas Marek's avatar
Andreas Marek committed
271
272
     call obj%timer%start("back")
     call trans_ev_&
273
     &MATH_DATATYPE&
Andreas Marek's avatar
Andreas Marek committed
274
     &_&
275
     &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
276
277
     & (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
     call obj%timer%stop("back")
278
   endif ! .not.(obj%eigenvalues_only
Andreas Marek's avatar
Andreas Marek committed
279
280
281
282
283
284
285
286
287
288
289

#if COMPLEXCASE == 1
    deallocate(q_real, stat=istat, errmsg=errorMessage)
    if (istat .ne. 0) then
      print *,"solve_evp_&
      &MATH_DATATYPE&
      &_1stage_&
      &PRECISION&
      &" // ": error when deallocating q_real "//errorMessage
      stop 1
    endif
290
#endif
Andreas Marek's avatar
Andreas Marek committed
291

292
293
294
295
296
297
298
299
300
301
   deallocate(e, tau, stat=istat, errmsg=errorMessage)
   if (istat .ne. 0) then
     print *,"solve_evp_&
     &MATH_DATATYPE&
     &_1stage_&
     &PRECISION&
     &" // ": error when deallocating e, tau "//errorMessage
     stop 1
   endif

302
   if (obj%eigenvalues_only) then
Andreas Marek's avatar
Andreas Marek committed
303
304
305
306
307
308
309
310
311
312
313
314
     deallocate(q_dummy, stat=istat, errmsg=errorMessage)
     if (istat .ne. 0) then
       print *,"solve_evp_&
       &MATH_DATATYPE&
       &_1stage_&
       &PRECISION&
       &" // ": error when deallocating q_dummy "//errorMessage
       stop 1
     endif

   endif

315
   call obj%timer%stop("elpa_solve_evp_&
316
317
318
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
319
   &")
320
321
322
end function