!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.rzg.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".

#include "config-f90.h"

module ELPA1

  use elpa_utilities
  use elpa1_compute

#ifdef HAVE_DETAILED_TIMINGS
 use timings
#endif
  implicit none

  PRIVATE ! By default, all routines contained are private

  ! The following routines are public:

  public :: get_elpa_row_col_comms     ! Sets MPI row/col communicators

  public :: solve_evp_real             ! Driver routine for real eigenvalue problem
  public :: solve_evp_complex          ! Driver routine for complex eigenvalue problem

  ! Timing results, set by every call to solve_evp_xxx

  real*8, public :: time_evp_fwd    ! forward transformations (to tridiagonal form)
  real*8, public :: time_evp_solve  ! time for solving the tridiagonal system
  real*8, public :: time_evp_back   ! time for back transformations of eigenvectors

  ! Set elpa_print_times to .true. for explicit timing outputs

  logical, public :: elpa_print_times = .false.

  include 'mpif.h'

contains

!-------------------------------------------------------------------------------

function get_elpa_row_col_comms(mpi_comm_global, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols) result(mpierr)

!-------------------------------------------------------------------------------
! get_elpa_row_col_comms:
! All ELPA routines need MPI communicators for communicating within
! rows or columns of processes; these are set here.
! mpi_comm_rows/mpi_comm_cols can be freed with MPI_Comm_free once they
! are no longer needed.
!
!  Parameters
!
!  mpi_comm_global   Global communicator for the calculations (in)
!
!  my_prow           Row coordinate of the calling process in the process grid (in)
!
!  my_pcol           Column coordinate of the calling process in the process grid (in)
!
!  mpi_comm_rows     Communicator for communicating within rows of processes (out)
!
!  mpi_comm_cols     Communicator for communicating within columns of processes (out)
!
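!  A minimal usage sketch (illustrative only; the process grid layout is chosen
!  by the caller, here a column-major layout with np_rows rows is assumed):
!
!    integer :: myid, np_rows, my_prow, my_pcol
!    integer :: mpi_comm_rows, mpi_comm_cols, mpierr
!
!    call mpi_comm_rank(MPI_COMM_WORLD, myid, mpierr)
!    my_prow = mod(myid, np_rows)   ! row coordinate of this process
!    my_pcol = myid / np_rows       ! column coordinate of this process
!    mpierr  = get_elpa_row_col_comms(MPI_COMM_WORLD, my_prow, my_pcol, &
!                                     mpi_comm_rows, mpi_comm_cols)
!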
!-------------------------------------------------------------------------------

   implicit none

   integer, intent(in)  :: mpi_comm_global, my_prow, my_pcol
   integer, intent(out) :: mpi_comm_rows, mpi_comm_cols

   integer :: mpierr

   ! mpi_comm_rows is used for communicating WITHIN rows, i.e. all processes
   ! having the same column coordinate share one mpi_comm_rows.
   ! So the "color" for splitting is my_pcol and the "key" is my row coordinate.
   ! Analogous for mpi_comm_cols

   call mpi_comm_split(mpi_comm_global,my_pcol,my_prow,mpi_comm_rows,mpierr)
   call mpi_comm_split(mpi_comm_global,my_prow,my_pcol,mpi_comm_cols,mpierr)

end function get_elpa_row_col_comms

!-------------------------------------------------------------------------------

function solve_evp_real(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols) result(success)

!-------------------------------------------------------------------------------
!  solve_evp_real: Solves the real eigenvalue problem
!
!  Parameters
!
!  na          Order of matrix a
!
!  nev         Number of eigenvalues needed.
!              The smallest nev eigenvalues/eigenvectors are calculated.
!
!  a(lda,matrixCols)    Distributed matrix for which eigenvalues are to be computed.
!              Distribution is like in ScaLAPACK.
!              The full matrix must be set (not only one half as in ScaLAPACK).
!              Destroyed on exit (upper and lower half).
!
!  lda         Leading dimension of a
!
!  ev(na)      On output: eigenvalues of a, every processor gets the complete set
!
!  q(ldq,matrixCols)    On output: Eigenvectors of a
!              Distribution is like in ScaLAPACK.
!              Must always be dimensioned to the full size (corresponding to (na,na)),
!              even if only a part of the eigenvalues is needed.
!
!  ldq         Leading dimension of q
!
!  nblk        Blocksize of the cyclic distribution; must be the same in both directions!
!
!  matrixCols  Number of local columns of the distributed matrices a and q
!
!  mpi_comm_rows
!  mpi_comm_cols
!              MPI-Communicators for rows/columns
!
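!  A minimal calling sketch (illustrative only; na, nev, nblk, the local sizes
!  lda, ldq, matrixCols and the communicators from get_elpa_row_col_comms are
!  assumed to be set up by the caller, e.g. matching a ScaLAPACK descriptor):
!
!    real*8, allocatable :: a(:,:), q(:,:), ev(:)
!    logical             :: success
!
!    allocate(a(lda,matrixCols), q(ldq,matrixCols), ev(na))
!    ! ... fill the complete distributed matrix a (both halves) ...
!    success = solve_evp_real(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
!                             mpi_comm_rows, mpi_comm_cols)
!    if (.not. success) then
!      ! handle failure of the tridiagonal solver
!    endif
!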
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
 use timings
#endif
   implicit none

   integer, intent(in)  :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
   real*8               :: a(lda,matrixCols), ev(na), q(ldq,matrixCols)

   integer              :: my_prow, my_pcol, mpierr
   real*8, allocatable  :: e(:), tau(:)
   real*8               :: ttt0, ttt1
   logical              :: success
   logical, save        :: firstCall = .true.
   logical              :: wantDebug

#ifdef HAVE_DETAILED_TIMINGS
   call timer%start("solve_evp_real")
#endif

   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)

   success = .true.

   wantDebug = .false.
   if (firstCall) then
     ! are debug messages desired?
     wantDebug = debug_messages_via_environment_variable()
     firstCall = .false.
   endif

   allocate(e(na), tau(na))

   ttt0 = MPI_Wtime()
   call tridiag_real(na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau)
   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time tridiag_real :',ttt1-ttt0
   time_evp_fwd = ttt1-ttt0

   ttt0 = MPI_Wtime()
   call solve_tridi(na, nev, ev, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
                    mpi_comm_cols, wantDebug, success)
   if (.not.(success)) return

   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time solve_tridi  :',ttt1-ttt0
   time_evp_solve = ttt1-ttt0

   ttt0 = MPI_Wtime()
   call trans_ev_real(na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time trans_ev_real:',ttt1-ttt0
   time_evp_back = ttt1-ttt0

   deallocate(e, tau)

#ifdef HAVE_DETAILED_TIMINGS
   call timer%stop("solve_evp_real")
#endif

end function solve_evp_real

!-------------------------------------------------------------------------------


function solve_evp_complex(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols) result(success)

!-------------------------------------------------------------------------------
!  solve_evp_complex: Solves the complex eigenvalue problem
!
!  Parameters
!
!  na          Order of matrix a
!
!  nev         Number of eigenvalues needed
!              The smallest nev eigenvalues/eigenvectors are calculated.
!
!  a(lda,matrixCols)    Distributed matrix for which eigenvalues are to be computed.
!              Distribution is like in ScaLAPACK.
!              The full matrix must be set (not only one half as in ScaLAPACK).
!              Destroyed on exit (upper and lower half).
!
!  lda         Leading dimension of a
!
!  ev(na)      On output: eigenvalues of a, every processor gets the complete set
!
!  q(ldq,matrixCols)    On output: Eigenvectors of a
!              Distribution is like in ScaLAPACK.
!              Must always be dimensioned to the full size (corresponding to (na,na)),
!              even if only a part of the eigenvalues is needed.
!
!  ldq         Leading dimension of q
!
!  nblk        Blocksize of the cyclic distribution; must be the same in both directions!
!
!  matrixCols  Number of local columns of the distributed matrices a and q
!
!  mpi_comm_rows
!  mpi_comm_cols
!              MPI-Communicators for rows/columns
!
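!  Usage mirrors solve_evp_real (see the sketch there), except that a and q are
!  complex*16 while the eigenvalues ev are real*8 (illustrative only):
!
!    complex*16, allocatable :: a(:,:), q(:,:)
!    real*8, allocatable     :: ev(:)
!    logical                 :: success
!
!    allocate(a(lda,matrixCols), q(ldq,matrixCols), ev(na))
!    success = solve_evp_complex(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
!                                mpi_comm_rows, mpi_comm_cols)
!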
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
 use timings
#endif

   implicit none

   integer, intent(in)     :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
   complex*16              :: a(lda,matrixCols), q(ldq,matrixCols)
   real*8                  :: ev(na)

   integer                 :: my_prow, my_pcol, np_rows, np_cols, mpierr
   integer                 :: l_rows, l_cols, l_cols_nev
   real*8, allocatable     :: q_real(:,:), e(:)
   complex*16, allocatable :: tau(:)
   real*8                  :: ttt0, ttt1

   logical                 :: success
   logical, save           :: firstCall = .true.
   logical                 :: wantDebug

#ifdef HAVE_DETAILED_TIMINGS
   call timer%start("solve_evp_complex")
#endif

   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)

   success = .true.

   wantDebug = .false.
   if (firstCall) then
     ! are debug messages desired?
     wantDebug = debug_messages_via_environment_variable()
     firstCall = .false.
   endif


   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(e(na), tau(na))
   allocate(q_real(l_rows,l_cols))

   ttt0 = MPI_Wtime()
   call tridiag_complex(na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau)
   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time tridiag_complex :',ttt1-ttt0
   time_evp_fwd = ttt1-ttt0

   ttt0 = MPI_Wtime()
   call solve_tridi(na, nev, ev, e, q_real, l_rows, nblk, matrixCols, mpi_comm_rows, &
                    mpi_comm_cols, wantDebug, success)
   if (.not.(success)) return

   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time solve_tridi     :',ttt1-ttt0
   time_evp_solve = ttt1-ttt0

   ttt0 = MPI_Wtime()
   q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)

   call trans_ev_complex(na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time trans_ev_complex:',ttt1-ttt0
   time_evp_back = ttt1-ttt0

   deallocate(q_real)
   deallocate(e, tau)
#ifdef HAVE_DETAILED_TIMINGS
   call timer%stop("solve_evp_complex")
#endif

end function solve_evp_complex

end module ELPA1