!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.rzg.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".

#include "config-f90.h"

module ELPA1
  use elpa1_compute
  use elpa_utilities

#ifdef HAVE_DETAILED_TIMINGS
 use timings
#endif
  implicit none

  PRIVATE ! By default, all routines contained are private

  ! The following routines are public:

  public :: get_elpa_row_col_comms     ! Sets MPI row/col communicators

  public :: solve_evp_real             ! Driver routine for real eigenvalue problem
  public :: solve_evp_complex          ! Driver routine for complex eigenvalue problem


!-------------------------------------------------------------------------------

  ! Timing results, set by every call to solve_evp_xxx

  real*8, public :: time_evp_fwd    ! time for forward transformations (to tridiagonal form)
  real*8, public :: time_evp_solve  ! time for solving the tridiagonal system
  real*8, public :: time_evp_back   ! time for back transformations of eigenvectors

  ! Set elpa_print_times to .true. for explicit timing outputs

  logical, public :: elpa_print_times = .false.

!-------------------------------------------------------------------------------

  include 'mpif.h'

contains

!-------------------------------------------------------------------------------

function get_elpa_row_col_comms(mpi_comm_global, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols) result(mpierr)

!-------------------------------------------------------------------------------
! get_elpa_row_col_comms:
! All ELPA routines need MPI communicators for communicating within
! rows or columns of processes; these are set up here.
! mpi_comm_rows/mpi_comm_cols can be freed with MPI_Comm_free when no longer needed.
!
!  Parameters
!
!  mpi_comm_global   Global communicator for the calculations (in)
!
!  my_prow           Row coordinate of the calling process in the process grid (in)
!
!  my_pcol           Column coordinate of the calling process in the process grid (in)
!
!  mpi_comm_rows     Communicator for communicating within rows of processes (out)
!
!  mpi_comm_cols     Communicator for communicating within columns of processes (out)
!
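!  A minimal usage sketch (not part of the original documentation; my_prow and
!  my_pcol are assumed to be obtained by the caller beforehand, e.g. from
!  BLACS_Gridinfo or an MPI Cartesian grid):
!
!     integer :: mpi_comm_rows, mpi_comm_cols, mpierr
!     mpierr = get_elpa_row_col_comms(mpi_comm_world, my_prow, my_pcol, &
!                                     mpi_comm_rows, mpi_comm_cols)
!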
!-------------------------------------------------------------------------------

   implicit none

   integer, intent(in)  :: mpi_comm_global, my_prow, my_pcol
   integer, intent(out) :: mpi_comm_rows, mpi_comm_cols

   integer :: mpierr

   ! mpi_comm_rows is used for communicating WITHIN rows, i.e. all processes
   ! having the same column coordinate share one mpi_comm_rows.
   ! So the "color" for splitting is my_pcol and the "key" is my row coordinate.
   ! The same applies analogously to mpi_comm_cols.

   call mpi_comm_split(mpi_comm_global,my_pcol,my_prow,mpi_comm_rows,mpierr)
   call mpi_comm_split(mpi_comm_global,my_prow,my_pcol,mpi_comm_cols,mpierr)

end function get_elpa_row_col_comms

!-------------------------------------------------------------------------------

function solve_evp_real(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols) result(success)

!-------------------------------------------------------------------------------
!  solve_evp_real: Solves the real eigenvalue problem
!
!  Parameters
!
!  na          Order of matrix a
!
!  nev         Number of eigenvalues needed.
!              The smallest nev eigenvalues/eigenvectors are calculated.
!
!  a(lda,matrixCols)    Distributed matrix for which eigenvalues are to be computed.
!              Distribution is as in ScaLAPACK.
!              The full matrix must be set (not only one half as in ScaLAPACK).
!              Destroyed on exit (upper and lower half).
!
!  lda         Leading dimension of a
!
!  ev(na)      On output: eigenvalues of a, every processor gets the complete set
!
!  q(ldq,matrixCols)    On output: Eigenvectors of a
!              Distribution is as in ScaLAPACK.
!              Must always be dimensioned to the full size (corresponding to (na,na)),
!              even if only a part of the eigenvalues is needed.
!
!  ldq         Leading dimension of q
!
!  nblk        blocksize of cyclic distribution, must be the same in both directions!
!
!  mpi_comm_rows
!  mpi_comm_cols
!              MPI-Communicators for rows/columns
!
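!  A usage sketch (not part of the original documentation): a and q are assumed
!  to be distributed in block-cyclic ScaLAPACK layout with block size nblk, and
!  the communicators to come from get_elpa_row_col_comms:
!
!     logical :: success
!     success = solve_evp_real(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
!                              mpi_comm_rows, mpi_comm_cols)
!     if (.not. success) stop 'solve_evp_real failed'
!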
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
 use timings
#endif
   implicit none

   integer, intent(in)  :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
   real*8               :: a(lda,matrixCols), ev(na), q(ldq,matrixCols)

   integer              :: my_prow, my_pcol, mpierr
   real*8, allocatable  :: e(:), tau(:)
   real*8               :: ttt0, ttt1
   logical              :: success
   logical, save        :: firstCall = .true.
   logical              :: wantDebug

#ifdef HAVE_DETAILED_TIMINGS
   call timer%start("solve_evp_real")
#endif

   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)

   success = .true.

   wantDebug = .false.
   if (firstCall) then
     ! are debug messages desired?
     wantDebug = debug_messages_via_environment_variable()
     firstCall = .false.
   endif

   allocate(e(na), tau(na))

   ttt0 = MPI_Wtime()
   call tridiag_real(na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau)
   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time tridiag_real :',ttt1-ttt0
   time_evp_fwd = ttt1-ttt0

   ttt0 = MPI_Wtime()
   call solve_tridi(na, nev, ev, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
                    mpi_comm_cols, wantDebug, success)
   if (.not.(success)) return

   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time solve_tridi  :',ttt1-ttt0
   time_evp_solve = ttt1-ttt0

   ttt0 = MPI_Wtime()
   call trans_ev_real(na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time trans_ev_real:',ttt1-ttt0
   time_evp_back = ttt1-ttt0

   deallocate(e, tau)

#ifdef HAVE_DETAILED_TIMINGS
   call timer%stop("solve_evp_real")
#endif

end function solve_evp_real

!-------------------------------------------------------------------------------


function solve_evp_complex(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols) result(success)

!-------------------------------------------------------------------------------
!  solve_evp_complex: Solves the complex eigenvalue problem
!
!  Parameters
!
!  na          Order of matrix a
!
!  nev         Number of eigenvalues needed
!              The smallest nev eigenvalues/eigenvectors are calculated.
!
!  a(lda,matrixCols)    Distributed matrix for which eigenvalues are to be computed.
!              Distribution is as in ScaLAPACK.
!              The full matrix must be set (not only one half as in ScaLAPACK).
!              Destroyed on exit (upper and lower half).
!
!  lda         Leading dimension of a
!
!  ev(na)      On output: eigenvalues of a, every processor gets the complete set
!
!  q(ldq,matrixCols)    On output: Eigenvectors of a
!              Distribution is as in ScaLAPACK.
!              Must always be dimensioned to the full size (corresponding to (na,na)),
!              even if only a part of the eigenvalues is needed.
!
!  ldq         Leading dimension of q
!
!  nblk        blocksize of cyclic distribution, must be the same in both directions!
!
!  mpi_comm_rows
!  mpi_comm_cols
!              MPI-Communicators for rows/columns
!
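!  Usage is analogous to solve_evp_real above; note that ev must be a real*8
!  array even though a and q are complex*16 (the eigenvalues of a Hermitian
!  matrix are real):
!
!     success = solve_evp_complex(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
!                                 mpi_comm_rows, mpi_comm_cols)
!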
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
 use timings
#endif

   implicit none

   integer, intent(in)     :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
   complex*16              :: a(lda,matrixCols), q(ldq,matrixCols)
   real*8                  :: ev(na)

   integer                 :: my_prow, my_pcol, np_rows, np_cols, mpierr
   integer                 :: l_rows, l_cols, l_cols_nev
   real*8, allocatable     :: q_real(:,:), e(:)
   complex*16, allocatable :: tau(:)
   real*8                  :: ttt0, ttt1

   logical                 :: success
   logical, save           :: firstCall = .true.
   logical                 :: wantDebug

#ifdef HAVE_DETAILED_TIMINGS
   call timer%start("solve_evp_complex")
#endif

   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)

   success = .true.

   wantDebug = .false.
   if (firstCall) then
     ! are debug messages desired?
     wantDebug = debug_messages_via_environment_variable()
     firstCall = .false.
   endif


   l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
   l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q

   l_cols_nev = local_index(nev, my_pcol, np_cols, nblk, -1) ! Local columns corresponding to nev

   allocate(e(na), tau(na))
   allocate(q_real(l_rows,l_cols))

   ttt0 = MPI_Wtime()
   call tridiag_complex(na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau)
   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time tridiag_complex :',ttt1-ttt0
   time_evp_fwd = ttt1-ttt0

   ttt0 = MPI_Wtime()
   call solve_tridi(na, nev, ev, e, q_real, l_rows, nblk, matrixCols, mpi_comm_rows, &
                    mpi_comm_cols, wantDebug, success)
   if (.not.(success)) return

   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time solve_tridi     :',ttt1-ttt0
   time_evp_solve = ttt1-ttt0

   ttt0 = MPI_Wtime()
   q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)

   call trans_ev_complex(na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time trans_ev_complex:',ttt1-ttt0
   time_evp_back = ttt1-ttt0

   deallocate(q_real)
   deallocate(e, tau)
#ifdef HAVE_DETAILED_TIMINGS
   call timer%stop("solve_evp_complex")
#endif

end function solve_evp_complex


! --------------------------------------------------------------------------------------------------

end module ELPA1

! --------------------------------------------------------------------------------------------------
! Please note that the following routines are outside of the module ELPA1
! so that they can be used with real or complex data
! --------------------------------------------------------------------------------------------------


! to do: write interface and put them into module

!-------------------------------------------------------------------------------

!-------------------------------------------------------------------------------