elpa1_tools_template.F90 11.1 KB
Newer Older
Pavel Kus's avatar
Pavel Kus committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif
54

55
#include "../general/sanity.F90"
56

57
58
#if REALCASE == 1

Andreas Marek's avatar
Andreas Marek committed
59
60
    subroutine v_add_s_&
    &PRECISION&
61
    &(obj, v,n,s)
Pavel Kus's avatar
Pavel Kus committed
62
      use precision
63
      use elpa_abstract_impl
Pavel Kus's avatar
Pavel Kus committed
64
      implicit none
65
      class(elpa_abstract_impl_t), intent(inout) :: obj
66
      integer(kind=ik)            :: n
Pavel Kus's avatar
Pavel Kus committed
67
68
69
      real(kind=REAL_DATATYPE)    :: v(n),s

      v(:) = v(:) + s
Andreas Marek's avatar
Andreas Marek committed
70
71
    end subroutine v_add_s_&
    &PRECISION
Pavel Kus's avatar
Pavel Kus committed
72

Andreas Marek's avatar
Andreas Marek committed
73
74
    subroutine distribute_global_column_&
    &PRECISION&
75
    &(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
Pavel Kus's avatar
Pavel Kus committed
76
      use precision
77
      use elpa_abstract_impl
Pavel Kus's avatar
Pavel Kus committed
78
79
      implicit none

80
      class(elpa_abstract_impl_t), intent(inout) :: obj
Pavel Kus's avatar
Pavel Kus committed
81
      real(kind=REAL_DATATYPE)     :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
82
      integer(kind=ik)             :: noff, nlen, my_prow, np_rows, nblk
Pavel Kus's avatar
Pavel Kus committed
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

      integer(kind=ik)  :: nbs, nbe, jb, g_off, l_off, js, je

      nbs = noff/(nblk*np_rows)
      nbe = (noff+nlen-1)/(nblk*np_rows)

      do jb = nbs, nbe

        g_off = jb*nblk*np_rows + nblk*my_prow
        l_off = jb*nblk

        js = MAX(noff+1-g_off,1)
        je = MIN(noff+nlen-g_off,nblk)

        if (je<js) cycle

        l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)

      enddo
Andreas Marek's avatar
Andreas Marek committed
102
103
    end subroutine distribute_global_column_&
    &PRECISION
Pavel Kus's avatar
Pavel Kus committed
104

Andreas Marek's avatar
Andreas Marek committed
105
106
    subroutine solve_secular_equation_&
    &PRECISION&
107
    &(obj, n, i, d, z, delta, rho, dlam)
Pavel Kus's avatar
Pavel Kus committed
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
    !-------------------------------------------------------------------------------
    ! This routine solves the secular equation of a symmetric rank 1 modified
    ! diagonal matrix:
    !
    !    1. + rho*SUM(z(:)**2/(d(:)-x)) = 0
    !
    ! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique
    ! which is more robust (it always yields a solution) but also slower
    ! than the algorithm used in DLAED4.
    !
    ! The same restictions than in DLAED4 hold, namely:
    !
    !   rho > 0   and   d(i+1) > d(i)
    !
    ! but this routine will not terminate with error if these are not satisfied
    ! (it will normally converge to a pole in this case).
    !
    ! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases
    ! N=1 and N=2 which is not compatible with DLAED4.
    ! Thus this routine shouldn't be used for these cases as a simple replacement
    ! of DLAED4.
    !
    ! The arguments are the same as in DLAED4 (with the exception of the INFO argument):
    !
    !
    !  N      (input) INTEGER
    !         The length of all arrays.
    !
    !  I      (input) INTEGER
    !         The index of the eigenvalue to be computed.  1 <= I <= N.
    !
    !  D      (input) DOUBLE PRECISION array, dimension (N)
    !         The original eigenvalues.  It is assumed that they are in
    !         order, D(I) < D(J)  for I < J.
    !
    !  Z      (input) DOUBLE PRECISION array, dimension (N)
144
    !         The components of the updating Vector.
Pavel Kus's avatar
Pavel Kus committed
145
146
147
148
149
150
151
152
153
154
155
156
157
    !
    !  DELTA  (output) DOUBLE PRECISION array, dimension (N)
    !         DELTA contains (D(j) - lambda_I) in its  j-th component.
    !         See remark above about DLAED4 compatibility!
    !
    !  RHO    (input) DOUBLE PRECISION
    !         The scalar in the symmetric updating formula.
    !
    !  DLAM   (output) DOUBLE PRECISION
    !         The computed lambda_I, the I-th updated eigenvalue.
    !-------------------------------------------------------------------------------

      use precision
158
      use elpa_abstract_impl
Pavel Kus's avatar
Pavel Kus committed
159
      implicit none
160
#include "../../src/general/precision_kinds.F90"
161
      class(elpa_abstract_impl_t), intent(inout) :: obj
162
163
      integer(kind=ik)           :: n, i
      real(kind=REAL_DATATYPE)   :: d(n), z(n), delta(n), rho, dlam
Pavel Kus's avatar
Pavel Kus committed
164

165
166
      integer(kind=ik)           :: iter
      real(kind=REAL_DATATYPE)   :: a, b, x, y, dshift
Pavel Kus's avatar
Pavel Kus committed
167
168
169
170
171
172

      ! In order to obtain sufficient numerical accuracy we have to shift the problem
      ! either by d(i) or d(i+1), whichever is closer to the solution

      ! Upper and lower bound of the shifted solution interval are a and b

173
      call obj%timer%start("solve_secular_equation" // PRECISION_SUFFIX)
Pavel Kus's avatar
Pavel Kus committed
174
175
176
177
178
179
180
181
182
      if (i==n) then

       ! Special case: Last eigenvalue
       ! We shift always by d(n), lower bound is d(n),
       ! upper bound is determined by a guess:

       dshift = d(n)
       delta(:) = d(:) - dshift

183
       a = 0.0_rk ! delta(n)
184
       b = rho*SUM(z(:)**2) + CONST_1_0 ! rho*SUM(z(:)**2) is the lower bound for the guess
Pavel Kus's avatar
Pavel Kus committed
185
186
187
188
189
      else

        ! Other eigenvalues: lower bound is d(i), upper bound is d(i+1)
        ! We check the sign of the function in the midpoint of the interval
        ! in order to determine if eigenvalue is more close to d(i) or d(i+1)
190
191
        x = CONST_0_5*(d(i)+d(i+1))
        y = CONST_1_0 + rho*SUM(z(:)**2/(d(:)-x))
Pavel Kus's avatar
Pavel Kus committed
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
        if (y>0) then
          ! solution is next to d(i)
          dshift = d(i)
        else
          ! solution is next to d(i+1)
          dshift = d(i+1)
        endif

        delta(:) = d(:) - dshift
        a = delta(i)
        b = delta(i+1)

      endif

      ! Bisection:

      do iter=1,200

        ! Interval subdivision
211
        x = CONST_0_5*(a+b)
Pavel Kus's avatar
Pavel Kus committed
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
        if (x==a .or. x==b) exit   ! No further interval subdivisions possible
#ifdef DOUBLE_PRECISION_REAL
        if (abs(x) < 1.e-200_rk8) exit ! x next to pole
#else
        if (abs(x) < 1.e-20_rk4) exit ! x next to pole
#endif
        ! evaluate value at x

        y = 1. + rho*SUM(z(:)**2/(delta(:)-x))

        if (y==0) then
          ! found exact solution
          exit
        elseif (y>0) then
          b = x
        else
          a = x
        endif

      enddo

      ! Solution:

      dlam = x + dshift
      delta(:) = delta(:) - x
237
      call  obj%timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
Pavel Kus's avatar
Pavel Kus committed
238

Andreas Marek's avatar
Andreas Marek committed
239
240
    end subroutine solve_secular_equation_&
    &PRECISION
Pavel Kus's avatar
Pavel Kus committed
241
    !-------------------------------------------------------------------------------
242
#endif
Pavel Kus's avatar
Pavel Kus committed
243

244
245
246
247
248
249
#if REALCASE == 1
    subroutine hh_transform_real_&
#endif
#if COMPLEXCASE == 1
    subroutine hh_transform_complex_&
#endif
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
250
    &PRECISION &
251
                   (obj, alpha, xnorm_sq, xf, tau, wantDebug)
252
#if REALCASE  == 1
Pavel Kus's avatar
Pavel Kus committed
253
      ! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
254
255
256
257
#endif
#if COMPLEXCASE == 1
      ! Similar to LAPACK routine ZLARFP, but uses ||x||**2 instead of x(:)
#endif
Pavel Kus's avatar
Pavel Kus committed
258
259
260
261
      ! and returns the factor xf by which x has to be scaled.
      ! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150
      ! since this would be expensive for the parallel implementation.
      use precision
262
      use elpa_abstract_impl
Pavel Kus's avatar
Pavel Kus committed
263
      implicit none
264
265
      class(elpa_abstract_impl_t), intent(inout)    :: obj
      logical, intent(in)                           :: wantDebug
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#if REALCASE == 1
      real(kind=REAL_DATATYPE), intent(inout)       :: alpha
#endif
#if COMPLEXCASE == 1
      complex(kind=COMPLEX_DATATYPE), intent(inout) :: alpha
#endif
      real(kind=REAL_DATATYPE), intent(in)          :: xnorm_sq
#if REALCASE == 1
      real(kind=REAL_DATATYPE), intent(out)         :: xf, tau
#endif
#if COMPLEXCASE == 1
      complex(kind=COMPLEX_DATATYPE), intent(out)   :: xf, tau
      real(kind=REAL_DATATYPE)                      :: ALPHR, ALPHI
#endif
Pavel Kus's avatar
Pavel Kus committed
280

281
      real(kind=REAL_DATATYPE)                      :: BETA
Pavel Kus's avatar
Pavel Kus committed
282

283
     if (wantDebug) call obj%timer%start("hh_transform_&
Andreas Marek's avatar
Andreas Marek committed
284
285
                      &MATH_DATATYPE&
		      &" // &
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
286
		      &PRECISION_SUFFIX )
287
288
289
290
291
292
293

#if COMPLEXCASE == 1
      ALPHR = real( ALPHA, kind=REAL_DATATYPE )
      ALPHI = PRECISION_IMAG( ALPHA )
#endif

#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
294
      if ( XNORM_SQ==0. ) then
295
296
297
298
#endif
#if COMPLEXCASE == 1
      if ( XNORM_SQ==0. .AND. ALPHI==0. ) then
#endif
Pavel Kus's avatar
Pavel Kus committed
299

300
#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
301
        if ( ALPHA>=0. ) then
302
303
304
305
#endif
#if COMPLEXCASE == 1
        if ( ALPHR>=0. ) then
#endif
Pavel Kus's avatar
Pavel Kus committed
306
307
308
309
310
311
312
313
314
          TAU = 0.
        else
          TAU = 2.
          ALPHA = -ALPHA
        endif
        XF = 0.

      else

315
#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
316
        BETA = SIGN( SQRT( ALPHA**2 + XNORM_SQ ), ALPHA )
317
318
319
320
#endif
#if COMPLEXCASE == 1
        BETA = SIGN( SQRT( ALPHR**2 + ALPHI**2 + XNORM_SQ ), ALPHR )
#endif
Pavel Kus's avatar
Pavel Kus committed
321
322
323
        ALPHA = ALPHA + BETA
        IF ( BETA<0 ) THEN
          BETA = -BETA
324
          TAU  = -ALPHA / BETA
Pavel Kus's avatar
Pavel Kus committed
325
        ELSE
326
#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
327
          ALPHA = XNORM_SQ / ALPHA
328
329
330
331
332
333
334
#endif
#if COMPLEXCASE == 1
          ALPHR = ALPHI * (ALPHI/real( ALPHA , kind=KIND_PRECISION))
          ALPHR = ALPHR + XNORM_SQ/real( ALPHA, kind=KIND_PRECISION )
#endif

#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
335
336
          TAU = ALPHA / BETA
          ALPHA = -ALPHA
337
338
339
340
341
#endif
#if COMPLEXCASE == 1
          TAU = PRECISION_CMPLX( ALPHR/BETA, -ALPHI/BETA )
          ALPHA = PRECISION_CMPLX( -ALPHR, ALPHI )
#endif
Pavel Kus's avatar
Pavel Kus committed
342
       END IF
343
       XF = 1.0/ALPHA
Pavel Kus's avatar
Pavel Kus committed
344
345
       ALPHA = BETA
     endif
346

347
      if (wantDebug) call obj%timer%stop("hh_transform_&
Andreas Marek's avatar
Andreas Marek committed
348
349
      &MATH_DATATYPE&
      &" // &
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
350
      &PRECISION_SUFFIX )
351
352

#if REALCASE == 1
353
    end subroutine hh_transform_real_&
354
355
#endif
#if COMPLEXCASE == 1
356
    end subroutine hh_transform_complex_&
357
#endif
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
358
    &PRECISION