elpa1_tools_template.X90 10.5 KB
Newer Older
Pavel Kus's avatar
Pavel Kus committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif
54
55
56

#if REALCASE == 1

57
    subroutine v_add_s_PRECISION(v,n,s)
Pavel Kus's avatar
Pavel Kus committed
58
59
60
61
62
63
      use precision
      implicit none
      integer(kind=ik) :: n
      real(kind=REAL_DATATYPE)    :: v(n),s

      v(:) = v(:) + s
64
    end subroutine v_add_s_PRECISION
Pavel Kus's avatar
Pavel Kus committed
65

66
    subroutine distribute_global_column_PRECISION(g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
Pavel Kus's avatar
Pavel Kus committed
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
      use precision
      implicit none

      real(kind=REAL_DATATYPE)     :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
      integer(kind=ik)  :: noff, nlen, my_prow, np_rows, nblk

      integer(kind=ik)  :: nbs, nbe, jb, g_off, l_off, js, je

      nbs = noff/(nblk*np_rows)
      nbe = (noff+nlen-1)/(nblk*np_rows)

      do jb = nbs, nbe

        g_off = jb*nblk*np_rows + nblk*my_prow
        l_off = jb*nblk

        js = MAX(noff+1-g_off,1)
        je = MIN(noff+nlen-g_off,nblk)

        if (je<js) cycle

        l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)

      enddo
91
    end subroutine distribute_global_column_PRECISION
Pavel Kus's avatar
Pavel Kus committed
92

93
    subroutine solve_secular_equation_PRECISION(n, i, d, z, delta, rho, dlam)
Pavel Kus's avatar
Pavel Kus committed
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
    !-------------------------------------------------------------------------------
    ! This routine solves the secular equation of a symmetric rank 1 modified
    ! diagonal matrix:
    !
    !    1. + rho*SUM(z(:)**2/(d(:)-x)) = 0
    !
    ! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique
    ! which is more robust (it always yields a solution) but also slower
    ! than the algorithm used in DLAED4.
    !
    ! The same restictions than in DLAED4 hold, namely:
    !
    !   rho > 0   and   d(i+1) > d(i)
    !
    ! but this routine will not terminate with error if these are not satisfied
    ! (it will normally converge to a pole in this case).
    !
    ! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases
    ! N=1 and N=2 which is not compatible with DLAED4.
    ! Thus this routine shouldn't be used for these cases as a simple replacement
    ! of DLAED4.
    !
    ! The arguments are the same as in DLAED4 (with the exception of the INFO argument):
    !
    !
    !  N      (input) INTEGER
    !         The length of all arrays.
    !
    !  I      (input) INTEGER
    !         The index of the eigenvalue to be computed.  1 <= I <= N.
    !
    !  D      (input) DOUBLE PRECISION array, dimension (N)
    !         The original eigenvalues.  It is assumed that they are in
    !         order, D(I) < D(J)  for I < J.
    !
    !  Z      (input) DOUBLE PRECISION array, dimension (N)
    !         The components of the updating vector.
    !
    !  DELTA  (output) DOUBLE PRECISION array, dimension (N)
    !         DELTA contains (D(j) - lambda_I) in its  j-th component.
    !         See remark above about DLAED4 compatibility!
    !
    !  RHO    (input) DOUBLE PRECISION
    !         The scalar in the symmetric updating formula.
    !
    !  DLAM   (output) DOUBLE PRECISION
    !         The computed lambda_I, the I-th updated eigenvalue.
    !-------------------------------------------------------------------------------

#ifdef HAVE_DETAILED_TIMINGS
      use timings
145
146
#else
      use timings_dummy
Pavel Kus's avatar
Pavel Kus committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#endif
      use precision
      implicit none

      integer(kind=ik)   :: n, i
      real(kind=REAL_DATATYPE)      :: d(n), z(n), delta(n), rho, dlam

      integer(kind=ik)   :: iter
      real(kind=REAL_DATATYPE)      :: a, b, x, y, dshift

      ! In order to obtain sufficient numerical accuracy we have to shift the problem
      ! either by d(i) or d(i+1), whichever is closer to the solution

      ! Upper and lower bound of the shifted solution interval are a and b

162
      call timer%start("solve_secular_equation" // PRECISION_SUFFIX)
Pavel Kus's avatar
Pavel Kus committed
163
164
165
166
167
168
169
170
171
      if (i==n) then

       ! Special case: Last eigenvalue
       ! We shift always by d(n), lower bound is d(n),
       ! upper bound is determined by a guess:

       dshift = d(n)
       delta(:) = d(:) - dshift

172
173
       a = CONST_0_0 ! delta(n)
       b = rho*SUM(z(:)**2) + CONST_1_0 ! rho*SUM(z(:)**2) is the lower bound for the guess
Pavel Kus's avatar
Pavel Kus committed
174
175
176
177
178
      else

        ! Other eigenvalues: lower bound is d(i), upper bound is d(i+1)
        ! We check the sign of the function in the midpoint of the interval
        ! in order to determine if eigenvalue is more close to d(i) or d(i+1)
179
180
        x = CONST_0_5*(d(i)+d(i+1))
        y = CONST_1_0 + rho*SUM(z(:)**2/(d(:)-x))
Pavel Kus's avatar
Pavel Kus committed
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
        if (y>0) then
          ! solution is next to d(i)
          dshift = d(i)
        else
          ! solution is next to d(i+1)
          dshift = d(i+1)
        endif

        delta(:) = d(:) - dshift
        a = delta(i)
        b = delta(i+1)

      endif

      ! Bisection:

      do iter=1,200

        ! Interval subdivision
200
        x = CONST_0_5*(a+b)
Pavel Kus's avatar
Pavel Kus committed
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
        if (x==a .or. x==b) exit   ! No further interval subdivisions possible
#ifdef DOUBLE_PRECISION_REAL
        if (abs(x) < 1.e-200_rk8) exit ! x next to pole
#else
        if (abs(x) < 1.e-20_rk4) exit ! x next to pole
#endif
        ! evaluate value at x

        y = 1. + rho*SUM(z(:)**2/(delta(:)-x))

        if (y==0) then
          ! found exact solution
          exit
        elseif (y>0) then
          b = x
        else
          a = x
        endif

      enddo

      ! Solution:

      dlam = x + dshift
      delta(:) = delta(:) - x
226
      call  timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
Pavel Kus's avatar
Pavel Kus committed
227

228
    end subroutine solve_secular_equation_PRECISION
Pavel Kus's avatar
Pavel Kus committed
229
    !-------------------------------------------------------------------------------
230
#endif
Pavel Kus's avatar
Pavel Kus committed
231

232
233
234
235
236
237
238
239
#if REALCASE == 1
    subroutine hh_transform_real_&
#endif
#if COMPLEXCASE == 1
    subroutine hh_transform_complex_&
#endif
&PRECISION &
                   (alpha, xnorm_sq, xf, tau)
240
#if REALCASE  == 1
Pavel Kus's avatar
Pavel Kus committed
241
      ! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
242
243
244
245
#endif
#if COMPLEXCASE == 1
      ! Similar to LAPACK routine ZLARFP, but uses ||x||**2 instead of x(:)
#endif
Pavel Kus's avatar
Pavel Kus committed
246
247
248
249
      ! and returns the factor xf by which x has to be scaled.
      ! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150
      ! since this would be expensive for the parallel implementation.
      use precision
250
251
252
253
254
#ifdef HAVE_DETAILED_TIMINGS
      use timings
#else
      use timings_dummy
#endif
Pavel Kus's avatar
Pavel Kus committed
255
      implicit none
256
257
258
259
260
261
262
263
264
265
266
267
268
269
#if REALCASE == 1
      real(kind=REAL_DATATYPE), intent(inout)       :: alpha
#endif
#if COMPLEXCASE == 1
      complex(kind=COMPLEX_DATATYPE), intent(inout) :: alpha
#endif
      real(kind=REAL_DATATYPE), intent(in)          :: xnorm_sq
#if REALCASE == 1
      real(kind=REAL_DATATYPE), intent(out)         :: xf, tau
#endif
#if COMPLEXCASE == 1
      complex(kind=COMPLEX_DATATYPE), intent(out)   :: xf, tau
      real(kind=REAL_DATATYPE)                      :: ALPHR, ALPHI
#endif
Pavel Kus's avatar
Pavel Kus committed
270

271
      real(kind=REAL_DATATYPE)                      :: BETA
Pavel Kus's avatar
Pavel Kus committed
272

273
274
275
     call timer%start("hh_tranform_&
&MATH_DATATYPE&
&" // PRECISION_SUFFIX )
276
277
278
279
280
281
282

#if COMPLEXCASE == 1
      ALPHR = real( ALPHA, kind=REAL_DATATYPE )
      ALPHI = PRECISION_IMAG( ALPHA )
#endif

#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
283
      if ( XNORM_SQ==0. ) then
284
285
286
287
#endif
#if COMPLEXCASE == 1
      if ( XNORM_SQ==0. .AND. ALPHI==0. ) then
#endif
Pavel Kus's avatar
Pavel Kus committed
288

289
#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
290
        if ( ALPHA>=0. ) then
291
292
293
294
#endif
#if COMPLEXCASE == 1
        if ( ALPHR>=0. ) then
#endif
Pavel Kus's avatar
Pavel Kus committed
295
296
297
298
299
300
301
302
303
          TAU = 0.
        else
          TAU = 2.
          ALPHA = -ALPHA
        endif
        XF = 0.

      else

304
#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
305
        BETA = SIGN( SQRT( ALPHA**2 + XNORM_SQ ), ALPHA )
306
307
308
309
#endif
#if COMPLEXCASE == 1
        BETA = SIGN( SQRT( ALPHR**2 + ALPHI**2 + XNORM_SQ ), ALPHR )
#endif
Pavel Kus's avatar
Pavel Kus committed
310
311
312
        ALPHA = ALPHA + BETA
        IF ( BETA<0 ) THEN
          BETA = -BETA
313
          TAU  = -ALPHA / BETA
Pavel Kus's avatar
Pavel Kus committed
314
        ELSE
315
#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
316
          ALPHA = XNORM_SQ / ALPHA
317
318
319
320
321
322
323
#endif
#if COMPLEXCASE == 1
          ALPHR = ALPHI * (ALPHI/real( ALPHA , kind=KIND_PRECISION))
          ALPHR = ALPHR + XNORM_SQ/real( ALPHA, kind=KIND_PRECISION )
#endif

#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
324
325
          TAU = ALPHA / BETA
          ALPHA = -ALPHA
326
327
328
329
330
#endif
#if COMPLEXCASE == 1
          TAU = PRECISION_CMPLX( ALPHR/BETA, -ALPHI/BETA )
          ALPHA = PRECISION_CMPLX( -ALPHR, ALPHI )
#endif
Pavel Kus's avatar
Pavel Kus committed
331
       END IF
332
       XF = 1.0/ALPHA
Pavel Kus's avatar
Pavel Kus committed
333
334
       ALPHA = BETA
     endif
335

336
337
338
      call timer%stop("hh_transform_&
&MATH_DATATYPE&
&" // PRECISION_SUFFIX )
339
340

#if REALCASE == 1
341
    end subroutine hh_transform_real_&
342
343
#endif
#if COMPLEXCASE == 1
344
    end subroutine hh_transform_complex_&
345
#endif
346
&PRECISION