elpa1_tools_template.X90 10.7 KB
Newer Older
Pavel Kus's avatar
Pavel Kus committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif
54
55
56

#if REALCASE == 1

Andreas Marek's avatar
Andreas Marek committed
57
58
59
    subroutine v_add_s_&
    &PRECISION&
    &(v,n,s)
Pavel Kus's avatar
Pavel Kus committed
60
61
62
63
64
65
      use precision
      implicit none
      integer(kind=ik) :: n
      real(kind=REAL_DATATYPE)    :: v(n),s

      v(:) = v(:) + s
Andreas Marek's avatar
Andreas Marek committed
66
67
    end subroutine v_add_s_&
    &PRECISION
Pavel Kus's avatar
Pavel Kus committed
68

Andreas Marek's avatar
Andreas Marek committed
69
70
71
    subroutine distribute_global_column_&
    &PRECISION&
    &(g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
Pavel Kus's avatar
Pavel Kus committed
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
      use precision
      implicit none

      real(kind=REAL_DATATYPE)     :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
      integer(kind=ik)  :: noff, nlen, my_prow, np_rows, nblk

      integer(kind=ik)  :: nbs, nbe, jb, g_off, l_off, js, je

      nbs = noff/(nblk*np_rows)
      nbe = (noff+nlen-1)/(nblk*np_rows)

      do jb = nbs, nbe

        g_off = jb*nblk*np_rows + nblk*my_prow
        l_off = jb*nblk

        js = MAX(noff+1-g_off,1)
        je = MIN(noff+nlen-g_off,nblk)

        if (je<js) cycle

        l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)

      enddo
Andreas Marek's avatar
Andreas Marek committed
96
97
    end subroutine distribute_global_column_&
    &PRECISION
Pavel Kus's avatar
Pavel Kus committed
98

Andreas Marek's avatar
Andreas Marek committed
99
100
101
    subroutine solve_secular_equation_&
    &PRECISION&
    &(n, i, d, z, delta, rho, dlam)
Pavel Kus's avatar
Pavel Kus committed
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
    !-------------------------------------------------------------------------------
    ! This routine solves the secular equation of a symmetric rank 1 modified
    ! diagonal matrix:
    !
    !    1. + rho*SUM(z(:)**2/(d(:)-x)) = 0
    !
    ! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique
    ! which is more robust (it always yields a solution) but also slower
    ! than the algorithm used in DLAED4.
    !
    ! The same restictions than in DLAED4 hold, namely:
    !
    !   rho > 0   and   d(i+1) > d(i)
    !
    ! but this routine will not terminate with error if these are not satisfied
    ! (it will normally converge to a pole in this case).
    !
    ! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases
    ! N=1 and N=2 which is not compatible with DLAED4.
    ! Thus this routine shouldn't be used for these cases as a simple replacement
    ! of DLAED4.
    !
    ! The arguments are the same as in DLAED4 (with the exception of the INFO argument):
    !
    !
    !  N      (input) INTEGER
    !         The length of all arrays.
    !
    !  I      (input) INTEGER
    !         The index of the eigenvalue to be computed.  1 <= I <= N.
    !
    !  D      (input) DOUBLE PRECISION array, dimension (N)
    !         The original eigenvalues.  It is assumed that they are in
    !         order, D(I) < D(J)  for I < J.
    !
    !  Z      (input) DOUBLE PRECISION array, dimension (N)
    !         The components of the updating vector.
    !
    !  DELTA  (output) DOUBLE PRECISION array, dimension (N)
    !         DELTA contains (D(j) - lambda_I) in its  j-th component.
    !         See remark above about DLAED4 compatibility!
    !
    !  RHO    (input) DOUBLE PRECISION
    !         The scalar in the symmetric updating formula.
    !
    !  DLAM   (output) DOUBLE PRECISION
    !         The computed lambda_I, the I-th updated eigenvalue.
    !-------------------------------------------------------------------------------

#ifdef HAVE_DETAILED_TIMINGS
      use timings
153
154
#else
      use timings_dummy
Pavel Kus's avatar
Pavel Kus committed
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#endif
      use precision
      implicit none

      integer(kind=ik)   :: n, i
      real(kind=REAL_DATATYPE)      :: d(n), z(n), delta(n), rho, dlam

      integer(kind=ik)   :: iter
      real(kind=REAL_DATATYPE)      :: a, b, x, y, dshift

      ! In order to obtain sufficient numerical accuracy we have to shift the problem
      ! either by d(i) or d(i+1), whichever is closer to the solution

      ! Upper and lower bound of the shifted solution interval are a and b

170
      call timer%start("solve_secular_equation" // PRECISION_SUFFIX)
Pavel Kus's avatar
Pavel Kus committed
171
172
173
174
175
176
177
178
179
      if (i==n) then

       ! Special case: Last eigenvalue
       ! We shift always by d(n), lower bound is d(n),
       ! upper bound is determined by a guess:

       dshift = d(n)
       delta(:) = d(:) - dshift

180
181
       a = CONST_0_0 ! delta(n)
       b = rho*SUM(z(:)**2) + CONST_1_0 ! rho*SUM(z(:)**2) is the lower bound for the guess
Pavel Kus's avatar
Pavel Kus committed
182
183
184
185
186
      else

        ! Other eigenvalues: lower bound is d(i), upper bound is d(i+1)
        ! We check the sign of the function in the midpoint of the interval
        ! in order to determine if eigenvalue is more close to d(i) or d(i+1)
187
188
        x = CONST_0_5*(d(i)+d(i+1))
        y = CONST_1_0 + rho*SUM(z(:)**2/(d(:)-x))
Pavel Kus's avatar
Pavel Kus committed
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
        if (y>0) then
          ! solution is next to d(i)
          dshift = d(i)
        else
          ! solution is next to d(i+1)
          dshift = d(i+1)
        endif

        delta(:) = d(:) - dshift
        a = delta(i)
        b = delta(i+1)

      endif

      ! Bisection:

      do iter=1,200

        ! Interval subdivision
208
        x = CONST_0_5*(a+b)
Pavel Kus's avatar
Pavel Kus committed
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
        if (x==a .or. x==b) exit   ! No further interval subdivisions possible
#ifdef DOUBLE_PRECISION_REAL
        if (abs(x) < 1.e-200_rk8) exit ! x next to pole
#else
        if (abs(x) < 1.e-20_rk4) exit ! x next to pole
#endif
        ! evaluate value at x

        y = 1. + rho*SUM(z(:)**2/(delta(:)-x))

        if (y==0) then
          ! found exact solution
          exit
        elseif (y>0) then
          b = x
        else
          a = x
        endif

      enddo

      ! Solution:

      dlam = x + dshift
      delta(:) = delta(:) - x
234
      call  timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
Pavel Kus's avatar
Pavel Kus committed
235

Andreas Marek's avatar
Andreas Marek committed
236
237
    end subroutine solve_secular_equation_&
    &PRECISION
Pavel Kus's avatar
Pavel Kus committed
238
    !-------------------------------------------------------------------------------
239
#endif
Pavel Kus's avatar
Pavel Kus committed
240

241
242
243
244
245
246
#if REALCASE == 1
    subroutine hh_transform_real_&
#endif
#if COMPLEXCASE == 1
    subroutine hh_transform_complex_&
#endif
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
247
    &PRECISION &
248
                   (alpha, xnorm_sq, xf, tau)
249
#if REALCASE  == 1
Pavel Kus's avatar
Pavel Kus committed
250
      ! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
251
252
253
254
#endif
#if COMPLEXCASE == 1
      ! Similar to LAPACK routine ZLARFP, but uses ||x||**2 instead of x(:)
#endif
Pavel Kus's avatar
Pavel Kus committed
255
256
257
258
      ! and returns the factor xf by which x has to be scaled.
      ! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150
      ! since this would be expensive for the parallel implementation.
      use precision
259
260
261
262
263
#ifdef HAVE_DETAILED_TIMINGS
      use timings
#else
      use timings_dummy
#endif
Pavel Kus's avatar
Pavel Kus committed
264
      implicit none
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#if REALCASE == 1
      real(kind=REAL_DATATYPE), intent(inout)       :: alpha
#endif
#if COMPLEXCASE == 1
      complex(kind=COMPLEX_DATATYPE), intent(inout) :: alpha
#endif
      real(kind=REAL_DATATYPE), intent(in)          :: xnorm_sq
#if REALCASE == 1
      real(kind=REAL_DATATYPE), intent(out)         :: xf, tau
#endif
#if COMPLEXCASE == 1
      complex(kind=COMPLEX_DATATYPE), intent(out)   :: xf, tau
      real(kind=REAL_DATATYPE)                      :: ALPHR, ALPHI
#endif
Pavel Kus's avatar
Pavel Kus committed
279

280
      real(kind=REAL_DATATYPE)                      :: BETA
Pavel Kus's avatar
Pavel Kus committed
281

Andreas Marek's avatar
Andreas Marek committed
282
     call timer%start("hh_transform_&
Andreas Marek's avatar
Andreas Marek committed
283
284
                      &MATH_DATATYPE&
		      &" // &
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
285
		      &PRECISION_SUFFIX )
286
287
288
289
290
291
292

#if COMPLEXCASE == 1
      ALPHR = real( ALPHA, kind=REAL_DATATYPE )
      ALPHI = PRECISION_IMAG( ALPHA )
#endif

#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
293
      if ( XNORM_SQ==0. ) then
294
295
296
297
#endif
#if COMPLEXCASE == 1
      if ( XNORM_SQ==0. .AND. ALPHI==0. ) then
#endif
Pavel Kus's avatar
Pavel Kus committed
298

299
#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
300
        if ( ALPHA>=0. ) then
301
302
303
304
#endif
#if COMPLEXCASE == 1
        if ( ALPHR>=0. ) then
#endif
Pavel Kus's avatar
Pavel Kus committed
305
306
307
308
309
310
311
312
313
          TAU = 0.
        else
          TAU = 2.
          ALPHA = -ALPHA
        endif
        XF = 0.

      else

314
#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
315
        BETA = SIGN( SQRT( ALPHA**2 + XNORM_SQ ), ALPHA )
316
317
318
319
#endif
#if COMPLEXCASE == 1
        BETA = SIGN( SQRT( ALPHR**2 + ALPHI**2 + XNORM_SQ ), ALPHR )
#endif
Pavel Kus's avatar
Pavel Kus committed
320
321
322
        ALPHA = ALPHA + BETA
        IF ( BETA<0 ) THEN
          BETA = -BETA
323
          TAU  = -ALPHA / BETA
Pavel Kus's avatar
Pavel Kus committed
324
        ELSE
325
#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
326
          ALPHA = XNORM_SQ / ALPHA
327
328
329
330
331
332
333
#endif
#if COMPLEXCASE == 1
          ALPHR = ALPHI * (ALPHI/real( ALPHA , kind=KIND_PRECISION))
          ALPHR = ALPHR + XNORM_SQ/real( ALPHA, kind=KIND_PRECISION )
#endif

#if REALCASE == 1
Pavel Kus's avatar
Pavel Kus committed
334
335
          TAU = ALPHA / BETA
          ALPHA = -ALPHA
336
337
338
339
340
#endif
#if COMPLEXCASE == 1
          TAU = PRECISION_CMPLX( ALPHR/BETA, -ALPHI/BETA )
          ALPHA = PRECISION_CMPLX( -ALPHR, ALPHI )
#endif
Pavel Kus's avatar
Pavel Kus committed
341
       END IF
342
       XF = 1.0/ALPHA
Pavel Kus's avatar
Pavel Kus committed
343
344
       ALPHA = BETA
     endif
345

346
      call timer%stop("hh_transform_&
Andreas Marek's avatar
Andreas Marek committed
347
348
      &MATH_DATATYPE&
      &" // &
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
349
      &PRECISION_SUFFIX )
350
351

#if REALCASE == 1
352
    end subroutine hh_transform_real_&
353
354
#endif
#if COMPLEXCASE == 1
355
    end subroutine hh_transform_complex_&
356
#endif
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
357
    &PRECISION