elpa2_kernels_simple_template.X90 7.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! --------------------------------------------------------------------------------------------------
!
! This file contains the compute intensive kernels for the Householder transformations.
!
! This is the small and simple version (no hand unrolling of loops etc.) but for some
! compilers this performs better than a sophisticated version with transformed and unrolled loops.
!
! It should be compiled with the highest possible optimization level.
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!
! --------------------------------------------------------------------------------------------------
#endif

61
#if COMPLEXCASE==1
62
63
  ! the intel compiler creates a temp copy of array q
  ! this should be avoided without using assumed size arrays
64
65
66
67
68

  subroutine single_hh_trafo_&
  &MATH_DATATYPE&
  &_generic_simple_&
  &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
69
  & (q, hh, nb, nq, ldq)
70

71
    use precision
72
    use elpa_abstract_impl
73
    implicit none
Andreas Marek's avatar
Andreas Marek committed
74
    !class(elpa_abstract_impl_t), intent(inout) :: obj
75
    integer(kind=ik), intent(in)    :: nb, nq, ldq
76
#ifdef USE_ASSUMED_SIZE
77
78
    complex(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*)
    complex(kind=C_DATATYPE_KIND), intent(in)    :: hh(*)
79
#else
80
81
    complex(kind=C_DATATYPE_KIND), intent(inout) :: q(1:ldq,1:nb)
    complex(kind=C_DATATYPE_KIND), intent(in)    :: hh(1:nb)
82
83
#endif
    integer(kind=ik)                :: i
84
    complex(kind=C_DATATYPE_KIND)                :: h1, tau1, x(nq)
85

Andreas Marek's avatar
Andreas Marek committed
86
87
88
89
90
91
92
    !call obj%timer%start("kernel_&
    !&MATH_DATATYPE&
    !&_generic_simple: single_hh_trafo_&
    !&MATH_DATATYPE&
    !&_generic_simple" // &
    !&PRECISION_SUFFIX &
    !)
93

94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
    ! Just one Householder transformation

    x(1:nq) = q(1:nq,1)

    do i=2,nb
       x(1:nq) = x(1:nq) + q(1:nq,i)*conjg(hh(i))
    enddo

    tau1 = hh(1)
    x(1:nq) = x(1:nq)*(-tau1)

    q(1:nq,1) = q(1:nq,1) + x(1:nq)

    do i=2,nb
       q(1:nq,i) = q(1:nq,i) + x(1:nq)*hh(i)
    enddo


Andreas Marek's avatar
Andreas Marek committed
112
113
114
115
116
117
118
    !call obj%timer%stop("kernel_&
    !&MATH_DATATYPE&
    !&_generic_simple: single_hh_trafo_&
    !&MATH_DATATYPE&
    !&_generic_simple" // &
    !&PRECISION_SUFFIX &
    !)
119

120
  end subroutine
121
122

#endif /* COMPLEXCASE == 1 */
123
124
  ! --------------------------------------------------------------------------------------------------

125
126
#if REALCASE==1

127
128
129
130
  subroutine double_hh_trafo_&
  &MATH_DATATYPE&
  &_generic_simple_&
  &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
131
  & (q, hh, nb, nq, ldq, ldh)
132
133
134
135
136

#endif /* REALCASE == 1 */

#if COMPLEXCASE==1

137
138
139
140
  subroutine double_hh_trafo_&
  &MATH_DATATYPE&
  &_generic_simple_&
  &PRECISION&
Andreas Marek's avatar
Andreas Marek committed
141
  & (q, hh, nb, nq, ldq, ldh)
142
143
144

#endif /* COMPLEXCASE==1 */

145
    use precision
146
    use elpa_abstract_impl
147
148
    implicit none

Andreas Marek's avatar
Andreas Marek committed
149
    !class(elpa_abstract_impl_t), intent(inout) :: obj
150
    integer(kind=ik), intent(in)    :: nb, nq, ldq, ldh
151
152
#if REALCASE==1

153
#ifdef USE_ASSUMED_SIZE
154
155
    real(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*)
    real(kind=C_DATATYPE_KIND), intent(in)    :: hh(ldh,*)
156
#else
157
158
    real(kind=C_DATATYPE_KIND), intent(inout) :: q(1:ldq,1:nb+1)
    real(kind=C_DATATYPE_KIND), intent(in)    :: hh(1:ldh,1:6)
159
#endif
160
    real(kind=C_DATATYPE_KIND)                :: s, h1, h2, tau1, tau2, x(nq), y(nq)
161
162
163
164
#endif /* REALCASE==1 */

#if COMPLEXCASE==1

165
#ifdef USE_ASSUMED_SIZE
166
167
    complex(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*)
    complex(kind=C_DATATYPE_KIND), intent(in)    :: hh(ldh,*)
168
#else
169
170
    complex(kind=C_DATATYPE_KIND), intent(inout) :: q(1:ldq,1:nb+1)
    complex(kind=C_DATATYPE_KIND), intent(in)    :: hh(1:ldh,1:2)
171
#endif
172
    complex(kind=C_DATATYPE_KIND)                :: s, h1, h2, tau1, tau2, x(nq), y(nq)
173
#endif /* COMPLEXCASE==1 */
174
175
    integer(kind=ik)                :: i

Andreas Marek's avatar
Andreas Marek committed
176
177
178
179
180
181
182
    !call obj%timer%start("kernel_&
    !&MATH_DATATYPE&
    !&_generic_simple: double_hh_trafo_&
    !&MATH_DATATYPE&
    !&_generic_simple" // &
    !&PRECISION_SUFFIX &
    !)
183

184
    ! Calculate dot product of the two Householder vectors
185
186
187
188
189
190
#if REALCASE==1
    s = hh(2,2)*1.0
    do i=3,nb
       s = s+hh(i,2)*hh(i-1,1)
    enddo
#endif
191

192
193
#if COMPLEXCASE==1
    s = conjg(hh(2,2))*1.0
194
195
196
    do i=3,nb
       s = s+(conjg(hh(i,2))*hh(i-1,1))
    enddo
197
#endif
198
199
200
201

    ! Do the Householder transformations

    x(1:nq) = q(1:nq,2)
202
203
204
#if REALCASE==1
    y(1:nq) = q(1:nq,1) + q(1:nq,2)*hh(2,2)
#endif
205

206
#if COMPLEXCASE==1
207
    y(1:nq) = q(1:nq,1) + q(1:nq,2)*conjg(hh(2,2))
208
#endif
209
210

    do i=3,nb
211
212
213
214
215
216
#if REALCASE==1
       h1 = hh(i-1,1)
       h2 = hh(i,2)
#endif

#if COMPLEXCASE==1
217
218
       h1 = conjg(hh(i-1,1))
       h2 = conjg(hh(i,2))
219
#endif
220
221
222
223
       x(1:nq) = x(1:nq) + q(1:nq,i)*h1
       y(1:nq) = y(1:nq) + q(1:nq,i)*h2
    enddo

224
225
226
#if REALCASE==1
    x(1:nq) = x(1:nq) + q(1:nq,nb+1)*hh(nb,1)
#endif
227

228
229
230
#if COMPLEXCASE==1
    x(1:nq) = x(1:nq) + q(1:nq,nb+1)*conjg(hh(nb,1))
#endif
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
    tau1 = hh(1,1)
    tau2 = hh(1,2)

    h1 = -tau1
    x(1:nq) = x(1:nq)*h1
    h1 = -tau2
    h2 = -tau2*s
    y(1:nq) = y(1:nq)*h1 + x(1:nq)*h2

    q(1:nq,1) = q(1:nq,1) + y(1:nq)
    q(1:nq,2) = q(1:nq,2) + x(1:nq) + y(1:nq)*hh(2,2)

    do i=3,nb
       h1 = hh(i-1,1)
       h2 = hh(i,2)
       q(1:nq,i) = q(1:nq,i) + x(1:nq)*h1 + y(1:nq)*h2
    enddo

    q(1:nq,nb+1) = q(1:nq,nb+1) + x(1:nq)*hh(nb,1)

251

Andreas Marek's avatar
Andreas Marek committed
252
253
254
255
256
257
258
    !call obj%timer%stop("kernel_&
    !&MATH_DATATYPE&
    !&_generic_simple: double_hh_trafo_&
    !&MATH_DATATYPE&
    !&_generic_simple" // &
    !&PRECISION_SUFFIX &
    !)
259

260
  end subroutine