Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
071ba6be
Commit
071ba6be
authored
Aug 31, 2016
by
Andreas Marek
Browse files
Unify template for real, complex simple kernel
parent
9e2c7b80
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Makefile.am
View file @
071ba6be
...
@@ -52,8 +52,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
...
@@ -52,8 +52,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2_compute_complex_template.X90
\
src/elpa2_compute_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_real_template.X90
\
src/elpa2_kernels/elpa2_kernels_real_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_real_simple_template.X90
\
src/elpa2_kernels/elpa2_kernels_simple_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_simple_template.X90
\
src/redist_band.X90
src/redist_band.X90
lib_LTLIBRARIES
=
libelpa@SUFFIX@.la
lib_LTLIBRARIES
=
libelpa@SUFFIX@.la
...
@@ -775,8 +774,7 @@ EXTRA_DIST = \
...
@@ -775,8 +774,7 @@ EXTRA_DIST = \
src/elpa2_compute_complex_template.X90
\
src/elpa2_compute_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_real_template.X90
\
src/elpa2_kernels/elpa2_kernels_real_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_real_simple_template.X90
\
src/elpa2_kernels/elpa2_kernels_simple_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_simple_template.X90
\
src/redist_band.X90
\
src/redist_band.X90
\
src/elpa_qr/elpa_qrkernels.X90
\
src/elpa_qr/elpa_qrkernels.X90
\
src/ev_tridi_band_gpu_c_v2_complex_template.Xcu
\
src/ev_tridi_band_gpu_c_v2_complex_template.Xcu
\
...
...
src/elpa2_kernels/elpa2_kernels_complex.F90
View file @
071ba6be
...
@@ -10,7 +10,7 @@
...
@@ -10,7 +10,7 @@
! - Technische Universität München, Lehrstuhl für Informatik mit
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaft
r
n,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaft
e
n,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! and
! - IBM Deutschland GmbH
! - IBM Deutschland GmbH
...
...
src/elpa2_kernels/elpa2_kernels_complex_simple.F90
View file @
071ba6be
...
@@ -10,7 +10,7 @@
...
@@ -10,7 +10,7 @@
! - Technische Universität München, Lehrstuhl für Informatik mit
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaft
r
n,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaft
e
n,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! and
! - IBM Deutschland GmbH
! - IBM Deutschland GmbH
...
@@ -68,18 +68,22 @@ module complex_generic_simple_kernel
...
@@ -68,18 +68,22 @@ module complex_generic_simple_kernel
contains
contains
#define COMPLEXCASE 1
#define DOUBLE_PRECISION_COMPLEX 1
#define DOUBLE_PRECISION_COMPLEX 1
#define
COMPLEX_
DATATYPE ck8
#define DATATYPE ck8
#include "elpa2_kernels_
complex_
simple_template.X90"
#include "elpa2_kernels_simple_template.X90"
#undef DOUBLE_PRECISION_COMPLEX
#undef DOUBLE_PRECISION_COMPLEX
#undef COMPLEX_DATATYPE
#undef DATATYPE
#undef COMPLEXCASE
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#define COMPLEXCASE 1
#undef DOUBLE_PRECISION_COMPLEX
#undef DOUBLE_PRECISION_COMPLEX
#define
COMPLEX_
DATATYPE ck4
#define DATATYPE ck4
#include "elpa2_kernels_
complex_
simple_template.X90"
#include "elpa2_kernels_simple_template.X90"
#undef DOUBLE_PRECISION_COMPLEX
#undef DOUBLE_PRECISION_COMPLEX
#undef COMPLEX_DATATYPE
#undef DATATYPE
#undef COMPLEXCASE
#endif
#endif
...
...
src/elpa2_kernels/elpa2_kernels_real.F90
View file @
071ba6be
...
@@ -10,7 +10,7 @@
...
@@ -10,7 +10,7 @@
! - Technische Universität München, Lehrstuhl für Informatik mit
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaft
r
n,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaft
e
n,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! and
! - IBM Deutschland GmbH
! - IBM Deutschland GmbH
...
...
src/elpa2_kernels/elpa2_kernels_real_simple.F90
View file @
071ba6be
...
@@ -10,7 +10,7 @@
...
@@ -10,7 +10,7 @@
! - Technische Universität München, Lehrstuhl für Informatik mit
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaft
r
n,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaft
e
n,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! and
! - IBM Deutschland GmbH
! - IBM Deutschland GmbH
...
@@ -69,19 +69,22 @@ module real_generic_simple_kernel
...
@@ -69,19 +69,22 @@ module real_generic_simple_kernel
contains
contains
#define REALCASE 1
#define DOUBLE_PRECISION_REAL 1
#define DOUBLE_PRECISION_REAL 1
#define
REAL_
DATATYPE rk8
#define DATATYPE rk8
#include "elpa2_kernels_
real_
simple_template.X90"
#include "elpa2_kernels_simple_template.X90"
#undef DOUBLE_PRECISION_REAL
#undef DOUBLE_PRECISION_REAL
#undef
REAL_
DATATYPE
#undef DATATYPE
#undef REALCASE
#ifdef WANT_SINGLE_PRECISION_REAL
#ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1
#undef DOUBLE_PRECISION_REAL
#undef DOUBLE_PRECISION_REAL
#define
REAL_
DATATYPE rk4
#define DATATYPE rk4
#include "elpa2_kernels_
real_
simple_template.X90"
#include "elpa2_kernels_simple_template.X90"
#undef DOUBLE_PRECISION_REAL
#undef DOUBLE_PRECISION_REAL
#undef REAL_DATATYPE
#undef DATATYPE
#undef REALCASE
#endif
#endif
end
module
real_generic_simple_kernel
end
module
real_generic_simple_kernel
...
...
src/elpa2_kernels/elpa2_kernels_real_simple_template.X90
deleted
100644 → 0
View file @
9e2c7b80
#if 0
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
! --------------------------------------------------------------------------------------------------
!
! This file contains the compute intensive kernels for the Householder transformations.
!
! This is the small and simple version (no hand unrolling of loops etc.) but for some
! compilers this performs better than a sophisticated version with transformed and unrolled loops.
!
! It should be compiled with the highest possible optimization level.
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!
! --------------------------------------------------------------------------------------------------
#endif
#ifdef DOUBLE_PRECISION_REAL
subroutine double_hh_trafo_generic_simple_double(q, hh, nb, nq, ldq, ldh)
#else
subroutine double_hh_trafo_generic_simple_single(q, hh, nb, nq, ldq, ldh)
#endif
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
implicit none
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
real(kind=REAL_DATATYPE), intent(inout) :: q(ldq,*)
real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,*)
#else
real(kind=REAL_DATATYPE), intent(inout) :: q(ldq,1:nb+1)
real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,2)
#endif
real(kind=REAL_DATATYPE) :: s, h1, h2, tau1, tau2, x(nq), y(nq)
integer(kind=ik) :: i
#ifdef DOUBLE_PRECISION_REAL
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel generic simple: double_hh_trafo_generic_simple_double")
#endif
#else
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel generic simple: double_hh_trafo_generic_simple_single")
#endif
#endif
! Calculate dot product of the two Householder vectors
s = hh(2,2)*1
do i=3,nb
s = s+hh(i,2)*hh(i-1,1)
enddo
! Do the Householder transformations
x(1:nq) = q(1:nq,2)
y(1:nq) = q(1:nq,1) + q(1:nq,2)*hh(2,2)
do i=3,nb
h1 = hh(i-1,1)
h2 = hh(i,2)
x(1:nq) = x(1:nq) + q(1:nq,i)*h1
y(1:nq) = y(1:nq) + q(1:nq,i)*h2
enddo
x(1:nq) = x(1:nq) + q(1:nq,nb+1)*hh(nb,1)
tau1 = hh(1,1)
tau2 = hh(1,2)
h1 = -tau1
x(1:nq) = x(1:nq)*h1
h1 = -tau2
h2 = -tau2*s
y(1:nq) = y(1:nq)*h1 + x(1:nq)*h2
q(1:nq,1) = q(1:nq,1) + y(1:nq)
q(1:nq,2) = q(1:nq,2) + x(1:nq) + y(1:nq)*hh(2,2)
do i=3,nb
h1 = hh(i-1,1)
h2 = hh(i,2)
q(1:nq,i) = q(1:nq,i) + x(1:nq)*h1 + y(1:nq)*h2
enddo
q(1:nq,nb+1) = q(1:nq,nb+1) + x(1:nq)*hh(nb,1)
#ifdef DOUBLE_PRECISION_REAL
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel generic simple: double_hh_trafo_generic_simple_double")
#endif
#else
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel generic simple: double_hh_trafo_generic_simple_single")
#endif
#endif
#ifdef DOUBLE_PRECISION_REAL
end subroutine double_hh_trafo_generic_simple_double
#else
end subroutine double_hh_trafo_generic_simple_single
#endif
src/elpa2_kernels/elpa2_kernels_
complex_
simple_template.X90
→
src/elpa2_kernels/elpa2_kernels_simple_template.X90
View file @
071ba6be
...
@@ -58,6 +58,7 @@
...
@@ -58,6 +58,7 @@
! --------------------------------------------------------------------------------------------------
! --------------------------------------------------------------------------------------------------
#endif
#endif
#if COMPLEXCASE==1
! the intel compiler creates a temp copy of array q
! the intel compiler creates a temp copy of array q
! this should be avoided without using assumed size arrays
! this should be avoided without using assumed size arrays
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef DOUBLE_PRECISION_COMPLEX
...
@@ -73,14 +74,14 @@
...
@@ -73,14 +74,14 @@
integer(kind=ik), intent(in) :: nb, nq, ldq
integer(kind=ik), intent(in) :: nb, nq, ldq
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=
COMPLEX_
DATATYPE), intent(inout) :: q(ldq,*)
complex(kind=DATATYPE), intent(inout) :: q(ldq,*)
complex(kind=
COMPLEX_
DATATYPE), intent(in) :: hh(*)
complex(kind=DATATYPE), intent(in) :: hh(*)
#else
#else
complex(kind=
COMPLEX_
DATATYPE), intent(inout) :: q(1:ldq,1:nb)
complex(kind=DATATYPE), intent(inout) :: q(1:ldq,1:nb)
complex(kind=
COMPLEX_
DATATYPE), intent(in) :: hh(1:nb)
complex(kind=DATATYPE), intent(in) :: hh(1:nb)
#endif
#endif
integer(kind=ik) :: i
integer(kind=ik) :: i
complex(kind=
COMPLEX_
DATATYPE) :: h1, tau1, x(nq)
complex(kind=DATATYPE) :: h1, tau1, x(nq)
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef DOUBLE_PRECISION_COMPLEX
...
@@ -88,13 +89,14 @@
...
@@ -88,13 +89,14 @@
call timer%start("kernel complex generic simple: single_hh_trafo_complex_generic_simple_double")
call timer%start("kernel complex generic simple: single_hh_trafo_complex_generic_simple_double")
#endif
#endif
#else
#else
/* DOUBLE_PRECISION_COMPLEX */
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel complex generic simple: single_hh_trafo_complex_generic_simple_single")
call timer%start("kernel complex generic simple: single_hh_trafo_complex_generic_simple_single")
#endif
#endif
#endif
#endif /* DOUBLE_PRECISION_COMPLEX */
! Just one Householder transformation
! Just one Householder transformation
x(1:nq) = q(1:nq,1)
x(1:nq) = q(1:nq,1)
...
@@ -117,26 +119,43 @@
...
@@ -117,26 +119,43 @@
call timer%stop("kernel complex generic simple: single_hh_trafo_complex_generic_simple_double")
call timer%stop("kernel complex generic simple: single_hh_trafo_complex_generic_simple_double")
#endif
#endif
#else
#else
/* DOUBLE_PRECISION_COMPLEX */
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel complex generic simple: single_hh_trafo_complex_generic_simple_single")
call timer%stop("kernel complex generic simple: single_hh_trafo_complex_generic_simple_single")
#endif
#endif
#endif
#endif
/* DOUBLE_PRECISION_COMPLEX */
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef DOUBLE_PRECISION_COMPLEX
end subroutine single_hh_trafo_complex_generic_simple_double
end subroutine single_hh_trafo_complex_generic_simple_double
#else
#else
end subroutine single_hh_trafo_complex_generic_simple_single
end subroutine single_hh_trafo_complex_generic_simple_single
#endif
#endif
#endif /* COMPLEXCASE == 1 */
! --------------------------------------------------------------------------------------------------
! --------------------------------------------------------------------------------------------------
#if REALCASE==1
#ifdef DOUBLE_PRECISION_REAL
subroutine double_hh_trafo_generic_simple_double(q, hh, nb, nq, ldq, ldh)
#else
subroutine double_hh_trafo_generic_simple_single(q, hh, nb, nq, ldq, ldh)
#endif
#endif /* REALCASE == 1 */
#if COMPLEXCASE==1
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef DOUBLE_PRECISION_COMPLEX
subroutine double_hh_trafo_complex_generic_simple_double(q, hh, nb, nq, ldq, ldh)
subroutine double_hh_trafo_complex_generic_simple_double(q, hh, nb, nq, ldq, ldh)
#else
#else
subroutine double_hh_trafo_complex_generic_simple_single(q, hh, nb, nq, ldq, ldh)
subroutine double_hh_trafo_complex_generic_simple_single(q, hh, nb, nq, ldq, ldh)
#endif
#endif
#endif /* COMPLEXCASE==1 */
use precision
use precision
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
use timings
use timings
...
@@ -144,16 +163,51 @@
...
@@ -144,16 +163,51 @@
implicit none
implicit none
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
#if REALCASE==1
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=COMPLEX_
DATATYPE), intent(inout) :: q(ldq,*)
real(kind=
DATATYPE), intent(inout) :: q(ldq,*)
complex(kind=COMPLEX_
DATATYPE), intent(in) :: hh(ldh,*)
real(kind=
DATATYPE), intent(in) :: hh(ldh,*)
#else
#else
complex(kind=COMPLEX_
DATATYPE), intent(inout) :: q(1:ldq,1:nb+1)
real(kind=
DATATYPE), intent(inout) :: q(1:ldq,1:nb+1)
complex(kind=COMPLEX_
DATATYPE), intent(in) :: hh(1:ldh,1:
2
)
real(kind=
DATATYPE), intent(in) :: hh(1:ldh,1:
6
)
#endif
#endif
complex(kind=COMPLEX_DATATYPE) :: s, h1, h2, tau1, tau2, x(nq), y(nq)
real(kind=DATATYPE) :: s, h1, h2, tau1, tau2, x(nq), y(nq)
#endif /* REALCASE==1 */
#if COMPLEXCASE==1
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=DATATYPE), intent(inout) :: q(ldq,*)
complex(kind=DATATYPE), intent(in) :: hh(ldh,*)
#else
complex(kind=DATATYPE), intent(inout) :: q(1:ldq,1:nb+1)
complex(kind=DATATYPE), intent(in) :: hh(1:ldh,1:2)
#endif
complex(kind=DATATYPE) :: s, h1, h2, tau1, tau2, x(nq), y(nq)
#endif /* COMPLEXCASE==1 */
integer(kind=ik) :: i
integer(kind=ik) :: i
#if REALCASE==1
#ifdef DOUBLE_PRECISION_REAL
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel real generic simple: double_hh_trafo_real_generic_simple_double")
#endif
#else
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel real generic simple: double_hh_trafo_real_generic_simple_single")
#endif
#endif
#endif /* REALCASE==1 */
#if COMPLEXCASE==1
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
...
@@ -168,28 +222,55 @@
...
@@ -168,28 +222,55 @@
#endif
#endif
#endif /* COMPLEXCASE==1 */
! Calculate dot product of the two Householder vectors
! Calculate dot product of the two Householder vectors
#if REALCASE==1
s = hh(2,2)*1.0
do i=3,nb
s = s+hh(i,2)*hh(i-1,1)
enddo
#endif
s = conjg(hh(2,2))*1
#if COMPLEXCASE==1
s = conjg(hh(2,2))*1.0
do i=3,nb
do i=3,nb
s = s+(conjg(hh(i,2))*hh(i-1,1))
s = s+(conjg(hh(i,2))*hh(i-1,1))
enddo
enddo
#endif
! Do the Householder transformations
! Do the Householder transformations
x(1:nq) = q(1:nq,2)
x(1:nq) = q(1:nq,2)
#if REALCASE==1
y(1:nq) = q(1:nq,1) + q(1:nq,2)*hh(2,2)
#endif
#if COMPLEXCASE==1
y(1:nq) = q(1:nq,1) + q(1:nq,2)*conjg(hh(2,2))
y(1:nq) = q(1:nq,1) + q(1:nq,2)*conjg(hh(2,2))
#endif
do i=3,nb
do i=3,nb
#if REALCASE==1
h1 = hh(i-1,1)
h2 = hh(i,2)
#endif
#if COMPLEXCASE==1
h1 = conjg(hh(i-1,1))
h1 = conjg(hh(i-1,1))
h2 = conjg(hh(i,2))
h2 = conjg(hh(i,2))
#endif
x(1:nq) = x(1:nq) + q(1:nq,i)*h1
x(1:nq) = x(1:nq) + q(1:nq,i)*h1
y(1:nq) = y(1:nq) + q(1:nq,i)*h2
y(1:nq) = y(1:nq) + q(1:nq,i)*h2
enddo
enddo
x(1:nq) = x(1:nq) + q(1:nq,nb+1)*conjg(hh(nb,1))
#if REALCASE==1
x(1:nq) = x(1:nq) + q(1:nq,nb+1)*hh(nb,1)
#endif
#if COMPLEXCASE==1
x(1:nq) = x(1:nq) + q(1:nq,nb+1)*conjg(hh(nb,1))
#endif
tau1 = hh(1,1)
tau1 = hh(1,1)
tau2 = hh(1,2)
tau2 = hh(1,2)
...
@@ -210,22 +291,58 @@
...
@@ -210,22 +291,58 @@
q(1:nq,nb+1) = q(1:nq,nb+1) + x(1:nq)*hh(nb,1)
q(1:nq,nb+1) = q(1:nq,nb+1) + x(1:nq)*hh(nb,1)
#if REALCASE==1
#ifdef DOUBLE_PRECISION_REAL
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel real generic simple: double_hh_trafo_complex_generic_simple_double")
#endif
#else /* DOUBLE_PRECISION_REAL */
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel real generic simple: double_hh_trafo_complex_generic_simple_single")
#endif
#endif /* DOUBLE_PRECISION_REAL */
#endif /* REALCASE==1 */
#if COMPLEXCASE==1
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel complex generic simple: double_hh_trafo_complex_generic_simple_double")
call timer%stop("kernel complex generic simple: double_hh_trafo_complex_generic_simple_double")
#endif
#endif
#else
#else
/* DOUBLE_PRECISION_COMPLEX */
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS