Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
3172d0c1
Commit
3172d0c1
authored
Mar 01, 2019
by
Andreas Marek
Browse files
Real generic_simple_block6 kernel
parent
580978df
Changes
7
Hide whitespace changes
Inline
Side-by-side
Makefile.am
View file @
3172d0c1
...
...
@@ -109,6 +109,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2/kernels/complex_template.F90
\
src/elpa2/kernels/simple_template.F90
\
src/elpa2/kernels/simple_block4_template.F90
\
src/elpa2/kernels/simple_block6_template.F90
\
src/elpa2/pack_unpack_cpu.F90
\
src/elpa2/pack_unpack_gpu.F90
\
src/elpa2/compute_hh_trafo.F90
\
...
...
@@ -193,6 +194,9 @@ if WITH_REAL_GENERIC_SIMPLE_BLOCK4_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_simple_block4.F90
endif
if
WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_simple_block6.F90
endif
if
WITH_REAL_BGP_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_bgp.f90
endif
...
...
@@ -786,6 +790,7 @@ EXTRA_DIST = \
src/elpa2/kernels/real_template.F90
\
src/elpa2/kernels/simple_template.F90
\
src/elpa2/kernels/simple_block4_template.F90
\
src/elpa2/kernels/simple_block6_template.F90
\
src/elpa2/pack_unpack_cpu.F90
\
src/elpa2/pack_unpack_gpu.F90
\
src/elpa2/qr/elpa_pdgeqrf_template.F90
\
...
...
configure.ac
View file @
3172d0c1
...
...
@@ -550,6 +550,7 @@ m4_define(elpa_m4_generic_kernels, [
real_generic
real_generic_simple
real_generic_simple_block4
real_generic_simple_block6
complex_generic
complex_generic_simple
])
...
...
elpa/elpa_constants.h.in
View file @
3172d0c1
...
...
@@ -47,7 +47,8 @@ enum ELPA_SOLVERS {
X(ELPA_2STAGE_REAL_VSX_BLOCK2, 22, @ELPA_2STAGE_REAL_VSX_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK4, 23, @ELPA_2STAGE_REAL_VSX_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK6, 24, @ELPA_2STAGE_REAL_VSX_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 25, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__)
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 25, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6, 26, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6_COMPILED@, __VA_ARGS__)
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
...
...
src/elpa2/compute_hh_trafo.F90
View file @
3172d0c1
...
...
@@ -81,6 +81,10 @@
use
real_generic_simple_block4_kernel
!, only : double_hh_trafo_generic_simple
#endif
#if defined(WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL) && !(defined(USE_ASSUMED_SIZE))
use
real_generic_simple_block6_kernel
!, only : double_hh_trafo_generic_simple
#endif
#if defined(WITH_REAL_GENERIC_KERNEL) && !(defined(USE_ASSUMED_SIZE))
use
real_generic_kernel
!, only : double_hh_trafo_generic
#endif
...
...
@@ -133,6 +137,7 @@
#if REALCASE == 1
! real(kind=C_DATATYPE_KIND) :: a(stripe_width,a_dim2,stripe_count)
real
(
kind
=
C_DATATYPE_KIND
),
pointer
::
a
(:,:,:)
! real(kind=C_DATATYPE_KIND), target :: a1(stripe_width,a_dim2,stripe_count)
#endif
#if COMPLEXCASE == 1
! complex(kind=C_DATATYPE_KIND) :: a(stripe_width,a_dim2,stripe_count)
...
...
@@ -144,6 +149,7 @@
#if REALCASE == 1
! real(kind=C_DATATYPE_KIND) :: a(stripe_width,a_dim2,stripe_count,max_threads)
real
(
kind
=
C_DATATYPE_KIND
),
pointer
::
a
(:,:,:,:)
#endif
#if COMPLEXCASE == 1
! complex(kind=C_DATATYPE_KIND) :: a(stripe_width,a_dim2,stripe_count,max_threads)
...
...
@@ -170,13 +176,14 @@
integer
(
kind
=
ik
)
::
j
,
nl
,
jj
,
jjj
,
n_times
#if REALCASE == 1
real
(
kind
=
C_DATATYPE_KIND
)
::
w
(
nbw
,
6
)
! real(kind=C_DATATYPE_KIND) :: w1(nbw,6)
#endif
#if COMPLEXCASE == 1
complex
(
kind
=
C_DATATYPE_KIND
)
::
w
(
nbw
,
2
)
#endif
real
(
kind
=
c_double
)
::
ttt
! MPI_WTIME always needs double
integer
::
k
,
l
,
m
j
=
-99
if
(
wantDebug
)
then
...
...
@@ -1395,7 +1402,8 @@
&
MATH_DATATYPE
&
&
_
generic_simple_
&
&
PRECISION
&
&
(
a
(
1
:
stripe_width
,
jj
+
off
+
a_off
-1
:
jj
+
off
+
a_off
-1
+
nbw
,
istripe
),
w
(
1
:
nbw
,
1
:
6
),
nbw
,
nl
,
stripe_width
,
nbw
)
&
(
a
(
1
:
stripe_width
,
jj
+
off
+
a_off
-1
:
jj
+
off
+
a_off
-1
+
nbw
,
istripe
),
w
(
1
:
nbw
,
1
:
6
),
&
nbw
,
nl
,
stripe_width
,
nbw
)
#endif
#endif
...
...
@@ -1415,7 +1423,8 @@
&
MATH_DATATYPE
&
&
_
cpu_
&
&
PRECISION
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,
stripe_width
)
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
&
nbw
,
nl
,
stripe_width
)
#endif
#endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL)) */
...
...
@@ -1427,7 +1436,198 @@
#endif /* REALCASE */
#if REALCASE == 1
! generic simple block4 real kernel
!real generic simple block6 kernel
#if defined(WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL
if
(
kernel
.eq.
ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6
)
then
#endif /* not WITH_FIXED_REAL_KERNEL */
! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS
do
j
=
ncols
,
6
,
-6
w
(:,
1
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
)
w
(:,
2
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
-1
)
w
(:,
3
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
-2
)
w
(:,
4
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
-3
)
w
(:,
5
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
-4
)
w
(:,
6
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
-5
)
! w1(:,:) = w(:,:)
! a1(:,:,:) = a(:,:,:)
#ifdef WITH_OPENMP
#ifdef USE_ASSUMED_SIZE
call
hexa_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_6hv_
&
&
PRECISION
&
&
(
a
(
1
,
j
+
off
+
a_off
-5
,
istripe
,
my_thread
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#else
call
hexa_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_6hv_
&
&
PRECISION
&
&
(
a
(
1
:
stripe_width
,
j
+
off
+
a_off
-5
:
j
+
off
+
a_off
-1
,
istripe
,
my_thread
),
w
(
1
:
nbw
,
1
:
6
),
&
nbw
,
nl
,
stripe_width
,
nbw
)
#endif
#else /* WITH_OPENMP */
!print *,"j=",j," off=",off," a_off=",a_off," istripe=",istripe," nbw=",nbw," nl=",nl, &
! " stipe_width=",stripe_width,"nbw=",nbw
#ifdef USE_ASSUMED_SIZE
call
hexa_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_6hv_
&
&
PRECISION
&
&
(
a
(
1
,
j
+
off
+
a_off
-5
,
istripe
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#else
!print *,"a=",a(1,151,1),"a1=",a1(1,151,1)
!call hexa_hh_trafo_&
! &MATH_DATATYPE&
! &_sse_6hv_&
! &PRECISION&
! & (c_loc(a(1,j+off+a_off-5,istripe)), w, nbw, nl, stripe_width, nbw)
call
hexa_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_6hv_
&
&
PRECISION
&
&
(
a
(
1
:
stripe_width
,
j
+
off
+
a_off
-5
:
j
+
off
+
a_off
+
nbw
-1
,
istripe
),
w
(
1
:
nbw
,
1
:
6
),
&
nbw
,
nl
,
stripe_width
,
nbw
)
!print *," did : ",stripe_width,j+off+a_off-5,j+off+a_off+nbw-1,istripe
!do k=1,nbw
! do l=1,6
! if (w(k,l) .ne. w1(k,l)) then
! print *," w not identical ",k,l,w(k,l),w1(k,l)
! stop
! endif
! enddo
!enddo
!do k=1,stripe_width
! do l=j+off+a_off-5,j+off+a_off+nbw-1
! do m=1,istripe
! if (a(k,l,m) .ne. 0.) then
! if (abs(a(k,l,m)-a1(k,l,m))/abs(a(k,l,m)) .gt. 1e-6) then
! print *,j,k,l,m,a(k,l,m),a1(k,l,m)
! stop
! endif
! else
! if(a1(k,l,m) .ne. 0.0) then
! print *,k,l,m,a1(k,l,m)," but should be zero"
! stop
! endif
! endif
! enddo
! enddo
!enddo
!print *,"done j=",j
#endif
#endif /* WITH_OPENMP */
enddo
do
jj
=
j
,
4
,
-4
w
(:,
1
)
=
bcast_buffer
(
1
:
nbw
,
jj
+
off
)
w
(:,
2
)
=
bcast_buffer
(
1
:
nbw
,
jj
+
off
-1
)
w
(:,
3
)
=
bcast_buffer
(
1
:
nbw
,
jj
+
off
-2
)
w
(:,
4
)
=
bcast_buffer
(
1
:
nbw
,
jj
+
off
-3
)
#ifdef WITH_OPENMP
#ifdef USE_ASSUMED_SIZE
call
quad_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_4hv_
&
&
PRECISION
&
&
(
a
(
1
,
jj
+
off
+
a_off
-3
,
istripe
,
my_thread
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#else
call
quad_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_4hv_
&
&
PRECISION
&
&
(
a
(
1
:
stripe_width
,
jj
+
off
+
a_off
-3
:
jj
+
off
+
a_off
+
nbw
-1
,
istripe
,
my_thread
),
&
w
(
1
:
nbw
,
1
:
6
),
nbw
,
nl
,
stripe_width
,
nbw
)
#endif
#else /* WITH_OPENMP */
#ifdef USE_ASSUMED_SIZE
call
quad_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_4hv_
&
&
PRECISION
&
&
(
a
(
1
,
jj
+
off
+
a_off
-3
,
istripe
),
w
,
&
nbw
,
nl
,
stripe_width
,
nbw
)
#else
call
quad_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_4hv_
&
&
PRECISION
&
&
(
a
(
1
:
stripe_width
,
jj
+
off
+
a_off
-3
:
jj
+
off
+
a_off
+
nbw
-1
,
istripe
),
&
w
(
1
:
nbw
,
1
:
6
),
nbw
,
nl
,
stripe_width
,
nbw
)
#endif
#endif /* WITH_OPENMP */
enddo
do
jjj
=
jj
,
2
,
-2
w
(:,
1
)
=
bcast_buffer
(
1
:
nbw
,
jjj
+
off
)
w
(:,
2
)
=
bcast_buffer
(
1
:
nbw
,
jjj
+
off
-1
)
#ifdef WITH_OPENMP
#ifdef USE_ASSUMED_SIZE
call
double_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_
&
&
PRECISION
&
&
(
a
(
1
,
jjj
+
off
+
a_off
-1
,
istripe
,
my_thread
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#else
call
double_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_
&
&
PRECISION
&
&
(
a
(
1
:
stripe_width
,
jj
+
off
+
a_off
-1
:
jj
+
off
+
a_off
-1
+
nbw
,
istripe
,
my_thread
),
w
(
1
:
nbw
,
1
:
6
),
nbw
,
&
nl
,
stripe_width
,
nbw
)
#endif
#else /* WITH_OPENMP */
#ifdef USE_ASSUMED_SIZE
call
double_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_
&
&
PRECISION
&
&
(
a
(
1
,
jjj
+
off
+
a_off
-1
,
istripe
),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#else
call
double_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_
&
&
PRECISION
&
&
(
a
(
1
:
stripe_width
,
jj
+
off
+
a_off
-1
:
jj
+
off
+
a_off
-1
+
nbw
,
istripe
),
w
(
1
:
nbw
,
1
:
6
),
nbw
,
nl
,
&
stripe_width
,
nbw
)
#endif
#endif /* WITH_OPENMP */
enddo
#ifdef WITH_OPENMP
if
(
jjj
==
1
)
call
single_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
cpu_openmp_
&
&
PRECISION
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
,
my_thread
),
&
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,
stripe_width
)
#else
if
(
jjj
==
1
)
call
single_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
cpu_
&
&
PRECISION
&
&
(
a
(
1
:
stripe_width
,
1
+
off
+
a_off
:
1
+
off
+
a_off
+
nbw
-1
,
istripe
),
bcast_buffer
(
1
:
nbw
,
off
+1
),
nbw
,
nl
,
stripe_width
)
#endif
#ifndef WITH_FIXED_REAL_KERNEL
endif
#endif /* not WITH_FIXED_REAL_KERNEL */
#endif /* WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL */
#endif /* REALCASE */
#if REALCASE == 1
! sparc64 block 4 real kernel
#if defined(WITH_REAL_SPARC64_BLOCK4_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL
...
...
src/elpa2/kernels/real_simple_block6.F90
0 → 100644
View file @
3172d0c1
#if 0
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
! --------------------------------------------------------------------------------------------------
!
! This file contains the compute intensive kernels for the Householder transformations.
!
! This is the small and simple version (no hand unrolling of loops etc.) but for some
! compilers this performs better than a sophisticated version with transformed and unrolled loops.
!
! It should be compiled with the highest possible optimization level.
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!
! Author: A. Marek, MPCDF
! --------------------------------------------------------------------------------------------------
#endif
#include "config-f90.h"
#ifndef USE_ASSUMED_SIZE
module
real_generic_simple_block6_kernel
private
public
hexa_hh_trafo_real_generic_simple_6hv_double
#ifdef WANT_SINGLE_PRECISION_REAL
public
hexa_hh_trafo_real_generic_simple_6hv_single
#endif
contains
#endif
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "simple_block6_template.F90"
#undef REALCASE
#undef DOUBLE_PRECISION
#ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1
#define SINGLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "simple_block6_template.F90"
#undef REALCASE
#undef SINGLE_PRECISION
#endif
#ifndef USE_ASSUMED_SIZE
end
module
real_generic_simple_block6_kernel
#endif
! --------------------------------------------------------------------------------------------------
src/elpa2/kernels/simple_block6_template.F90
0 → 100644
View file @
3172d0c1
#if 0
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
! --------------------------------------------------------------------------------------------------
!
! This file contains the compute intensive kernels for the Householder transformations.
!
! This is the small and simple version (no hand unrolling of loops etc.) but for some
! compilers this performs better than a sophisticated version with transformed and unrolled loops.
!
! It should be compiled with the highest possible optimization level.
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!
! Author: A. Marek, MPCDF
! --------------------------------------------------------------------------------------------------
#endif
subroutine
hexa_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
generic_simple_6hv_
&
&
PRECISION
&
&
(
q
,
hh
,
nb
,
nq
,
ldq
,
ldh
)
use
precision
use
elpa_abstract_impl
implicit
none
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
,
nq
,
ldq
,
ldh
#if REALCASE==1
#ifdef USE_ASSUMED_SIZE
real
(
kind
=
C_DATATYPE_KIND
),
intent
(
inout
)
::
q
(
ldq
,
*
)
real
(
kind
=
C_DATATYPE_KIND
),
intent
(
in
)
::
hh
(
ldh
,
*
)
#else
real
(
kind
=
C_DATATYPE_KIND
),
intent
(
inout
)
::
q
(
1
:
ldq
,
1
:
nb
+5
)
real
(
kind
=
C_DATATYPE_KIND
),
intent
(
in
)
::
hh
(
1
:
ldh
,
1
:
6
)
#endif
real
(
kind
=
C_DATATYPE_KIND
)
::
scalarproduct
(
15
)
real
(
kind
=
C_DATATYPE_KIND
)
::
vs_1_2
,
vs_1_3
,
vs_2_3
,
vs_1_4
,
vs_2_4
,
vs_3_4
real
(
kind
=
C_DATATYPE_KIND
)
::
vs_1_5
,
vs_1_6
,
vs_2_5
,
vs_2_6
,
vs_3_5
real
(
kind
=
C_DATATYPE_KIND
)
::
vs_3_6
,
vs_4_5
,
vs_4_6
,
vs_5_6
real
(
kind
=
C_DATATYPE_KIND
)
::
a_1_1
(
nq
),
a_2_1
(
nq
),
a_3_1
(
nq
),
a_4_1
(
nq
),
a_5_1
(
nq
),
a_6_1
(
nq
)
real
(
kind
=
C_DATATYPE_KIND
)
::
h_6_5
,
h_6_4
,
h_6_3
,
h_6_2
,
h_6_1
real
(
kind
=
C_DATATYPE_KIND
)
::
h_5_4
,
h_5_3
,
h_5_2
,
h_5_1
real
(
kind
=
C_DATATYPE_KIND
)
::
h_4_3
,
h_4_2
,
h_4_1
real
(
kind
=
C_DATATYPE_KIND
)
::
h_2_1
,
h_3_2
,
h_3_1
real
(
kind
=
C_DATATYPE_KIND
)
::
h1
,
h2
,
h3
,
h4
,
h5
,
h6
real
(
kind
=
C_DATATYPE_KIND
)
::
w
(
nq
),
z
(
nq
),
x
(
nq
),
y
(
nq
),
t
(
nq
),
v
(
nq
)
real
(
kind
=
C_DATATYPE_KIND
)
::
tau1
,
tau2
,
tau3
,
tau4
,
tau5
,
tau6
#endif /* REALCASE==1 */
integer
(
kind
=
ik
)
::
i
,
j
! Calculate dot product of the two Householder vectors
scalarproduct
(
1
)
=
hh
(
2
,
2
)
scalarproduct
(
2
)
=
hh
(
3
,
3
)
scalarproduct
(
3
)
=
hh
(
2
,
3
)
scalarproduct
(
4
)
=
hh
(
4
,
4
)
scalarproduct
(
5
)
=
hh
(
3
,
4
)
scalarproduct
(
6
)
=
hh
(
2
,
4
)
scalarproduct
(
7
)
=
hh
(
5
,
5
)
scalarproduct
(
8
)
=
hh
(
4
,
5
)
scalarproduct
(
9
)
=
hh
(
3
,
5
)
scalarproduct
(
10
)
=
hh
(
2
,
5
)
scalarproduct
(
11
)
=
hh
(
6
,
6
)
scalarproduct
(
12
)
=
hh
(
5
,
6
)
scalarproduct
(
13
)
=
hh
(
4
,
6
)
scalarproduct
(
14
)
=
hh
(
3
,
6
)
scalarproduct
(
15
)
=
hh
(
2
,
6
)
scalarproduct
(
1
)
=
scalarproduct
(
1
)
+
hh
(
2
,
1
)
*
hh
(
3
,
2
)
scalarproduct
(
3
)
=
scalarproduct
(
3
)
+
hh
(
2
,
2
)
*
hh
(
3
,
3
)
scalarproduct
(
6
)
=
scalarproduct
(
6
)
+
hh
(
2
,
3
)
*
hh
(
3
,
4
)
scalarproduct
(
10
)
=
scalarproduct
(
10
)
+
hh
(
2
,
4
)
*
hh
(
3
,
5
)
scalarproduct
(
15
)
=
scalarproduct
(
15
)
+
hh
(
2
,
5
)
*
hh
(
3
,
6
)
scalarproduct
(
1
)
=
scalarproduct
(
1
)
+
hh
(
3
,
1
)
*
hh
(
4
,
2
)
scalarproduct
(
3
)
=
scalarproduct
(
3
)
+
hh
(
3
,
2
)
*
hh
(
4
,
3
)
scalarproduct
(
6
)
=
scalarproduct
(
6
)
+
hh
(
3
,
3
)
*
hh
(
4
,
4
)
scalarproduct
(
10
)
=
scalarproduct
(
10
)
+
hh
(
3
,
4
)
*
hh
(
4
,
5
)
scalarproduct
(
15
)
=
scalarproduct
(
15
)
+
hh
(
3
,
5
)
*
hh
(
4
,
6
)
scalarproduct
(
2
)
=
scalarproduct
(
2
)
+
hh
(
2
,
1
)
*
hh
(
4
,
3
)
scalarproduct
(
5
)
=
scalarproduct
(
5
)
+
hh
(
2
,
2
)
*
hh
(
4
,
4
)
scalarproduct
(
9
)
=
scalarproduct
(
9
)
+
hh
(
2
,
3
)
*
hh
(
4
,
5
)
scalarproduct
(
14
)
=
scalarproduct
(
14
)
+
hh
(
2
,
4
)
*
hh
(
4
,
6
)
scalarproduct
(
1
)
=
scalarproduct
(
1
)
+
hh
(
4
,
1
)
*
hh
(
5
,
2
)
scalarproduct
(
3
)
=
scalarproduct
(
3
)
+
hh
(
4
,
2
)
*
hh
(
5
,
3
)
scalarproduct
(
6
)
=
scalarproduct
(
6
)
+
hh
(
4
,
3
)
*
hh
(
5
,
4
)
scalarproduct
(
10
)
=
scalarproduct
(
10
)
+
hh
(
4
,
4
)
*
hh
(
5
,
5
)
scalarproduct
(
15
)
=
scalarproduct
(
15
)
+
hh
(
4
,
5
)
*
hh
(
5
,
6
)
scalarproduct
(
2
)
=
scalarproduct
(
2
)
+
hh
(
3
,
1
)
*
hh
(
5
,
3
)
scalarproduct
(
5
)
=
scalarproduct
(
5
)
+
hh
(
3
,
2
)
*
hh
(
5
,
4
)
scalarproduct
(
9
)
=
scalarproduct
(
9
)
+
hh
(
3
,
3
)
*
hh
(
5
,
5
)
scalarproduct
(
14
)
=
scalarproduct
(
14
)
+
hh
(
3
,
4
)
*
hh
(
5
,
6
)
scalarproduct
(
4
)
=
scalarproduct
(
4
)
+
hh
(
2
,
1
)
*
hh
(
5
,
4
)
scalarproduct
(
8
)
=
scalarproduct
(
8
)
+
hh
(
2
,
2
)
*
hh
(
5
,
5
)
scalarproduct
(
13
)
=
scalarproduct
(
13
)
+
hh
(
2
,
3
)
*
hh
(
5
,
6
)
scalarproduct
(
1
)
=
scalarproduct
(
1
)
+
hh
(
5
,
1
)
*
hh
(
6
,
2
)
scalarproduct
(
3
)
=
scalarproduct
(
3
)
+
hh
(
5
,
2
)
*
hh
(
6
,
3
)
scalarproduct
(
6
)
=
scalarproduct
(
6
)
+
hh
(
5
,
3
)
*
hh
(
6
,
4
)
scalarproduct
(
10
)
=
scalarproduct
(
10
)
+
hh
(
5
,
4
)
*
hh
(
6
,
5
)
scalarproduct
(
15
)
=
scalarproduct
(
15
)
+
hh
(
5
,
5
)
*
hh
(
6
,
6
)
scalarproduct
(
2
)
=
scalarproduct
(
2
)
+
hh
(
4
,
1
)
*
hh
(
6
,
3
)
scalarproduct
(
5
)
=
scalarproduct
(
5
)
+
hh
(
4
,
2
)
*
hh
(
6
,
4
)
scalarproduct
(
9
)
=
scalarproduct
(
9
)
+
hh
(
4
,
3
)
*
hh
(
6
,
5
)
scalarproduct
(
14
)
=
scalarproduct
(
14
)
+
hh
(
4
,
4
)
*
hh
(
6
,
6
)
scalarproduct
(
4
)
=
scalarproduct
(
4
)
+
hh
(
3
,
1
)
*
hh
(
6
,
4
)
scalarproduct
(
8
)
=
scalarproduct
(
8
)
+
hh
(
3
,
2
)
*
hh
(
6
,
5
)
scalarproduct
(
13
)
=
scalarproduct
(
13
)
+
hh
(
3
,
3
)
*
hh
(
6
,
6
)
scalarproduct
(
7
)
=
scalarproduct
(
7
)
+
hh
(
2
,
1
)
*
hh
(
6
,
5
)
scalarproduct
(
12
)
=
scalarproduct
(
12
)
+
hh
(
2
,
2
)
*
hh
(
6
,
6
)
!DIR$ IVDEP
do
i
=
7
,
nb
scalarproduct
(
1
)
=
scalarproduct
(
1
)
+
hh
(
i
-1
,
1
)
*
hh
(
i
,
2
)
scalarproduct
(
3
)
=
scalarproduct
(
3
)
+
hh
(
i
-1
,
2
)
*
hh
(
i
,
3
)
scalarproduct
(
6
)
=
scalarproduct
(
6
)
+
hh
(
i
-1
,
3
)
*
hh
(
i
,
4
)
scalarproduct
(
10
)
=
scalarproduct
(
10
)
+
hh
(
i
-1
,
4
)
*
hh
(
i
,
5
)
scalarproduct
(
15
)
=
scalarproduct
(
15
)
+
hh
(
i
-1
,
5
)
*
hh
(
i
,
6
)
scalarproduct
(
2
)
=
scalarproduct
(
2
)
+
hh
(
i
-2
,
1
)
*
hh
(
i
,
3
)
scalarproduct
(
5
)
=
scalarproduct
(
5
)
+
hh
(
i
-2
,
2
)
*
hh
(
i
,
4
)
scalarproduct
(
9
)
=
scalarproduct
(
9
)
+
hh
(
i
-2
,
3
)
*
hh
(
i
,
5
)
scalarproduct
(
14
)
=
scalarproduct
(
14
)
+
hh
(
i
-2
,
4
)
*
hh
(
i
,
6
)
scalarproduct
(
4
)
=
scalarproduct
(
4
)
+
hh
(
i
-3
,
1
)
*
hh
(