Commit 23334f01 authored by Andreas Marek
Browse files

Fix problem with legacy build

parent f867d600
...@@ -170,6 +170,7 @@ ...@@ -170,6 +170,7 @@
#endif #endif
real(kind=c_double) :: ttt ! MPI_WTIME always needs double real(kind=c_double) :: ttt ! MPI_WTIME always needs double
#if REALCASE == 1 #if REALCASE == 1
if (kernel .eq. ELPA_2STAGE_REAL_GPU) then if (kernel .eq. ELPA_2STAGE_REAL_GPU) then
! ncols - indicates the number of HH reflectors to apply; at least 1 must be available ! ncols - indicates the number of HH reflectors to apply; at least 1 must be available
...@@ -304,7 +305,6 @@ ...@@ -304,7 +305,6 @@
#endif /* REALCASE */ #endif /* REALCASE */
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!FORTRAN CODE / X86 INTRINSIC CODE / BG ASSEMBLER USING 2 HOUSEHOLDER VECTORS !FORTRAN CODE / X86 INTRINSIC CODE / BG ASSEMBLER USING 2 HOUSEHOLDER VECTORS
#if REALCASE == 1 #if REALCASE == 1
! generic kernel real case ! generic kernel real case
...@@ -324,14 +324,14 @@ ...@@ -324,14 +324,14 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_& &_generic_&
&PRECISION& &PRECISION&
& (obj, a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw) & (a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
#else #else
call double_hh_trafo_& call double_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_& &_generic_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1, istripe,my_thread), w(1:nbw,1:6), & & (a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1, istripe,my_thread), w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw) nbw, nl, stripe_width, nbw)
#endif #endif
...@@ -342,14 +342,14 @@ ...@@ -342,14 +342,14 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_& &_generic_&
&PRECISION& &PRECISION&
& (obj, a(1,j+off+a_off-1,istripe),w, nbw, nl, stripe_width, nbw) & (a(1,j+off+a_off-1,istripe),w, nbw, nl, stripe_width, nbw)
#else #else
call double_hh_trafo_& call double_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_& &_generic_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1,istripe),w(1:nbw,1:6), nbw, nl, stripe_width, nbw) & (a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1,istripe),w(1:nbw,1:6), nbw, nl, stripe_width, nbw)
#endif #endif
#endif /* WITH_OPENMP */ #endif /* WITH_OPENMP */
...@@ -379,13 +379,13 @@ ...@@ -379,13 +379,13 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_& &_generic_&
&PRECISION& &PRECISION&
& (obj, a(1,j+off+a_off,istripe,my_thread), bcast_buffer(1,j+off),nbw,nl,stripe_width) & (a(1,j+off+a_off,istripe,my_thread), bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else #else
call single_hh_trafo_& call single_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_& &_generic_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width) & (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif #endif
#else /* WITH_OPENMP */ #else /* WITH_OPENMP */
...@@ -395,13 +395,13 @@ ...@@ -395,13 +395,13 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_& &_generic_&
&PRECISION& &PRECISION&
& (obj, a(1,j+off+a_off,istripe), bcast_buffer(1,j+off),nbw,nl,stripe_width) & (a(1,j+off+a_off,istripe), bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else #else
call single_hh_trafo_& call single_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_& &_generic_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width) & (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif #endif
#endif /* WITH_OPENMP */ #endif /* WITH_OPENMP */
...@@ -414,6 +414,8 @@ ...@@ -414,6 +414,8 @@
#endif /* COMPLEXCASE */ #endif /* COMPLEXCASE */
#if REALCASE == 1 #if REALCASE == 1
! generic simple real kernel ! generic simple real kernel
#if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL) #if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
...@@ -429,13 +431,13 @@ ...@@ -429,13 +431,13 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_simple_& &_generic_simple_&
&PRECISION& &PRECISION&
& (obj, a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw) & (a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
#else #else
call double_hh_trafo_& call double_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_simple_& &_generic_simple_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,j+off+a_off-1:j+off+a_off-1+nbw,istripe,my_thread), w, nbw, nl, stripe_width, nbw) & (a(1:stripe_width,j+off+a_off-1:j+off+a_off-1+nbw,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
#endif #endif
...@@ -446,13 +448,13 @@ ...@@ -446,13 +448,13 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_simple_& &_generic_simple_&
&PRECISION& &PRECISION&
& (obj, a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw) & (a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
#else #else
call double_hh_trafo_& call double_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_simple_& &_generic_simple_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,j+off+a_off-1:j+off+a_off-1+nbw,istripe), w, nbw, nl, stripe_width, nbw) & (a(1:stripe_width,j+off+a_off-1:j+off+a_off-1+nbw,istripe), w, nbw, nl, stripe_width, nbw)
#endif #endif
...@@ -481,13 +483,13 @@ ...@@ -481,13 +483,13 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_simple_& &_generic_simple_&
&PRECISION& &PRECISION&
& (obj, a(1,j+off+a_off,istripe,my_thread), bcast_buffer(1,j+off),nbw,nl,stripe_width) & (a(1,j+off+a_off,istripe,my_thread), bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else #else
call single_hh_trafo_& call single_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_simple_& &_generic_simple_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width, j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width) & (a(1:stripe_width, j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif #endif
#else /* WITH_OPENMP */ #else /* WITH_OPENMP */
...@@ -497,13 +499,13 @@ ...@@ -497,13 +499,13 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_simple_& &_generic_simple_&
&PRECISION& &PRECISION&
& (obj, a(1,j+off+a_off,istripe), bcast_buffer(1,j+off),nbw,nl,stripe_width) & (a(1,j+off+a_off,istripe), bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else #else
call single_hh_trafo_& call single_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_generic_simple_& &_generic_simple_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width) & (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif #endif
#endif /* WITH_OPENMP */ #endif /* WITH_OPENMP */
...@@ -512,6 +514,7 @@ ...@@ -512,6 +514,7 @@
endif ! (kernel .eq. ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE) endif ! (kernel .eq. ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE)
#endif /* not WITH_FIXED_COMPLEX_KERNEL */ #endif /* not WITH_FIXED_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_GENERIC_SIMPLE_KERNEL */ #endif /* WITH_COMPLEX_GENERIC_SIMPLE_KERNEL */
#endif /* COMPLEXCASE */ #endif /* COMPLEXCASE */
#if REALCASE == 1 #if REALCASE == 1
...@@ -519,6 +522,7 @@ ...@@ -519,6 +522,7 @@
#if defined(WITH_REAL_SSE_ASSEMBLY_KERNEL) #if defined(WITH_REAL_SSE_ASSEMBLY_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_SSE_ASSEMBLY) then if (kernel .eq. ELPA_2STAGE_REAL_SSE_ASSEMBLY) then
#endif /* not WITH_FIXED_REAL_KERNEL */ #endif /* not WITH_FIXED_REAL_KERNEL */
do j = ncols, 2, -2 do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off) w(:,1) = bcast_buffer(1:nbw,j+off)
...@@ -547,6 +551,7 @@ ...@@ -547,6 +551,7 @@
#endif /* REALCASE */ #endif /* REALCASE */
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
! sse assembly kernel complex case ! sse assembly kernel complex case
#if defined(WITH_COMPLEX_SSE_ASSEMBLY_KERNEL) #if defined(WITH_COMPLEX_SSE_ASSEMBLY_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL #ifndef WITH_FIXED_COMPLEX_KERNEL
...@@ -581,6 +586,7 @@ ...@@ -581,6 +586,7 @@
#endif #endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
! sse block1 complex kernel ! sse block1 complex kernel
#if defined(WITH_COMPLEX_SSE_BLOCK1_KERNEL) #if defined(WITH_COMPLEX_SSE_BLOCK1_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL #ifndef WITH_FIXED_COMPLEX_KERNEL
...@@ -618,6 +624,7 @@ ...@@ -618,6 +624,7 @@
#endif /* REALCASE */ #endif /* REALCASE */
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
! avx block1 complex kernel ! avx block1 complex kernel
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL) #if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL #ifndef WITH_FIXED_COMPLEX_KERNEL
...@@ -656,6 +663,7 @@ ...@@ -656,6 +663,7 @@
#endif /* REALCASE */ #endif /* REALCASE */
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
! avx512 block1 complex kernel ! avx512 block1 complex kernel
#if defined(WITH_COMPLEX_AVX512_BLOCK1_KERNEL) #if defined(WITH_COMPLEX_AVX512_BLOCK1_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL #ifndef WITH_FIXED_COMPLEX_KERNEL
...@@ -693,6 +701,7 @@ ...@@ -693,6 +701,7 @@
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK2) then if (kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK2) then
#endif /* not WITH_FIXED_REAL_KERNEL */ #endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL) && !defined(WITH_REAL_SSE_BLOCK4_KERNEL)) #if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL) && !defined(WITH_REAL_SSE_BLOCK4_KERNEL))
...@@ -776,6 +785,7 @@ ...@@ -776,6 +785,7 @@
if ((kernel .eq. ELPA_2STAGE_REAL_AVX_BLOCK2) .or. & if ((kernel .eq. ELPA_2STAGE_REAL_AVX_BLOCK2) .or. &
(kernel .eq. ELPA_2STAGE_REAL_AVX2_BLOCK2)) then (kernel .eq. ELPA_2STAGE_REAL_AVX2_BLOCK2)) then
#endif /* not WITH_FIXED_REAL_KERNEL */ #endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX_BLOCK4_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK4_KERNEL)) #if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX_BLOCK4_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK4_KERNEL))
...@@ -807,6 +817,7 @@ ...@@ -807,6 +817,7 @@
#endif /* REALCASE */ #endif /* REALCASE */
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
! implementation of avx block 2 complex case ! implementation of avx block 2 complex case
#if defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL) #if defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL #ifndef WITH_FIXED_COMPLEX_KERNEL
...@@ -860,6 +871,7 @@ ...@@ -860,6 +871,7 @@
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
if ((kernel .eq. ELPA_2STAGE_REAL_AVX512_BLOCK2)) then if ((kernel .eq. ELPA_2STAGE_REAL_AVX512_BLOCK2)) then
#endif /* not WITH_FIXED_REAL_KERNEL */ #endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK4_KERNEL)) #if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK4_KERNEL))
...@@ -891,6 +903,7 @@ ...@@ -891,6 +903,7 @@
#endif /* REALCASE */ #endif /* REALCASE */
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
! implementation of avx512 block 2 complex case ! implementation of avx512 block 2 complex case
#if defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL) #if defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL #ifndef WITH_FIXED_COMPLEX_KERNEL
...@@ -941,6 +954,7 @@ ...@@ -941,6 +954,7 @@
#if defined(WITH_REAL_BGP_KERNEL) #if defined(WITH_REAL_BGP_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_BGP) then if (kernel .eq. ELPA_2STAGE_REAL_BGP) then
#endif /* not WITH_FIXED_REAL_KERNEL */ #endif /* not WITH_FIXED_REAL_KERNEL */
do j = ncols, 2, -2 do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off) w(:,1) = bcast_buffer(1:nbw,j+off)
...@@ -963,6 +977,7 @@ ...@@ -963,6 +977,7 @@
#if defined(WITH_REAL_BGQ_KERNEL) #if defined(WITH_REAL_BGQ_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_BGQ) then if (kernel .eq. ELPA_2STAGE_REAL_BGQ) then
#endif /* not WITH_FIXED_REAL_KERNEL */ #endif /* not WITH_FIXED_REAL_KERNEL */
do j = ncols, 2, -2 do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off) w(:,1) = bcast_buffer(1:nbw,j+off)
...@@ -989,20 +1004,19 @@ ...@@ -989,20 +1004,19 @@
#if REALCASE == 1 #if REALCASE == 1
#ifdef WITH_OPENMP #ifdef WITH_OPENMP
if (j==1) call single_hh_trafo_& if (j==1) call single_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_openmp_& &_cpu_openmp_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width, 1+off+a_off:1+off+a_off+nbw-1,istripe,my_thread), & & (a(1:stripe_width, 1+off+a_off:1+off+a_off+nbw-1,istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl,stripe_width) bcast_buffer(1:nbw,off+1), nbw, nl,stripe_width)
#else #else
if (j==1) call single_hh_trafo_& if (j==1) call single_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_& &_cpu_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl,& & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl,&
stripe_width) stripe_width)
#endif #endif
...@@ -1020,6 +1034,7 @@ ...@@ -1020,6 +1034,7 @@
#if defined(WITH_REAL_SSE_BLOCK4_KERNEL) #if defined(WITH_REAL_SSE_BLOCK4_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK4) then if (kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK4) then
#endif /* not WITH_FIXED_REAL_KERNEL */ #endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL)) #if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL))
...@@ -1065,14 +1080,14 @@ ...@@ -1065,14 +1080,14 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_openmp_& &_cpu_openmp_&
&PRECISION& &PRECISION&
& (obj,a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), & & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width) bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else #else
if (jj==1) call single_hh_trafo_& if (jj==1) call single_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_& &_cpu_&
&PRECISION& &PRECISION&
& (obj,a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width) & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif #endif
#endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL)) */ #endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL)) */
...@@ -1094,6 +1109,7 @@ ...@@ -1094,6 +1109,7 @@
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
if ((kernel .eq. ELPA_2STAGE_REAL_AVX_BLOCK4) .or. & if ((kernel .eq. ELPA_2STAGE_REAL_AVX_BLOCK4) .or. &
(kernel .eq. ELPA_2STAGE_REAL_AVX2_BLOCK4)) then (kernel .eq. ELPA_2STAGE_REAL_AVX2_BLOCK4)) then
#endif /* not WITH_FIXED_REAL_KERNEL */ #endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL)) #if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL))
...@@ -1139,14 +1155,14 @@ ...@@ -1139,14 +1155,14 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_openmp_& &_cpu_openmp_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), & & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width) bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else #else
if (jj==1) call single_hh_trafo_& if (jj==1) call single_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_& &_cpu_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width) & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif #endif
#endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL)) */ #endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL)) */
...@@ -1213,14 +1229,14 @@ ...@@ -1213,14 +1229,14 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_openmp_& &_cpu_openmp_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), & & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width) bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else #else
if (jj==1) call single_hh_trafo_& if (jj==1) call single_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_& &_cpu_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), & & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width) bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif #endif
...@@ -1242,6 +1258,7 @@ ...@@ -1242,6 +1258,7 @@
#if defined(WITH_REAL_SSE_BLOCK6_KERNEL) #if defined(WITH_REAL_SSE_BLOCK6_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK6) then if (kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK6) then
#endif /* not WITH_FIXED_REAL_KERNEL */ #endif /* not WITH_FIXED_REAL_KERNEL */
! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS ! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS
do j = ncols, 6, -6 do j = ncols, 6, -6
...@@ -1307,14 +1324,14 @@ ...@@ -1307,14 +1324,14 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_openmp_& &_cpu_openmp_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), & & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width) bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else #else
if (jjj==1) call single_hh_trafo_& if (jjj==1) call single_hh_trafo_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_& &_cpu_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width) & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif #endif
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
endif endif
...@@ -1334,6 +1351,7 @@ ...@@ -1334,6 +1351,7 @@
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
if ((kernel .eq. ELPA_2STAGE_REAL_AVX_BLOCK6) .or. & if ((kernel .eq. ELPA_2STAGE_REAL_AVX_BLOCK6) .or. &
(kernel .eq. ELPA_2STAGE_REAL_AVX2_BLOCK6)) then (kernel .eq. ELPA_2STAGE_REAL_AVX2_BLOCK6)) then
#endif /* not WITH_FIXED_REAL_KERNEL */ #endif /* not WITH_FIXED_REAL_KERNEL */
! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS ! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS
do j = ncols, 6, -6 do j = ncols, 6, -6
...@@ -1398,14 +1416,14 @@ ...@@ -1398,14 +1416,14 @@
&MATH_DATATYPE& &MATH_DATATYPE&
&_cpu_openmp_& &_cpu_openmp_&
&PRECISION& &PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), & & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width) bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else