Commit 23334f01 authored by Andreas Marek
Browse files

Fix problem with legacy build

parent f867d600
......@@ -170,6 +170,7 @@
#endif
real(kind=c_double) :: ttt ! MPI_WTIME always needs double
#if REALCASE == 1
if (kernel .eq. ELPA_2STAGE_REAL_GPU) then
! ncols - indicates the number of HH reflectors to apply; at least 1 must be available
......@@ -304,7 +305,6 @@
#endif /* REALCASE */
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!FORTRAN CODE / X86 INTRINSIC CODE / BG ASSEMBLER USING 2 HOUSEHOLDER VECTORS
#if REALCASE == 1
! generic kernel real case
......@@ -324,14 +324,14 @@
&MATH_DATATYPE&
&_generic_&
&PRECISION&
& (obj, a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
& (a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_&
&MATH_DATATYPE&
&_generic_&
&PRECISION&
& (obj, a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1, istripe,my_thread), w(1:nbw,1:6), &
& (a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1, istripe,my_thread), w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw)
#endif
......@@ -342,14 +342,14 @@
&MATH_DATATYPE&
&_generic_&
&PRECISION&
& (obj, a(1,j+off+a_off-1,istripe),w, nbw, nl, stripe_width, nbw)
& (a(1,j+off+a_off-1,istripe),w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_&
&MATH_DATATYPE&
&_generic_&
&PRECISION&
& (obj, a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1,istripe),w(1:nbw,1:6), nbw, nl, stripe_width, nbw)
& (a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1,istripe),w(1:nbw,1:6), nbw, nl, stripe_width, nbw)
#endif
#endif /* WITH_OPENMP */
......@@ -379,13 +379,13 @@
&MATH_DATATYPE&
&_generic_&
&PRECISION&
& (obj, a(1,j+off+a_off,istripe,my_thread), bcast_buffer(1,j+off),nbw,nl,stripe_width)
& (a(1,j+off+a_off,istripe,my_thread), bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else
call single_hh_trafo_&
&MATH_DATATYPE&
&_generic_&
&PRECISION&
& (obj, a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
& (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif
#else /* WITH_OPENMP */
......@@ -395,13 +395,13 @@
&MATH_DATATYPE&
&_generic_&
&PRECISION&
& (obj, a(1,j+off+a_off,istripe), bcast_buffer(1,j+off),nbw,nl,stripe_width)
& (a(1,j+off+a_off,istripe), bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else
call single_hh_trafo_&
&MATH_DATATYPE&
&_generic_&
&PRECISION&
& (obj, a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
& (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif
#endif /* WITH_OPENMP */
......@@ -414,6 +414,8 @@
#endif /* COMPLEXCASE */
#if REALCASE == 1
! generic simple real kernel
#if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL
......@@ -429,13 +431,13 @@
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
& (a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, a(1:stripe_width,j+off+a_off-1:j+off+a_off-1+nbw,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
& (a(1:stripe_width,j+off+a_off-1:j+off+a_off-1+nbw,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
#endif
......@@ -446,13 +448,13 @@
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
& (a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, a(1:stripe_width,j+off+a_off-1:j+off+a_off-1+nbw,istripe), w, nbw, nl, stripe_width, nbw)
& (a(1:stripe_width,j+off+a_off-1:j+off+a_off-1+nbw,istripe), w, nbw, nl, stripe_width, nbw)
#endif
......@@ -481,13 +483,13 @@
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, a(1,j+off+a_off,istripe,my_thread), bcast_buffer(1,j+off),nbw,nl,stripe_width)
& (a(1,j+off+a_off,istripe,my_thread), bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else
call single_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, a(1:stripe_width, j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
& (a(1:stripe_width, j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif
#else /* WITH_OPENMP */
......@@ -497,13 +499,13 @@
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, a(1,j+off+a_off,istripe), bcast_buffer(1,j+off),nbw,nl,stripe_width)
& (a(1,j+off+a_off,istripe), bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else
call single_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
& (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif
#endif /* WITH_OPENMP */
......@@ -512,6 +514,7 @@
endif ! (kernel .eq. ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE)
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_GENERIC_SIMPLE_KERNEL */
#endif /* COMPLEXCASE */
#if REALCASE == 1
......@@ -519,6 +522,7 @@
#if defined(WITH_REAL_SSE_ASSEMBLY_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_SSE_ASSEMBLY) then
#endif /* not WITH_FIXED_REAL_KERNEL */
do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off)
......@@ -547,6 +551,7 @@
#endif /* REALCASE */
#if COMPLEXCASE == 1
! sse assembly kernel complex case
#if defined(WITH_COMPLEX_SSE_ASSEMBLY_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL
......@@ -581,6 +586,7 @@
#endif
#if COMPLEXCASE == 1
! sse block1 complex kernel
#if defined(WITH_COMPLEX_SSE_BLOCK1_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL
......@@ -618,6 +624,7 @@
#endif /* REALCASE */
#if COMPLEXCASE == 1
! avx block1 complex kernel
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL
......@@ -656,6 +663,7 @@
#endif /* REALCASE */
#if COMPLEXCASE == 1
! avx512 block1 complex kernel
#if defined(WITH_COMPLEX_AVX512_BLOCK1_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL
......@@ -693,6 +701,7 @@
#ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK2) then
#endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL) && !defined(WITH_REAL_SSE_BLOCK4_KERNEL))
......@@ -776,6 +785,7 @@
if ((kernel .eq. ELPA_2STAGE_REAL_AVX_BLOCK2) .or. &
(kernel .eq. ELPA_2STAGE_REAL_AVX2_BLOCK2)) then
#endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX_BLOCK4_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK4_KERNEL))
......@@ -807,6 +817,7 @@
#endif /* REALCASE */
#if COMPLEXCASE == 1
! implementation of avx block 2 complex case
#if defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL
......@@ -860,6 +871,7 @@
#ifndef WITH_FIXED_REAL_KERNEL
if ((kernel .eq. ELPA_2STAGE_REAL_AVX512_BLOCK2)) then
#endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK4_KERNEL))
......@@ -891,6 +903,7 @@
#endif /* REALCASE */
#if COMPLEXCASE == 1
! implementation of avx512 block 2 complex case
#if defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL
......@@ -941,6 +954,7 @@
#if defined(WITH_REAL_BGP_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_BGP) then
#endif /* not WITH_FIXED_REAL_KERNEL */
do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off)
......@@ -963,6 +977,7 @@
#if defined(WITH_REAL_BGQ_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_BGQ) then
#endif /* not WITH_FIXED_REAL_KERNEL */
do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off)
......@@ -989,20 +1004,19 @@
#if REALCASE == 1
#ifdef WITH_OPENMP
if (j==1) call single_hh_trafo_&
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION&
& (obj, a(1:stripe_width, 1+off+a_off:1+off+a_off+nbw-1,istripe,my_thread), &
& (a(1:stripe_width, 1+off+a_off:1+off+a_off+nbw-1,istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl,stripe_width)
#else
if (j==1) call single_hh_trafo_&
&MATH_DATATYPE&
&_cpu_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl,&
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl,&
stripe_width)
#endif
......@@ -1020,6 +1034,7 @@
#if defined(WITH_REAL_SSE_BLOCK4_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK4) then
#endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL))
......@@ -1065,14 +1080,14 @@
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION&
& (obj,a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else
if (jj==1) call single_hh_trafo_&
&MATH_DATATYPE&
&_cpu_&
&PRECISION&
& (obj,a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif
#endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL)) */
......@@ -1094,6 +1109,7 @@
#ifndef WITH_FIXED_REAL_KERNEL
if ((kernel .eq. ELPA_2STAGE_REAL_AVX_BLOCK4) .or. &
(kernel .eq. ELPA_2STAGE_REAL_AVX2_BLOCK4)) then
#endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL))
......@@ -1139,14 +1155,14 @@
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else
if (jj==1) call single_hh_trafo_&
&MATH_DATATYPE&
&_cpu_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif
#endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL)) */
......@@ -1213,14 +1229,14 @@
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else
if (jj==1) call single_hh_trafo_&
&MATH_DATATYPE&
&_cpu_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), &
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif
......@@ -1242,6 +1258,7 @@
#if defined(WITH_REAL_SSE_BLOCK6_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK6) then
#endif /* not WITH_FIXED_REAL_KERNEL */
! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS
do j = ncols, 6, -6
......@@ -1307,14 +1324,14 @@
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else
if (jjj==1) call single_hh_trafo_&
&MATH_DATATYPE&
&_cpu_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif
#ifndef WITH_FIXED_REAL_KERNEL
endif
......@@ -1334,6 +1351,7 @@
#ifndef WITH_FIXED_REAL_KERNEL
if ((kernel .eq. ELPA_2STAGE_REAL_AVX_BLOCK6) .or. &
(kernel .eq. ELPA_2STAGE_REAL_AVX2_BLOCK6)) then
#endif /* not WITH_FIXED_REAL_KERNEL */
! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS
do j = ncols, 6, -6
......@@ -1398,14 +1416,14 @@
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else
if (jjj==1) call single_hh_trafo_&
&MATH_DATATYPE&
&_cpu_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), &
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif
#ifndef WITH_FIXED_REAL_KERNEL
......@@ -1426,6 +1444,7 @@
if ((kernel .eq. ELPA_2STAGE_REAL_AVX512_BLOCK6)) then
#endif /* not WITH_FIXED_REAL_KERNEL */
! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS
do j = ncols, 6, -6
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
......@@ -1488,14 +1507,14 @@
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else
if (jjj==1) call single_hh_trafo_&
&MATH_DATATYPE&
&_cpu_&
&PRECISION&
& (obj, a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
& (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif
#ifndef WITH_FIXED_REAL_KERNEL
endif
......
......@@ -59,15 +59,15 @@
! the Intel compiler creates a temp array copy of array q!
! this should be prevented, if possible without using assumed size arrays
#ifdef DOUBLE_PRECISION_COMPLEX
subroutine single_hh_trafo_complex_generic_double(obj, q, hh, nb, nq, ldq)
subroutine single_hh_trafo_complex_generic_double(q, hh, nb, nq, ldq)
#else
subroutine single_hh_trafo_complex_generic_single(obj, q, hh, nb, nq, ldq)
subroutine single_hh_trafo_complex_generic_single(q, hh, nb, nq, ldq)
#endif
use precision
use elpa_abstract_impl
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
! class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: nb, nq, ldq
#ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE), intent(inout) :: q(ldq,*)
......@@ -79,15 +79,15 @@
integer(kind=ik) :: i
#ifdef DOUBLE_PRECISION_COMPLEX
call obj%timer%start("kernel generic: single_hh_trafo_complex_generic_double")
#else
call obj%timer%start("kernel generic: single_hh_trafo_complex_generic_single")
#endif
!#ifdef DOUBLE_PRECISION_COMPLEX
!
! call obj%timer%start("kernel generic: single_hh_trafo_complex_generic_double")
!
!#else
!
! call obj%timer%start("kernel generic: single_hh_trafo_complex_generic_single")
!
!#endif
! Safety only:
if(mod(ldq,4) /= 0) STOP 'double_hh_trafo: ldq not divisible by 4!'
......@@ -156,15 +156,15 @@
#endif
endif
#ifdef DOUBLE_PRECISION_COMPLEX
call obj%timer%stop("kernel generic: single_hh_trafo_complex_generic_double")
#else
call obj%timer%stop("kernel generic: single_hh_trafo_complex_generic_single")
#endif
!#ifdef DOUBLE_PRECISION_COMPLEX
!
! call obj%timer%stop("kernel generic: single_hh_trafo_complex_generic_double")
!
!#else
!
! call obj%timer%stop("kernel generic: single_hh_trafo_complex_generic_single")
!
!#endif
#ifdef DOUBLE_PRECISION_COMPLEX
end subroutine single_hh_trafo_complex_generic_double
......
......@@ -62,14 +62,14 @@
&MATH_DATATYPE&
&_generic_&
&PRECISION&
& (obj, q, hh, nb, nq, ldq, ldh)
& (q, hh, nb, nq, ldq, ldh)
use precision
use iso_c_binding
use elpa_abstract_impl
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
!class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
#ifdef USE_ASSUMED_SIZE
real(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*)
......@@ -86,11 +86,11 @@
! Safety only:
call obj%timer%start("kernel generic: double_hh_trafo_&
&MATH_DATATYPE&
&_generic" // &
&PRECISION_SUFFIX &
)
! call obj%timer%start("kernel generic: double_hh_trafo_&
! &MATH_DATATYPE&
! &_generic" // &
! &PRECISION_SUFFIX &
! )
if(mod(ldq,4) /= 0) STOP 'double_hh_trafo: ldq not divisible by 4!'
......@@ -151,11 +151,11 @@
endif
call obj%timer%stop("kernel generic: double_hh_trafo_&
&MATH_DATATYPE&
&_generic" // &
&PRECISION_SUFFIX &
)
! call obj%timer%stop("kernel generic: double_hh_trafo_&
! &MATH_DATATYPE&
! &_generic" // &
! &PRECISION_SUFFIX &
! )
end subroutine
......
......@@ -66,12 +66,12 @@
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, q, hh, nb, nq, ldq)
& (q, hh, nb, nq, ldq)
use precision
use elpa_abstract_impl
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
!class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: nb, nq, ldq
#ifdef USE_ASSUMED_SIZE
complex(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*)
......@@ -83,13 +83,13 @@
integer(kind=ik) :: i
complex(kind=C_DATATYPE_KIND) :: h1, tau1, x(nq)
call obj%timer%start("kernel_&
&MATH_DATATYPE&
&_generic_simple: single_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple" // &
&PRECISION_SUFFIX &
)
!call obj%timer%start("kernel_&
!&MATH_DATATYPE&
!&_generic_simple: single_hh_trafo_&
!&MATH_DATATYPE&
!&_generic_simple" // &
!&PRECISION_SUFFIX &
!)
! Just one Householder transformation
......@@ -109,13 +109,13 @@
enddo
call obj%timer%stop("kernel_&
&MATH_DATATYPE&
&_generic_simple: single_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple" // &
&PRECISION_SUFFIX &
)
!call obj%timer%stop("kernel_&
!&MATH_DATATYPE&
!&_generic_simple: single_hh_trafo_&
!&MATH_DATATYPE&
!&_generic_simple" // &
!&PRECISION_SUFFIX &
!)
end subroutine
......@@ -128,7 +128,7 @@
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, q, hh, nb, nq, ldq, ldh)
& (q, hh, nb, nq, ldq, ldh)
#endif /* REALCASE == 1 */
......@@ -138,7 +138,7 @@
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (obj, q, hh, nb, nq, ldq, ldh)
& (q, hh, nb, nq, ldq, ldh)
#endif /* COMPLEXCASE==1 */
......@@ -146,7 +146,7 @@
use elpa_abstract_impl
implicit none