diff --git a/src/elpa2_kernels/elpa2_kernels_complex.F90 b/src/elpa2_kernels/elpa2_kernels_complex.F90 index 85a35e50987979536985d8f0bc5d6eaa968585eb..02efee2bcf09c98238e1f7a05e1bfc6071d3948a 100644 --- a/src/elpa2_kernels/elpa2_kernels_complex.F90 +++ b/src/elpa2_kernels/elpa2_kernels_complex.F90 @@ -69,8 +69,13 @@ contains implicit none integer(kind=ik), intent(in) :: nb, nq, ldq +#ifdef DESPERATELY_WANT_ASSUMED_SIZE complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(in) :: hh(*) +#else + complex(kind=ck), intent(inout) :: q(1:ldq,1:nb) + complex(kind=ck), intent(in) :: hh(1:nb) +#endif integer(kind=ik) :: i #ifdef HAVE_DETAILED_TIMINGS @@ -86,15 +91,27 @@ contains ! Always a multiple of 4 Q-rows is transformed, even if nq is smaller do i=1,nq-8,12 +#ifdef DESPERATELY_WANT_ASSUMED_SIZE call hh_trafo_complex_kernel_12(q(i,1),hh, nb, ldq) +#else + call hh_trafo_complex_kernel_12(q(i:ldq,1:nb),hh(1:nb), nb, ldq) +#endif enddo ! i > nq-8 now, i.e. at most 8 rows remain if(nq-i+1 > 4) then +#ifdef DESPERATELY_WANT_ASSUMED_SIZE call hh_trafo_complex_kernel_8(q(i,1),hh, nb, ldq) +#else + call hh_trafo_complex_kernel_8(q(i:ldq,1:nb),hh(1:nb), nb, ldq) +#endif else if(nq-i+1 > 0) then +#ifdef DESPERATELY_WANT_ASSUMED_SIZE call hh_trafo_complex_kernel_4(q(i,1),hh, nb, ldq) +#else + call hh_trafo_complex_kernel_4(q(i:ldq,1:nb),hh(1:nb), nb, ldq) +#endif endif #ifdef HAVE_DETAILED_TIMINGS call timer%stop("kernel generic: single_hh_trafo_complex_generic") @@ -111,8 +128,13 @@ contains implicit none integer(kind=ik), intent(in) :: nb, nq, ldq, ldh +#ifdef DESPERATELY_WANT_ASSUMED_SIZE complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(in) :: hh(ldh,*) +#else + complex(kind=ck), intent(inout) :: q(1:ldq,1:nb+1) + complex(kind=ck), intent(in) :: hh(1:ldh,1:2) +#endif complex(kind=ck) :: s integer(kind=ik) :: i @@ -136,19 +158,35 @@ contains ! 
Always a multiple of 4 Q-rows is transformed, even if nq is smaller do i=1,nq,4 +#ifdef DESPERATELY_WANT_ASSUMED_SIZE call hh_trafo_complex_kernel_4_2hv(q(i,1),hh, nb, ldq, ldh, s) +#else + call hh_trafo_complex_kernel_4_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s) +#endif enddo !do i=1,nq-8,12 +#ifdef DESPERATELY_WANT_ASSUMED_SIZE ! call hh_trafo_complex_kernel_12_2hv(q(i,1),hh, nb, ldq, ldh, s) +#else + ! call hh_trafo_complex_kernel_12_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s) +#endif !enddo ! i > nq-8 now, i.e. at most 8 rows remain !if(nq-i+1 > 4) then +#ifdef DESPERATELY_WANT_ASSUMED_SIZE ! call hh_trafo_complex_kernel_8_2hv(q(i,1),hh, nb, ldq, ldh, s) +#else + ! call hh_trafo_complex_kernel_8_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s) +#endif !else if(nq-i+1 > 0) then +#ifdef DESPERATELY_WANT_ASSUMED_SIZE ! call hh_trafo_complex_kernel_4_2hv(q(i,1),hh, nb, ldq, ldh, s) +#else + ! call hh_trafo_complex_kernel_4_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s) +#endif !endif #ifdef HAVE_DETAILED_TIMINGS call timer%stop("kernel generic: double_hh_trafo_complex_generic") @@ -166,9 +204,13 @@ contains implicit none integer(kind=ik), intent(in) :: nb, ldq +#ifdef DESPERATELY_WANT_ASSUMED_SIZE complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(in) :: hh(*) - +#else + complex(kind=ck), intent(inout) :: q(:,:) + complex(kind=ck), intent(in) :: hh(1:nb) +#endif complex(kind=ck) :: x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc complex(kind=ck) :: h1, tau1 integer(kind=ik) :: i @@ -269,9 +311,13 @@ contains implicit none integer(kind=ik), intent(in) :: nb, ldq +#ifdef DESPERATELY_WANT_ASSUMED_SIZE complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(in) :: hh(*) - +#else + complex(kind=ck), intent(inout) :: q(:,:) + complex(kind=ck), intent(in) :: hh(1:nb) +#endif complex(kind=ck) :: x1, x2, x3, x4, x5, x6, x7, x8 complex(kind=ck) :: h1, tau1 integer(kind=ik) :: i @@ -351,9 +397,13 @@ contains implicit
none integer(kind=ik), intent(in) :: nb, ldq +#ifdef DESPERATELY_WANT_ASSUMED_SIZE complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(in) :: hh(*) - +#else + complex(kind=ck), intent(inout) :: q(:,:) + complex(kind=ck), intent(in) :: hh(1:nb) +#endif complex(kind=ck) :: x1, x2, x3, x4 complex(kind=ck) :: h1, tau1 integer(kind=ik) :: i @@ -412,8 +462,13 @@ contains implicit none integer(kind=ik), intent(in) :: nb, ldq, ldh +#ifdef DESPERATELY_WANT_ASSUMED_SIZE complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(in) :: hh(ldh,*) +#else + complex(kind=ck), intent(inout) :: q(:,:) + complex(kind=ck), intent(in) :: hh(1:ldh,1:2) +#endif complex(kind=ck), intent(in) :: s complex(kind=ck) :: x1, x2, x3, x4, y1, y2, y3, y4 @@ -506,8 +561,13 @@ contains implicit none integer(kind=ik), intent(in) :: nb, ldq, ldh +#ifdef DESPERATELY_WANT_ASSUMED_SIZE complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(in) :: hh(ldh,*) +#else + complex(kind=ck), intent(inout) :: q(:,:) + complex(kind=ck), intent(in) :: hh(1:ldh,1:2) +#endif complex(kind=ck), intent(in) :: s complex(kind=ck) :: x1, x2, x3, x4, x5, x6 ,x7, x8, y1, y2, y3, y4, y5, y6, y7, y8 @@ -647,8 +707,13 @@ contains implicit none integer(kind=ik), intent(in) :: nb, ldq, ldh +#ifdef DESPERATELY_WANT_ASSUMED_SIZE complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(in) :: hh(ldh,*) +#else + complex(kind=ck), intent(inout) :: q(:,:) + complex(kind=ck), intent(in) :: hh(1:ldh,1:2) +#endif complex(kind=ck), intent(in) :: s complex(kind=ck) :: x1, x2, x3, x4, x5, x6 ,x7, x8, x9, x10, x11, x12, y1, y2, y3, y4, y5, y6, & diff --git a/src/mod_compute_hh_trafo_complex.F90 b/src/mod_compute_hh_trafo_complex.F90 index 9cdd50126be97f7ab9a2679a2e52022536c37a91..c7682555f53b6928643a69e0f4ad200c04a777c7 100644 --- a/src/mod_compute_hh_trafo_complex.F90 +++ b/src/mod_compute_hh_trafo_complex.F90 @@ -150,12 +150,25 @@ module compute_hh_trafo_complex ttt = mpi_wtime() do j = 
ncols, 1, -1 #ifdef WITH_OPENMP +#ifdef DESPERATELY_WANT_ASSUMED_SIZE + call single_hh_trafo_complex_generic(a(1,j+off+a_off,istripe,my_thread), & bcast_buffer(1,j+off),nbw,nl,stripe_width) #else + call single_hh_trafo_complex_generic(a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), & + bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width) +#endif + +#else /* WITH_OPENMP */ +#ifdef DESPERATELY_WANT_ASSUMED_SIZE call single_hh_trafo_complex_generic(a(1,j+off+a_off,istripe), & bcast_buffer(1,j+off),nbw,nl,stripe_width) +#else + call single_hh_trafo_complex_generic(a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), & + bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width) #endif +#endif /* WITH_OPENMP */ + enddo #if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) endif diff --git a/src/mod_compute_hh_trafo_real.F90 b/src/mod_compute_hh_trafo_real.F90 index cc642e26824e70de21bd4beaab87a55c3b00926e..3cf7e18cdeae736ea24bfb29cf5880129c83556a 100644 --- a/src/mod_compute_hh_trafo_real.F90 +++ b/src/mod_compute_hh_trafo_real.F90 @@ -131,7 +131,7 @@ module compute_hh_trafo_real #else call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1, & istripe,my_thread), w(1:nbw,1:6), & - nbw, nl, stripe_width, nbw) + nbw, nl, stripe_width, nbw) #endif #else /* WITH_OPENMP */ @@ -141,8 +141,8 @@ module compute_hh_trafo_real nbw, nl, stripe_width, nbw) #else - call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw+1,istripe),w(1:nbw,1:6), & - nbw, nl, stripe_width, nbw) + call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1,istripe),w(1:nbw,1:6), & + nbw, nl, stripe_width, nbw) #endif #endif /* WITH_OPENMP */