Commit 1da1bd50 authored by Andreas Marek's avatar Andreas Marek
Browse files

Remove assumed size arrays from generic complex kernel

This change might be performance-critical and has to be timed
carefully. It is therefore possible to switch back to the old
implementation (by defining DESPERATELY_WANT_ASSUMED_SIZE). The new one, however, can actually be debugged.
parent 0382af47
...@@ -69,8 +69,13 @@ contains ...@@ -69,8 +69,13 @@ contains
implicit none implicit none
integer(kind=ik), intent(in) :: nb, nq, ldq integer(kind=ik), intent(in) :: nb, nq, ldq
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*) complex(kind=ck), intent(in) :: hh(*)
#else
complex(kind=ck), intent(inout) :: q(1:ldq,1:nb)
complex(kind=ck), intent(in) :: hh(1:nb)
#endif
integer(kind=ik) :: i integer(kind=ik) :: i
#ifdef HAVE_DETAILED_TIMINGS #ifdef HAVE_DETAILED_TIMINGS
...@@ -86,15 +91,27 @@ contains ...@@ -86,15 +91,27 @@ contains
! Always a multiple of 4 Q-rows is transformed, even if nq is smaller ! Always a multiple of 4 Q-rows is transformed, even if nq is smaller
do i=1,nq-8,12 do i=1,nq-8,12
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call hh_trafo_complex_kernel_12(q(i,1),hh, nb, ldq) call hh_trafo_complex_kernel_12(q(i,1),hh, nb, ldq)
#else
call hh_trafo_complex_kernel_12(q(i:ldq,1:nb),hh(1:nb), nb, ldq)
#endif
enddo enddo
! i > nq-8 now, i.e. at most 8 rows remain ! i > nq-8 now, i.e. at most 8 rows remain
if(nq-i+1 > 4) then if(nq-i+1 > 4) then
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call hh_trafo_complex_kernel_8(q(i,1),hh, nb, ldq) call hh_trafo_complex_kernel_8(q(i,1),hh, nb, ldq)
#else
call hh_trafo_complex_kernel_8(q(i:ldq,1:nb),hh(1:nb), nb, ldq)
#endif
else if(nq-i+1 > 0) then else if(nq-i+1 > 0) then
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call hh_trafo_complex_kernel_4(q(i,1),hh, nb, ldq) call hh_trafo_complex_kernel_4(q(i,1),hh, nb, ldq)
#else
call hh_trafo_complex_kernel_4(q(i:ldq,1:nb),hh(1:nb), nb, ldq)
#endif
endif endif
#ifdef HAVE_DETAILED_TIMINGS #ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel generic: single_hh_trafo_complex_generic") call timer%stop("kernel generic: single_hh_trafo_complex_generic")
...@@ -111,8 +128,13 @@ contains ...@@ -111,8 +128,13 @@ contains
implicit none implicit none
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*) complex(kind=ck), intent(in) :: hh(ldh,*)
#else
complex(kind=ck), intent(inout) :: q(1:ldq,1:nb+1)
complex(kind=ck), intent(in) :: hh(1:ldh,1:2)
#endif
complex(kind=ck) :: s complex(kind=ck) :: s
integer(kind=ik) :: i integer(kind=ik) :: i
...@@ -136,19 +158,35 @@ contains ...@@ -136,19 +158,35 @@ contains
! Always a multiple of 4 Q-rows is transformed, even if nq is smaller ! Always a multiple of 4 Q-rows is transformed, even if nq is smaller
do i=1,nq,4 do i=1,nq,4
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call hh_trafo_complex_kernel_4_2hv(q(i,1),hh, nb, ldq, ldh, s) call hh_trafo_complex_kernel_4_2hv(q(i,1),hh, nb, ldq, ldh, s)
#else
call hh_trafo_complex_kernel_4_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s)
#endif
enddo enddo
!do i=1,nq-8,12 !do i=1,nq-8,12
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
! call hh_trafo_complex_kernel_12_2hv(q(i,1),hh, nb, ldq, ldh, s) ! call hh_trafo_complex_kernel_12_2hv(q(i,1),hh, nb, ldq, ldh, s)
#else
! call hh_trafo_complex_kernel_12_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s)
#endif
!enddo !enddo
! i > nq-8 now, i.e. at most 8 rows remain ! i > nq-8 now, i.e. at most 8 rows remain
!if(nq-i+1 > 4) then !if(nq-i+1 > 4) then
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
! call hh_trafo_complex_kernel_8_2hv(q(i,1),hh, nb, ldq, ldh, s) ! call hh_trafo_complex_kernel_8_2hv(q(i,1),hh, nb, ldq, ldh, s)
#else
! call hh_trafo_complex_kernel_8_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s)
#endif
!else if(nq-i+1 > 0) then !else if(nq-i+1 > 0) then
! call hh_trafo_complex_kernel_4_2hv(q(i,1),hh, nb, ldq, ldh, s) #ifdef DESPERATELY_WANT_ASSUMED_SIZE
! call hh_trafo_complex_kernel_4_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s)
#else
#endif
!endif !endif
#ifdef HAVE_DETAILED_TIMINGS #ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel generic: double_hh_trafo_complex_generic") call timer%stop("kernel generic: double_hh_trafo_complex_generic")
...@@ -166,9 +204,13 @@ contains ...@@ -166,9 +204,13 @@ contains
implicit none implicit none
integer(kind=ik), intent(in) :: nb, ldq integer(kind=ik), intent(in) :: nb, ldq
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*) complex(kind=ck), intent(in) :: hh(*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:nb)
#endif
complex(kind=ck) :: x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc complex(kind=ck) :: x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc
complex(kind=ck) :: h1, tau1 complex(kind=ck) :: h1, tau1
integer(kind=ik) :: i integer(kind=ik) :: i
...@@ -269,9 +311,13 @@ contains ...@@ -269,9 +311,13 @@ contains
implicit none implicit none
integer(kind=ik), intent(in) :: nb, ldq integer(kind=ik), intent(in) :: nb, ldq
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*) complex(kind=ck), intent(in) :: hh(*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:nb)
#endif
complex(kind=ck) :: x1, x2, x3, x4, x5, x6, x7, x8 complex(kind=ck) :: x1, x2, x3, x4, x5, x6, x7, x8
complex(kind=ck) :: h1, tau1 complex(kind=ck) :: h1, tau1
integer(kind=ik) :: i integer(kind=ik) :: i
...@@ -351,9 +397,13 @@ contains ...@@ -351,9 +397,13 @@ contains
implicit none implicit none
integer(kind=ik), intent(in) :: nb, ldq integer(kind=ik), intent(in) :: nb, ldq
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*) complex(kind=ck), intent(in) :: hh(*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:nb)
#endif
complex(kind=ck) :: x1, x2, x3, x4 complex(kind=ck) :: x1, x2, x3, x4
complex(kind=ck) :: h1, tau1 complex(kind=ck) :: h1, tau1
integer(kind=ik) :: i integer(kind=ik) :: i
...@@ -412,8 +462,13 @@ contains ...@@ -412,8 +462,13 @@ contains
implicit none implicit none
integer(kind=ik), intent(in) :: nb, ldq, ldh integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*) complex(kind=ck), intent(in) :: hh(ldh,*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:ldh,1:2)
#endif
complex(kind=ck), intent(in) :: s complex(kind=ck), intent(in) :: s
complex(kind=ck) :: x1, x2, x3, x4, y1, y2, y3, y4 complex(kind=ck) :: x1, x2, x3, x4, y1, y2, y3, y4
...@@ -506,8 +561,13 @@ contains ...@@ -506,8 +561,13 @@ contains
implicit none implicit none
integer(kind=ik), intent(in) :: nb, ldq, ldh integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*) complex(kind=ck), intent(in) :: hh(ldh,*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:ldh,1:2)
#endif
complex(kind=ck), intent(in) :: s complex(kind=ck), intent(in) :: s
complex(kind=ck) :: x1, x2, x3, x4, x5, x6 ,x7, x8, y1, y2, y3, y4, y5, y6, y7, y8 complex(kind=ck) :: x1, x2, x3, x4, x5, x6 ,x7, x8, y1, y2, y3, y4, y5, y6, y7, y8
...@@ -647,8 +707,13 @@ contains ...@@ -647,8 +707,13 @@ contains
implicit none implicit none
integer(kind=ik), intent(in) :: nb, ldq, ldh integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*) complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*) complex(kind=ck), intent(in) :: hh(ldh,*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:ldh,1:2)
#endif
complex(kind=ck), intent(in) :: s complex(kind=ck), intent(in) :: s
complex(kind=ck) :: x1, x2, x3, x4, x5, x6 ,x7, x8, x9, x10, x11, x12, y1, y2, y3, y4, y5, y6, & complex(kind=ck) :: x1, x2, x3, x4, x5, x6 ,x7, x8, x9, x10, x11, x12, y1, y2, y3, y4, y5, y6, &
......
...@@ -150,12 +150,25 @@ module compute_hh_trafo_complex ...@@ -150,12 +150,25 @@ module compute_hh_trafo_complex
ttt = mpi_wtime() ttt = mpi_wtime()
do j = ncols, 1, -1 do j = ncols, 1, -1
#ifdef WITH_OPENMP #ifdef WITH_OPENMP
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call single_hh_trafo_complex_generic(a(1,j+off+a_off,istripe,my_thread), & call single_hh_trafo_complex_generic(a(1,j+off+a_off,istripe,my_thread), &
bcast_buffer(1,j+off),nbw,nl,stripe_width) bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else #else
call single_hh_trafo_complex_generic(a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), &
bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif
#else /* WITH_OPENMP */
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call single_hh_trafo_complex_generic(a(1,j+off+a_off,istripe), & call single_hh_trafo_complex_generic(a(1,j+off+a_off,istripe), &
bcast_buffer(1,j+off),nbw,nl,stripe_width) bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else
call single_hh_trafo_complex_generic(a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), &
bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif #endif
#endif /* WITH_OPENMP */
enddo enddo
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) #if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif endif
......
...@@ -131,7 +131,7 @@ module compute_hh_trafo_real ...@@ -131,7 +131,7 @@ module compute_hh_trafo_real
#else #else
call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1, & call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1, &
istripe,my_thread), w(1:nbw,1:6), & istripe,my_thread), w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw) nbw, nl, stripe_width, nbw)
#endif #endif
#else /* WITH_OPENMP */ #else /* WITH_OPENMP */
...@@ -141,8 +141,8 @@ module compute_hh_trafo_real ...@@ -141,8 +141,8 @@ module compute_hh_trafo_real
nbw, nl, stripe_width, nbw) nbw, nl, stripe_width, nbw)
#else #else
call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw+1,istripe),w(1:nbw,1:6), & call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1,istripe),w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw) nbw, nl, stripe_width, nbw)
#endif #endif
#endif /* WITH_OPENMP */ #endif /* WITH_OPENMP */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment