Commit 1da1bd50 authored by Andreas Marek

Remove assumed size arrays from generic complex kernel

This change might be performance critical and has to be timed
carefully. Thus it is possible to switch back to the old
implementation. The new one, however, can actually be debugged.
parent 0382af47
......@@ -69,8 +69,13 @@ contains
implicit none
integer(kind=ik), intent(in) :: nb, nq, ldq
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*)
#else
complex(kind=ck), intent(inout) :: q(1:ldq,1:nb)
complex(kind=ck), intent(in) :: hh(1:nb)
#endif
integer(kind=ik) :: i
#ifdef HAVE_DETAILED_TIMINGS
......@@ -86,15 +91,27 @@ contains
! Always a multiple of 4 Q-rows is transformed, even if nq is smaller
do i=1,nq-8,12
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call hh_trafo_complex_kernel_12(q(i,1),hh, nb, ldq)
#else
call hh_trafo_complex_kernel_12(q(i:ldq,1:nb),hh(1:nb), nb, ldq)
#endif
enddo
! i > nq-8 now, i.e. at most 8 rows remain
if(nq-i+1 > 4) then
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call hh_trafo_complex_kernel_8(q(i,1),hh, nb, ldq)
#else
call hh_trafo_complex_kernel_8(q(i:ldq,1:nb),hh(1:nb), nb, ldq)
#endif
else if(nq-i+1 > 0) then
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call hh_trafo_complex_kernel_4(q(i,1),hh, nb, ldq)
#else
call hh_trafo_complex_kernel_4(q(i:ldq,1:nb),hh(1:nb), nb, ldq)
#endif
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel generic: single_hh_trafo_complex_generic")
......@@ -111,8 +128,13 @@ contains
implicit none
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*)
#else
complex(kind=ck), intent(inout) :: q(1:ldq,1:nb+1)
complex(kind=ck), intent(in) :: hh(1:ldh,1:2)
#endif
complex(kind=ck) :: s
integer(kind=ik) :: i
......@@ -136,19 +158,35 @@ contains
! Always a multiple of 4 Q-rows is transformed, even if nq is smaller
do i=1,nq,4
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call hh_trafo_complex_kernel_4_2hv(q(i,1),hh, nb, ldq, ldh, s)
#else
call hh_trafo_complex_kernel_4_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s)
#endif
enddo
!do i=1,nq-8,12
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
! call hh_trafo_complex_kernel_12_2hv(q(i,1),hh, nb, ldq, ldh, s)
#else
! call hh_trafo_complex_kernel_12_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s)
#endif
!enddo
! i > nq-8 now, i.e. at most 8 rows remain
!if(nq-i+1 > 4) then
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
! call hh_trafo_complex_kernel_8_2hv(q(i,1),hh, nb, ldq, ldh, s)
#else
! call hh_trafo_complex_kernel_8_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s)
#endif
!else if(nq-i+1 > 0) then
! call hh_trafo_complex_kernel_4_2hv(q(i,1),hh, nb, ldq, ldh, s)
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
! call hh_trafo_complex_kernel_4_2hv(q(i:ldq,1:nb+1),hh(1:ldh,1:2), nb, ldq, ldh, s)
#else
#endif
!endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel generic: double_hh_trafo_complex_generic")
......@@ -166,9 +204,13 @@ contains
implicit none
integer(kind=ik), intent(in) :: nb, ldq
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:nb)
#endif
complex(kind=ck) :: x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc
complex(kind=ck) :: h1, tau1
integer(kind=ik) :: i
......@@ -269,9 +311,13 @@ contains
implicit none
integer(kind=ik), intent(in) :: nb, ldq
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:nb)
#endif
complex(kind=ck) :: x1, x2, x3, x4, x5, x6, x7, x8
complex(kind=ck) :: h1, tau1
integer(kind=ik) :: i
......@@ -351,9 +397,13 @@ contains
implicit none
integer(kind=ik), intent(in) :: nb, ldq
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:nb)
#endif
complex(kind=ck) :: x1, x2, x3, x4
complex(kind=ck) :: h1, tau1
integer(kind=ik) :: i
......@@ -412,8 +462,13 @@ contains
implicit none
integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:ldh,1:2)
#endif
complex(kind=ck), intent(in) :: s
complex(kind=ck) :: x1, x2, x3, x4, y1, y2, y3, y4
......@@ -506,8 +561,13 @@ contains
implicit none
integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:ldh,1:2)
#endif
complex(kind=ck), intent(in) :: s
complex(kind=ck) :: x1, x2, x3, x4, x5, x6 ,x7, x8, y1, y2, y3, y4, y5, y6, y7, y8
......@@ -647,8 +707,13 @@ contains
implicit none
integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*)
#else
complex(kind=ck), intent(inout) :: q(:,:)
complex(kind=ck), intent(in) :: hh(1:ldh,1:2)
#endif
complex(kind=ck), intent(in) :: s
complex(kind=ck) :: x1, x2, x3, x4, x5, x6 ,x7, x8, x9, x10, x11, x12, y1, y2, y3, y4, y5, y6, &
......
......@@ -150,12 +150,25 @@ module compute_hh_trafo_complex
ttt = mpi_wtime()
do j = ncols, 1, -1
#ifdef WITH_OPENMP
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call single_hh_trafo_complex_generic(a(1,j+off+a_off,istripe,my_thread), &
bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else
call single_hh_trafo_complex_generic(a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), &
bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif
#else /* WITH_OPENMP */
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call single_hh_trafo_complex_generic(a(1,j+off+a_off,istripe), &
bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else
call single_hh_trafo_complex_generic(a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), &
bcast_buffer(1:nbw,j+off),nbw,nl,stripe_width)
#endif
#endif /* WITH_OPENMP */
enddo
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
......
......@@ -131,7 +131,7 @@ module compute_hh_trafo_real
#else
call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1, &
istripe,my_thread), w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw)
nbw, nl, stripe_width, nbw)
#endif
#else /* WITH_OPENMP */
......@@ -141,8 +141,8 @@ module compute_hh_trafo_real
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw+1,istripe),w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw)
call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1,istripe),w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw)
#endif
#endif /* WITH_OPENMP */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment