Commit 82bdbe47 authored by Pavel Kus

fixing compute_hh_trafo for OpenMP

parent bb71defe
...@@ -1452,24 +1452,24 @@ ...@@ -1452,24 +1452,24 @@
w(:,2) = bcast_buffer(1:nbw,j+off-1) w(:,2) = bcast_buffer(1:nbw,j+off-1)
w(:,3) = bcast_buffer(1:nbw,j+off-2) w(:,3) = bcast_buffer(1:nbw,j+off-2)
w(:,4) = bcast_buffer(1:nbw,j+off-3) w(:,4) = bcast_buffer(1:nbw,j+off-3)
!#ifdef WITH_OPENMP #ifdef WITH_OPENMP
!
!#ifdef USE_ASSUMED_SIZE #ifdef USE_ASSUMED_SIZE
! call quad_hh_trafo_& call quad_hh_trafo_&
! &MATH_DATATYPE& &MATH_DATATYPE&
! &_blas_4hv_& &_blas_4hv_&
! &PRECISION& &PRECISION&
! & (a(1,j+off+a_off-3,istripe,my_thread), w, nbw, nl, stripe_width, nbw) & (a(1,j+off+a_off-3,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
!#else #else
! call quad_hh_trafo_& call quad_hh_trafo_&
! &MATH_DATATYPE& &MATH_DATATYPE&
! &_blas_4hv_& &_blas_4hv_&
! &PRECISION& &PRECISION&
! & (a(1:stripe_width,j+off+a_off-3:j+off+a_off+nbw-1,istripe,my_thread), w(1:nbw,1:6), nbw, nl, & & (a(1:stripe_width,j+off+a_off-3:j+off+a_off+nbw-1,istripe,my_thread), w(1:nbw,1:6), nbw, nl, &
! stripe_width, nbw) stripe_width, nbw)
!#endif #endif
!
!#else #else
#ifdef USE_ASSUMED_SIZE #ifdef USE_ASSUMED_SIZE
call quad_hh_trafo_& call quad_hh_trafo_&
...@@ -1486,7 +1486,7 @@ ...@@ -1486,7 +1486,7 @@
stripe_width, nbw) stripe_width, nbw)
#endif #endif
!#endif #endif
enddo enddo
! do jj = j, 2, -2 ! do jj = j, 2, -2
...@@ -1529,17 +1529,26 @@ ...@@ -1529,17 +1529,26 @@
!!#endif !!#endif
! enddo ! enddo
!#ifdef WITH_OPENMP #ifdef WITH_OPENMP
! !TODO use this after blas kernel for 2 vectors developed
! if (jj==1) call single_hh_trafo_& !!!! if (jj==1) call single_hh_trafo_&
! &MATH_DATATYPE& !!!! &MATH_DATATYPE&
! &_cpu_openmp_& !!!! &_cpu_openmp_&
! &PRECISION& !!!! &PRECISION&
! & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), & !!!! & (a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, istripe,my_thread), &
! bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width) !!!! bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
!
!#else
! transform more of the remaining vectors by single kernel before the double is introduced
do jj = j, 1, -1
call single_hh_trafo_&
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION&
& (a(1:stripe_width,jj+off+a_off:jj+off+a_off+nbw-1, istripe,my_thread), &
bcast_buffer(1:nbw,off+jj), nbw, nl, stripe_width)
enddo
#else
!TODO use this after blas kernel for 2 vectors developed
!!!! if (jj==1) call single_hh_trafo_& !!!! if (jj==1) call single_hh_trafo_&
!!!! &MATH_DATATYPE& !!!! &MATH_DATATYPE&
!!!! &_cpu_& !!!! &_cpu_&
...@@ -1556,10 +1565,8 @@ ...@@ -1556,10 +1565,8 @@
& (a(1:stripe_width,jj+off+a_off:jj+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+jj), & & (a(1:stripe_width,jj+off+a_off:jj+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,off+jj), &
nbw, nl, stripe_width) nbw, nl, stripe_width)
enddo enddo
#endif
!#endif
#endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_BLAS_BLOCK6_KERNEL)) */ #endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_BLAS_BLOCK6_KERNEL)) */
#ifndef WITH_FIXED_REAL_KERNEL #ifndef WITH_FIXED_REAL_KERNEL
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment