Commit 0ab252df authored by Andreas Marek's avatar Andreas Marek
Browse files

Comment out the AVX-512 single-precision kernel calls, since these kernels are not implemented yet

parent 0389d9e3
......@@ -1059,30 +1059,30 @@ module compute_hh_trafo_real
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_AVX_BLOCK2_KERNEL || WITH_REAL_AVX2_BLOCK2_KERNEL */
#if defined(WITH_REAL_AVX512_BLOCK2_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
if (THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_AVX512_BLOCK2) then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK4_KERNEL))
do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,j+off+a_off-1,istripe,my_thread)), &
w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,j+off+a_off-1,istripe)), &
w, nbw, nl, stripe_width, nbw)
#endif
enddo
#endif /* defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK4_KERNEL) ) */
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_AVX512_BLOCK2_KERNEL */
!#if defined(WITH_REAL_AVX512_BLOCK2_KERNEL)
!#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
! if (THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_AVX512_BLOCK2) then
!#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
!
!#if defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK4_KERNEL))
! do j = ncols, 2, -2
! w(:,1) = bcast_buffer(1:nbw,j+off)
! w(:,2) = bcast_buffer(1:nbw,j+off-1)
!#ifdef WITH_OPENMP
! call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,j+off+a_off-1,istripe,my_thread)), &
! w, nbw, nl, stripe_width, nbw)
!#else
! call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,j+off+a_off-1,istripe)), &
! w, nbw, nl, stripe_width, nbw)
!#endif
! enddo
!
!#endif /* defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK4_KERNEL) ) */
!
!#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
! endif
!#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
!#endif /* WITH_REAL_AVX512_BLOCK2_KERNEL */
#if defined(WITH_REAL_BGP_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
......@@ -1241,52 +1241,52 @@ module compute_hh_trafo_real
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_AVX_BLOCK4_KERNEL || WITH_REAL_AVX2_BLOCK4_KERNEL */
#if defined(WITH_REAL_AVX512_BLOCK4_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
if (THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_AVX512_BLOCK4) then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK6_KERNEL))
! X86 INTRINSIC CODE, USING 4 HOUSEHOLDER VECTORS
do j = ncols, 4, -4
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
w(:,3) = bcast_buffer(1:nbw,j+off-2)
w(:,4) = bcast_buffer(1:nbw,j+off-3)
#ifdef WITH_OPENMP
call quad_hh_trafo_real_avx512_4hv_single(c_loc(a(1,j+off+a_off-3,istripe,my_thread)), w, &
nbw, nl, stripe_width, nbw)
#else
call quad_hh_trafo_real_avx512_4hv_single(c_loc(a(1,j+off+a_off-3,istripe)), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
do jj = j, 2, -2
w(:,1) = bcast_buffer(1:nbw,jj+off)
w(:,2) = bcast_buffer(1:nbw,jj+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,jj+off+a_off-1,istripe,my_thread)), &
w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,jj+off+a_off-1,istripe)), &
w, nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if (jj==1) call single_hh_trafo_real_cpu_openmp_single(a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, &
istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else
if (jj==1) call single_hh_trafo_real_cpu_single(a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif
#endif /* defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) ) */
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_AVX512_BLOCK4_KERNEL */
!#if defined(WITH_REAL_AVX512_BLOCK4_KERNEL)
!#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
! if (THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_AVX512_BLOCK4) then
!#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
!
!#if defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK6_KERNEL))
! ! X86 INTRINSIC CODE, USING 4 HOUSEHOLDER VECTORS
! do j = ncols, 4, -4
! w(:,1) = bcast_buffer(1:nbw,j+off)
! w(:,2) = bcast_buffer(1:nbw,j+off-1)
! w(:,3) = bcast_buffer(1:nbw,j+off-2)
! w(:,4) = bcast_buffer(1:nbw,j+off-3)
!#ifdef WITH_OPENMP
! call quad_hh_trafo_real_avx512_4hv_single(c_loc(a(1,j+off+a_off-3,istripe,my_thread)), w, &
! nbw, nl, stripe_width, nbw)
!#else
! call quad_hh_trafo_real_avx512_4hv_single(c_loc(a(1,j+off+a_off-3,istripe)), w, &
! nbw, nl, stripe_width, nbw)
!#endif
! enddo
! do jj = j, 2, -2
! w(:,1) = bcast_buffer(1:nbw,jj+off)
! w(:,2) = bcast_buffer(1:nbw,jj+off-1)
!#ifdef WITH_OPENMP
! call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,jj+off+a_off-1,istripe,my_thread)), &
! w, nbw, nl, stripe_width, nbw)
!#else
! call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,jj+off+a_off-1,istripe)), &
! w, nbw, nl, stripe_width, nbw)
!#endif
! enddo
!#ifdef WITH_OPENMP
! if (jj==1) call single_hh_trafo_real_cpu_openmp_single(a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, &
! istripe,my_thread), &
! bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
!#else
! if (jj==1) call single_hh_trafo_real_cpu_single(a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), &
! bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
!#endif
!
!#endif /* defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_AVX512_BLOCK6_KERNEL) ) */
!
!#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
! endif
!#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
!#endif /* WITH_REAL_AVX512_BLOCK4_KERNEL */
#if defined(WITH_REAL_SSE_BLOCK6_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
......@@ -1407,66 +1407,66 @@ module compute_hh_trafo_real
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_AVX_BLOCK6_KERNEL || WITH_REAL_AVX2_BLOCK6_KERNEL */
#if defined(WITH_REAL_AVX512_BLOCK6_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
if ((THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_AVX512_BLOCK6)) then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS
do j = ncols, 6, -6
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
w(:,3) = bcast_buffer(1:nbw,j+off-2)
w(:,4) = bcast_buffer(1:nbw,j+off-3)
w(:,5) = bcast_buffer(1:nbw,j+off-4)
w(:,6) = bcast_buffer(1:nbw,j+off-5)
#ifdef WITH_OPENMP
call hexa_hh_trafo_real_avx512_6hv_single(c_loc(a(1,j+off+a_off-5,istripe,my_thread)), w, &
nbw, nl, stripe_width, nbw)
#else
call hexa_hh_trafo_real_avx512_6hv_single(c_loc(a(1,j+off+a_off-5,istripe)), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
do jj = j, 4, -4
w(:,1) = bcast_buffer(1:nbw,jj+off)
w(:,2) = bcast_buffer(1:nbw,jj+off-1)
w(:,3) = bcast_buffer(1:nbw,jj+off-2)
w(:,4) = bcast_buffer(1:nbw,jj+off-3)
#ifdef WITH_OPENMP
call quad_hh_trafo_real_avx512_4hv_single(c_loc(a(1,jj+off+a_off-3,istripe,my_thread)), w, &
nbw, nl, stripe_width, nbw)
#else
call quad_hh_trafo_real_avx512_4hv_single(c_loc(a(1,jj+off+a_off-3,istripe)), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
do jjj = jj, 2, -2
w(:,1) = bcast_buffer(1:nbw,jjj+off)
w(:,2) = bcast_buffer(1:nbw,jjj+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,jjj+off+a_off-1,istripe,my_thread)), &
w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,jjj+off+a_off-1,istripe)), &
w, nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if (jjj==1) call single_hh_trafo_real_cpu_openmp_single(a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, &
istripe,my_thread), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#else
if (jjj==1) call single_hh_trafo_real_cpu_single(a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), &
bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
#endif
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_AVX512_BLOCK6_KERNEL */
!#if defined(WITH_REAL_AVX512_BLOCK6_KERNEL)
!#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
!
! if ((THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_AVX512_BLOCK6)) then
!#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
! ! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS
! do j = ncols, 6, -6
! w(:,1) = bcast_buffer(1:nbw,j+off)
! w(:,2) = bcast_buffer(1:nbw,j+off-1)
! w(:,3) = bcast_buffer(1:nbw,j+off-2)
! w(:,4) = bcast_buffer(1:nbw,j+off-3)
! w(:,5) = bcast_buffer(1:nbw,j+off-4)
! w(:,6) = bcast_buffer(1:nbw,j+off-5)
!
!#ifdef WITH_OPENMP
! call hexa_hh_trafo_real_avx512_6hv_single(c_loc(a(1,j+off+a_off-5,istripe,my_thread)), w, &
! nbw, nl, stripe_width, nbw)
!#else
! call hexa_hh_trafo_real_avx512_6hv_single(c_loc(a(1,j+off+a_off-5,istripe)), w, &
! nbw, nl, stripe_width, nbw)
!#endif
! enddo
! do jj = j, 4, -4
! w(:,1) = bcast_buffer(1:nbw,jj+off)
! w(:,2) = bcast_buffer(1:nbw,jj+off-1)
! w(:,3) = bcast_buffer(1:nbw,jj+off-2)
! w(:,4) = bcast_buffer(1:nbw,jj+off-3)
!
!#ifdef WITH_OPENMP
! call quad_hh_trafo_real_avx512_4hv_single(c_loc(a(1,jj+off+a_off-3,istripe,my_thread)), w, &
! nbw, nl, stripe_width, nbw)
!#else
! call quad_hh_trafo_real_avx512_4hv_single(c_loc(a(1,jj+off+a_off-3,istripe)), w, &
! nbw, nl, stripe_width, nbw)
!#endif
! enddo
! do jjj = jj, 2, -2
! w(:,1) = bcast_buffer(1:nbw,jjj+off)
! w(:,2) = bcast_buffer(1:nbw,jjj+off-1)
!
!#ifdef WITH_OPENMP
! call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,jjj+off+a_off-1,istripe,my_thread)), &
! w, nbw, nl, stripe_width, nbw)
!#else
! call double_hh_trafo_real_avx512_2hv_single(c_loc(a(1,jjj+off+a_off-1,istripe)), &
! w, nbw, nl, stripe_width, nbw)
!#endif
! enddo
!#ifdef WITH_OPENMP
! if (jjj==1) call single_hh_trafo_real_cpu_openmp_single(a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1, &
! istripe,my_thread), &
! bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
!#else
! if (jjj==1) call single_hh_trafo_real_cpu_single(a(1:stripe_width,1+off+a_off:1+off+a_off+nbw-1,istripe), &
! bcast_buffer(1:nbw,off+1), nbw, nl, stripe_width)
!#endif
!#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
! endif
!#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
!#endif /* WITH_REAL_AVX512_BLOCK6_KERNEL */
endif ! GPU_KERNEL
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment