Commit de20e578 authored by Andreas Marek

Single precision complex block2 AVX-512 kernel

parent d6867ff5
...@@ -265,9 +265,9 @@ endif ...@@ -265,9 +265,9 @@ endif
if WITH_COMPLEX_AVX512_BLOCK2_KERNEL if WITH_COMPLEX_AVX512_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx512_2hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx512_2hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx512_2hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx512_2hv_single_precision.c
#endif endif
endif endif
.cu.lo: .cu.lo:
......
...@@ -568,33 +568,33 @@ module compute_hh_trafo_complex ...@@ -568,33 +568,33 @@ module compute_hh_trafo_complex
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */ #endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK2_KERNEL || WITH_COMPLEX_AVX2_BLOCK2_KERNEL */ #endif /* WITH_COMPLEX_AVX_BLOCK2_KERNEL || WITH_COMPLEX_AVX2_BLOCK2_KERNEL */
!#if defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL) #if defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL)
!#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) #if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
! if ( (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK2) ) then if ( (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK2) ) then
!#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */ #endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
! ttt = mpi_wtime() ttt = mpi_wtime()
! do j = ncols, 2, -2 do j = ncols, 2, -2
! w(:,1) = bcast_buffer(1:nbw,j+off) w(:,1) = bcast_buffer(1:nbw,j+off)
! w(:,2) = bcast_buffer(1:nbw,j+off-1) w(:,2) = bcast_buffer(1:nbw,j+off-1)
!#ifdef WITH_OPENMP #ifdef WITH_OPENMP
! call double_hh_trafo_complex_avx512_2hv_single(a(1,j+off+a_off-1,istripe,my_thread), & call double_hh_trafo_complex_avx512_2hv_single(a(1,j+off+a_off-1,istripe,my_thread), &
! w, nbw, nl, stripe_width, nbw) w, nbw, nl, stripe_width, nbw)
!#else #else
! call double_hh_trafo_complex_avx512_2hv_single(a(1,j+off+a_off-1,istripe), & call double_hh_trafo_complex_avx512_2hv_single(a(1,j+off+a_off-1,istripe), &
! w, nbw, nl, stripe_width, nbw) w, nbw, nl, stripe_width, nbw)
!#endif #endif
! enddo enddo
!#ifdef WITH_OPENMP #ifdef WITH_OPENMP
! if (j==1) call single_hh_trafo_complex_avx512_1hv_single(a(1,1+off+a_off,istripe,my_thread), & if (j==1) call single_hh_trafo_complex_avx512_1hv_single(a(1,1+off+a_off,istripe,my_thread), &
! bcast_buffer(1,off+1), nbw, nl, stripe_width) bcast_buffer(1,off+1), nbw, nl, stripe_width)
!#else #else
! if (j==1) call single_hh_trafo_complex_avx512_1hv_single(a(1,1+off+a_off,istripe), & if (j==1) call single_hh_trafo_complex_avx512_1hv_single(a(1,1+off+a_off,istripe), &
! bcast_buffer(1,off+1), nbw, nl, stripe_width) bcast_buffer(1,off+1), nbw, nl, stripe_width)
!#endif #endif
!#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) #if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
! endif ! (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK2) endif ! (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK2)
!#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */ #endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
!#endif /* WITH_COMPLEX_AVX512_BLOCK2_KERNEL */ #endif /* WITH_COMPLEX_AVX512_BLOCK2_KERNEL */
#if defined(WITH_COMPLEX_GENERIC_SIMPLE_KERNEL) #if defined(WITH_COMPLEX_GENERIC_SIMPLE_KERNEL)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment