Commit 3b218e91 authored by Andreas Marek

Single precision complex block1 AVX-512 kernel

parent 50d4667f
......@@ -237,9 +237,9 @@ endif
if WITH_COMPLEX_AVX512_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx512_1hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx512_1hv_single_precision.c
#endif
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx512_1hv_single_precision.c
endif
endif
if WITH_COMPLEX_SSE_BLOCK2_KERNEL
......
......@@ -739,27 +739,27 @@ module compute_hh_trafo_complex
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK1_KERNEL || WITH_COMPLEX_AVX_BLOCK1_KERNEL */
!#if defined(WITH_COMPLEX_AVX512_BLOCK1_KERNEL)
!#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
! if ( (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK1)) then
!#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
! ttt = mpi_wtime()
!#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) || (defined(WITH_ONE_SPECIFIC_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL) )
! do j = ncols, 1, -1
!#ifdef WITH_OPENMP
! call single_hh_trafo_complex_avx512_1hv_single(a(1,j+off+a_off,istripe,my_thread), &
! bcast_buffer(1,j+off),nbw,nl,stripe_width)
!#else
! call single_hh_trafo_complex_avx512_1hv_single(a(1,j+off+a_off,istripe), &
! bcast_buffer(1,j+off),nbw,nl,stripe_width)
!#endif
! enddo
!#endif /* defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) || (defined(WITH_ONE_SPECIFIC_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL) ) */
!
!#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
! endif ! ( (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK1))
!#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
!#endif /* WITH_COMPLEX_AVX512_BLOCK1_KERNEL */
#if defined(WITH_COMPLEX_AVX512_BLOCK1_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if ( (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK1)) then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
ttt = mpi_wtime()
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) || (defined(WITH_ONE_SPECIFIC_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL) )
do j = ncols, 1, -1
#ifdef WITH_OPENMP
call single_hh_trafo_complex_avx512_1hv_single(a(1,j+off+a_off,istripe,my_thread), &
bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else
call single_hh_trafo_complex_avx512_1hv_single(a(1,j+off+a_off,istripe), &
bcast_buffer(1,j+off),nbw,nl,stripe_width)
#endif
enddo
#endif /* defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) || (defined(WITH_ONE_SPECIFIC_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL) ) */
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif ! ( (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK1))
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_AVX512_BLOCK1_KERNEL */
#ifdef WITH_OPENMP
if (my_thread==1) then
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment