Commit 7d80e062 authored by Andreas Marek's avatar Andreas Marek
Browse files

Error in usage of AVX2 kernels

The AVX2 kernels might not have been called, depending on how
ELPA has been build
parent a5a54212
......@@ -206,7 +206,7 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK2_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK2_KERNEL || WITH_COMPLEX_AVX2_BLOCK2_KERNEL */
#if defined(WITH_COMPLEX_GENERIC_SIMPLE_KERNEL)
......@@ -349,7 +349,7 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK1_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK1_KERNEL || WITH_COMPLEX_AVX2_BLOCK1_KERNEL */
#ifdef WITH_OPENMP
if (my_thread==1) then
......@@ -484,11 +484,12 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK2_KERNEL */
#endif /* WITH_COMPLEX_SSE_BLOCK2_KERNEL */
#if defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL)
#if defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX_BLOCK2) then
if ((THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX_BLOCK2) .or. &
(THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX2_BLOCK2)) then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
ttt = mpi_wtime()
do j = ncols, 2, -2
......@@ -512,7 +513,7 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK2_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK2_KERNEL || WITH_COMPLEX_AVX2_BLOCK2_KERNEL */
#if defined(WITH_COMPLEX_GENERIC_SIMPLE_KERNEL)
......@@ -634,9 +635,10 @@ module compute_hh_trafo_complex
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_SSE_BLOCK1_KERNEL */
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL)
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX_BLOCK1) then
if ((THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX_BLOCK1) .or. &
(THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX2_BLOCK1)) then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
ttt = mpi_wtime()
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) || (defined(WITH_ONE_SPECIFIC_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) && !defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL))
......@@ -654,7 +656,7 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK1_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK1_KERNEL || WITH_COMPLEX_AVX2_BLOCK1_KERNEL*/
#ifdef WITH_OPENMP
if (my_thread==1) then
......
......@@ -896,9 +896,10 @@ module compute_hh_trafo_real
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_SSE_BLOCK2_KERNEL */
#if defined(WITH_REAL_AVX_BLOCK2_KERNEL)
#if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined((WITH_REAL_AVX2_BLOCK2_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
if (THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_AVX_BLOCK2) then
if ((THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_AVX_BLOCK2) .or. &
(THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_AVX2_BLOCK2))then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX_BLOCK4_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK4_KERNEL) )
......@@ -919,7 +920,7 @@ module compute_hh_trafo_real
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_AVX_BLOCK2_KERNEL */
#endif /* WITH_REAL_AVX_BLOCK2_KERNEL || WITH_REAL_AVX2_BLOCK2_KERNEL */
#if defined(WITH_REAL_BGP_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
......@@ -1076,7 +1077,7 @@ module compute_hh_trafo_real
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_AVX_BLOCK4_KERNEL */
#endif /* WITH_REAL_AVX_BLOCK4_KERNEL || WITH_REAL_AVX2_BLOCK4_KERNEL */
#if defined(WITH_REAL_SSE_BLOCK6_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
......@@ -1195,7 +1196,7 @@ module compute_hh_trafo_real
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_AVX_BLOCK4_KERNEL */
#endif /* WITH_REAL_AVX_BLOCK6_KERNEL || WITH_REAL_AVX2_BLOCK6_KERNEL */
endif ! GPU_KERNEL
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment