Include AVX2 sources in automake sources

parent b25cb10d
...@@ -95,6 +95,10 @@ endif ...@@ -95,6 +95,10 @@ endif
if WITH_REAL_AVX_BLOCK2_KERNEL if WITH_REAL_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv.c libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv.c
else
if WITH_REAL_AVX2_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv.c
endif
endif endif
if WITH_REAL_SSE_BLOCK4_KERNEL if WITH_REAL_SSE_BLOCK4_KERNEL
...@@ -103,6 +107,10 @@ endif ...@@ -103,6 +107,10 @@ endif
if WITH_REAL_AVX_BLOCK4_KERNEL if WITH_REAL_AVX_BLOCK4_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv.c libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv.c
else
if WITH_REAL_AVX2_BLOCK4_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv.c
endif
endif endif
if WITH_REAL_SSE_BLOCK6_KERNEL if WITH_REAL_SSE_BLOCK6_KERNEL
...@@ -111,6 +119,10 @@ endif ...@@ -111,6 +119,10 @@ endif
if WITH_REAL_AVX_BLOCK6_KERNEL if WITH_REAL_AVX_BLOCK6_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv.c libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv.c
else
if WITH_REAL_AVX2_BLOCK6_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv.c
endif
endif endif
if WITH_COMPLEX_SSE_BLOCK1_KERNEL if WITH_COMPLEX_SSE_BLOCK1_KERNEL
...@@ -119,6 +131,10 @@ endif ...@@ -119,6 +131,10 @@ endif
if WITH_COMPLEX_AVX_BLOCK1_KERNEL if WITH_COMPLEX_AVX_BLOCK1_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv.c libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv.c
else
if WITH_COMPLEX_AVX2_BLOCK1_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv.c
endif
endif endif
if WITH_COMPLEX_SSE_BLOCK2_KERNEL if WITH_COMPLEX_SSE_BLOCK2_KERNEL
...@@ -127,6 +143,10 @@ endif ...@@ -127,6 +143,10 @@ endif
if WITH_COMPLEX_AVX_BLOCK2_KERNEL if WITH_COMPLEX_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv.c libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv.c
else
if WITH_COMPLEX_AVX2_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv.c
endif
endif endif
include generated_headers.am include generated_headers.am
......
...@@ -88,7 +88,7 @@ static __forceinline void hh_trafo_complex_kernel_8_AVX_1hv(double complex* q, ...@@ -88,7 +88,7 @@ static __forceinline void hh_trafo_complex_kernel_8_AVX_1hv(double complex* q,
static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv(double complex* q, double complex* hh, int nb, int ldq); static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv(double complex* q, double complex* hh, int nb, int ldq);
/* /*
!f>#ifdef HAVE_AVX !f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface !f> interface
!f> subroutine single_hh_trafo_complex_avx_avx2_1hv(q, hh, pnb, pnq, pldq) bind(C, name="single_hh_trafo_complex_avx_avx2_1hv") !f> subroutine single_hh_trafo_complex_avx_avx2_1hv(q, hh, pnb, pnq, pldq) bind(C, name="single_hh_trafo_complex_avx_avx2_1hv")
!f> use, intrinsic :: iso_c_binding !f> use, intrinsic :: iso_c_binding
......
...@@ -89,7 +89,7 @@ static __forceinline void hh_trafo_complex_kernel_4_AVX_2hv(double complex* q, d ...@@ -89,7 +89,7 @@ static __forceinline void hh_trafo_complex_kernel_4_AVX_2hv(double complex* q, d
static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s); static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
/* /*
!f>#ifdef HAVE_AVX !f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface !f> interface
!f> subroutine double_hh_trafo_complex_avx_avx2_2hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="double_hh_trafo_complex_avx_avx2_2hv") !f> subroutine double_hh_trafo_complex_avx_avx2_2hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="double_hh_trafo_complex_avx_avx2_2hv")
!f> use, intrinsic :: iso_c_binding !f> use, intrinsic :: iso_c_binding
......
...@@ -87,7 +87,7 @@ __forceinline void hh_trafo_kernel_16_AVX_2hv(double* q, double* hh, int nb, int ...@@ -87,7 +87,7 @@ __forceinline void hh_trafo_kernel_16_AVX_2hv(double* q, double* hh, int nb, int
__forceinline void hh_trafo_kernel_24_AVX_2hv(double* q, double* hh, int nb, int ldq, int ldh, double s); __forceinline void hh_trafo_kernel_24_AVX_2hv(double* q, double* hh, int nb, int ldq, int ldh, double s);
/* /*
!f>#ifdef HAVE_AVX !f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface !f> interface
!f> subroutine double_hh_trafo_real_avx_avx2_2hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="double_hh_trafo_real_avx_avx2_2hv") !f> subroutine double_hh_trafo_real_avx_avx2_2hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="double_hh_trafo_real_avx_avx2_2hv")
!f> use, intrinsic :: iso_c_binding !f> use, intrinsic :: iso_c_binding
......
...@@ -89,7 +89,7 @@ __forceinline void hh_trafo_kernel_8_AVX_4hv(double* q, double* hh, int nb, int ...@@ -89,7 +89,7 @@ __forceinline void hh_trafo_kernel_8_AVX_4hv(double* q, double* hh, int nb, int
__forceinline void hh_trafo_kernel_12_AVX_4hv(double* q, double* hh, int nb, int ldq, int ldh, double s_1_2, double s_1_3, double s_2_3, double s_1_4, double s_2_4, double s_3_4); __forceinline void hh_trafo_kernel_12_AVX_4hv(double* q, double* hh, int nb, int ldq, int ldh, double s_1_2, double s_1_3, double s_2_3, double s_1_4, double s_2_4, double s_3_4);
/* /*
!f>#ifdef HAVE_AVX !f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface !f> interface
!f> subroutine quad_hh_trafo_real_avx_avx2_4hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="quad_hh_trafo_real_avx_avx2_4hv") !f> subroutine quad_hh_trafo_real_avx_avx2_4hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="quad_hh_trafo_real_avx_avx2_4hv")
!f> use, intrinsic :: iso_c_binding !f> use, intrinsic :: iso_c_binding
......
...@@ -89,7 +89,7 @@ static void hh_trafo_kernel_4_AVX_6hv(double* q, double* hh, int nb, int ldq, in ...@@ -89,7 +89,7 @@ static void hh_trafo_kernel_4_AVX_6hv(double* q, double* hh, int nb, int ldq, in
static void hh_trafo_kernel_8_AVX_6hv(double* q, double* hh, int nb, int ldq, int ldh, double* scalarprods); static void hh_trafo_kernel_8_AVX_6hv(double* q, double* hh, int nb, int ldq, int ldh, double* scalarprods);
/* /*
!f>#ifdef HAVE_AVX !f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface !f> interface
!f> subroutine hexa_hh_trafo_real_avx_avx2_6hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="hexa_hh_trafo_real_avx_avx2_6hv") !f> subroutine hexa_hh_trafo_real_avx_avx2_6hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="hexa_hh_trafo_real_avx_avx2_6hv")
!f> use, intrinsic :: iso_c_binding !f> use, intrinsic :: iso_c_binding
......
...@@ -321,6 +321,14 @@ program test_complex2 ...@@ -321,6 +321,14 @@ program test_complex2
#endif #endif
#endif #endif
#ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK2)
#else
#ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK1)
#endif
#endif
#else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */ #else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
#ifdef WITH_COMPLEX_SSE_BLOCK1_KERNEL #ifdef WITH_COMPLEX_SSE_BLOCK1_KERNEL
...@@ -339,6 +347,14 @@ program test_complex2 ...@@ -339,6 +347,14 @@ program test_complex2
COMPLEX_ELPA_KERNEL_AVX_BLOCK2) COMPLEX_ELPA_KERNEL_AVX_BLOCK2)
#endif #endif
#ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK1)
#endif
#ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK2)
#endif
#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */ #endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */ #endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
......
...@@ -306,6 +306,7 @@ program test_real2 ...@@ -306,6 +306,7 @@ program test_real2
#endif #endif
#endif #endif
#endif #endif
#ifdef WITH_REAL_AVX_BLOCK6_KERNEL #ifdef WITH_REAL_AVX_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX_BLOCK6) REAL_ELPA_KERNEL_AVX_BLOCK6)
#else #else
...@@ -318,6 +319,18 @@ program test_real2 ...@@ -318,6 +319,18 @@ program test_real2
#endif #endif
#endif #endif
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK6)
#else
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK4)
#else
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK2)
#endif
#endif
#endif
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */ #else /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_SSE_BLOCK2_KERNEL #ifdef WITH_REAL_SSE_BLOCK2_KERNEL
...@@ -344,6 +357,18 @@ program test_real2 ...@@ -344,6 +357,18 @@ program test_real2
REAL_ELPA_KERNEL_AVX_BLOCK6) REAL_ELPA_KERNEL_AVX_BLOCK6)
#endif #endif
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK2)
#endif
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK4)
#endif
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK6)
#endif
#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */ #endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_BGP_KERNEL #ifdef WITH_REAL_BGP_KERNEL
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment