Commit a4f915b0 authored by Andreas Marek

Include AVX2 sources in automake sources

parent b25cb10d
......@@ -95,6 +95,10 @@ endif
# The AVX and AVX2 real block-2 kernels are built from the same source file.
# The AVX2 conditional is nested in the else branch so the file is appended
# to the sources only once when both conditionals are true.
if WITH_REAL_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv.c
else
if WITH_REAL_AVX2_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv.c
endif
endif
if WITH_REAL_SSE_BLOCK4_KERNEL
......@@ -103,6 +107,10 @@ endif
# The AVX and AVX2 real block-4 kernels are built from the same source file.
# The AVX2 conditional is nested in the else branch so the file is appended
# to the sources only once when both conditionals are true.
if WITH_REAL_AVX_BLOCK4_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv.c
else
if WITH_REAL_AVX2_BLOCK4_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv.c
endif
endif
if WITH_REAL_SSE_BLOCK6_KERNEL
......@@ -111,6 +119,10 @@ endif
# The AVX and AVX2 real block-6 kernels are built from the same source file.
# The AVX2 conditional is nested in the else branch so the file is appended
# to the sources only once when both conditionals are true.
if WITH_REAL_AVX_BLOCK6_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv.c
else
if WITH_REAL_AVX2_BLOCK6_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv.c
endif
endif
if WITH_COMPLEX_SSE_BLOCK1_KERNEL
......@@ -119,6 +131,10 @@ endif
# The AVX and AVX2 complex block-1 kernels are built from the same source file.
# The AVX2 conditional is nested in the else branch so the file is appended
# to the sources only once when both conditionals are true.
if WITH_COMPLEX_AVX_BLOCK1_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv.c
else
if WITH_COMPLEX_AVX2_BLOCK1_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv.c
endif
endif
if WITH_COMPLEX_SSE_BLOCK2_KERNEL
......@@ -127,6 +143,10 @@ endif
# The AVX and AVX2 complex block-2 kernels are built from the same source file.
# The AVX2 conditional is nested in the else branch so the file is appended
# to the sources only once when both conditionals are true.
if WITH_COMPLEX_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv.c
else
if WITH_COMPLEX_AVX2_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv.c
endif
endif
include generated_headers.am
......
......@@ -88,7 +88,7 @@ static __forceinline void hh_trafo_complex_kernel_8_AVX_1hv(double complex* q,
static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv(double complex* q, double complex* hh, int nb, int ldq);
/*
!f>#ifdef HAVE_AVX
!f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface
!f> subroutine single_hh_trafo_complex_avx_avx2_1hv(q, hh, pnb, pnq, pldq) bind(C, name="single_hh_trafo_complex_avx_avx2_1hv")
!f> use, intrinsic :: iso_c_binding
......
......@@ -89,7 +89,7 @@ static __forceinline void hh_trafo_complex_kernel_4_AVX_2hv(double complex* q, d
static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
/*
!f>#ifdef HAVE_AVX
!f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface
!f> subroutine double_hh_trafo_complex_avx_avx2_2hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="double_hh_trafo_complex_avx_avx2_2hv")
!f> use, intrinsic :: iso_c_binding
......
......@@ -87,7 +87,7 @@ __forceinline void hh_trafo_kernel_16_AVX_2hv(double* q, double* hh, int nb, int
__forceinline void hh_trafo_kernel_24_AVX_2hv(double* q, double* hh, int nb, int ldq, int ldh, double s);
/*
!f>#ifdef HAVE_AVX
!f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface
!f> subroutine double_hh_trafo_real_avx_avx2_2hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="double_hh_trafo_real_avx_avx2_2hv")
!f> use, intrinsic :: iso_c_binding
......
......@@ -89,7 +89,7 @@ __forceinline void hh_trafo_kernel_8_AVX_4hv(double* q, double* hh, int nb, int
__forceinline void hh_trafo_kernel_12_AVX_4hv(double* q, double* hh, int nb, int ldq, int ldh, double s_1_2, double s_1_3, double s_2_3, double s_1_4, double s_2_4, double s_3_4);
/*
!f>#ifdef HAVE_AVX
!f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface
!f> subroutine quad_hh_trafo_real_avx_avx2_4hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="quad_hh_trafo_real_avx_avx2_4hv")
!f> use, intrinsic :: iso_c_binding
......
......@@ -89,7 +89,7 @@ static void hh_trafo_kernel_4_AVX_6hv(double* q, double* hh, int nb, int ldq, in
static void hh_trafo_kernel_8_AVX_6hv(double* q, double* hh, int nb, int ldq, int ldh, double* scalarprods);
/*
!f>#ifdef HAVE_AVX
!f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface
!f> subroutine hexa_hh_trafo_real_avx_avx2_6hv(q, hh, pnb, pnq, pldq, pldh) bind(C, name="hexa_hh_trafo_real_avx_avx2_6hv")
!f> use, intrinsic :: iso_c_binding
......
......@@ -321,6 +321,14 @@ program test_complex2
#endif
#endif
#ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK2)
#else
#ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK1)
#endif
#endif
#else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
#ifdef WITH_COMPLEX_SSE_BLOCK1_KERNEL
......@@ -339,6 +347,14 @@ program test_complex2
COMPLEX_ELPA_KERNEL_AVX_BLOCK2)
#endif
#ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK1)
#endif
#ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK2)
#endif
#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
......
......@@ -306,6 +306,7 @@ program test_real2
#endif
#endif
#endif
#ifdef WITH_REAL_AVX_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX_BLOCK6)
#else
......@@ -318,6 +319,18 @@ program test_real2
#endif
#endif
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK6)
#else
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK4)
#else
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK2)
#endif
#endif
#endif
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_SSE_BLOCK2_KERNEL
......@@ -344,6 +357,18 @@ program test_real2
REAL_ELPA_KERNEL_AVX_BLOCK6)
#endif
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK2)
#endif /* WITH_REAL_AVX2_BLOCK2_KERNEL */
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL /* guard must name the AVX2 macro, matching the AVX2 constant below */
REAL_ELPA_KERNEL_AVX2_BLOCK4)
#endif /* WITH_REAL_AVX2_BLOCK4_KERNEL */
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK6)
#endif /* WITH_REAL_AVX2_BLOCK6_KERNEL */
#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_BGP_KERNEL
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment