Unverified Commit 69fd894b authored by Andreas Marek's avatar Andreas Marek
Browse files

Merge branch 'master' into ELPA_GPU

parents 1a19eaab a4f915b0
This diff is collapsed.
...@@ -31,7 +31,12 @@ https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html ...@@ -31,7 +31,12 @@ https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
Some interface need an extra argument, specifying the number of matrix columns Some interface need an extra argument, specifying the number of matrix columns
(see the documentation) (see the documentation)
The state of release 2015.11.001 defines this interface
- 5 - 5
Compatible API change w.r.t. previous version: Interfaces have been added to unify Compatible API change w.r.t. previous version: Interfaces have been added to unify
the naming conventions. The old interface are still available the naming conventions. The old interface are still available
The state of release 2016.05.001 defines this interface
...@@ -120,6 +120,13 @@ if WITH_REAL_AVX_BLOCK2_KERNEL ...@@ -120,6 +120,13 @@ if WITH_REAL_AVX_BLOCK2_KERNEL
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv_single_precision.c libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv_single_precision.c
endif endif
else
if WITH_REAL_AVX2_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv_single_precision.c
endif
endif
endif endif
if WITH_REAL_SSE_BLOCK4_KERNEL if WITH_REAL_SSE_BLOCK4_KERNEL
...@@ -134,6 +141,13 @@ if WITH_REAL_AVX_BLOCK4_KERNEL ...@@ -134,6 +141,13 @@ if WITH_REAL_AVX_BLOCK4_KERNEL
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv_single_precision.c libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv_single_precision.c
endif endif
else
if WITH_REAL_AVX2_BLOCK4_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv_single_precision.c
endif
endif
endif endif
if WITH_REAL_SSE_BLOCK6_KERNEL if WITH_REAL_SSE_BLOCK6_KERNEL
...@@ -148,6 +162,13 @@ if WITH_REAL_AVX_BLOCK6_KERNEL ...@@ -148,6 +162,13 @@ if WITH_REAL_AVX_BLOCK6_KERNEL
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv_single_precision.c libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv_single_precision.c
endif endif
else
if WITH_REAL_AVX2_BLOCK6_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv_single_precision.c
endif
endif
endif endif
if WITH_COMPLEX_SSE_BLOCK1_KERNEL if WITH_COMPLEX_SSE_BLOCK1_KERNEL
...@@ -162,6 +183,13 @@ if WITH_COMPLEX_AVX_BLOCK1_KERNEL ...@@ -162,6 +183,13 @@ if WITH_COMPLEX_AVX_BLOCK1_KERNEL
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv_single_precision.c libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv_single_precision.c
endif endif
else
if WITH_COMPLEX_AVX2_BLOCK1_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv_single_precision.c
endif
endif
endif endif
if WITH_COMPLEX_SSE_BLOCK2_KERNEL if WITH_COMPLEX_SSE_BLOCK2_KERNEL
...@@ -176,6 +204,13 @@ if WITH_COMPLEX_AVX_BLOCK2_KERNEL ...@@ -176,6 +204,13 @@ if WITH_COMPLEX_AVX_BLOCK2_KERNEL
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv_single_precision.c libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv_single_precision.c
endif endif
else
if WITH_COMPLEX_AVX2_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv_single_precision.c
endif
endif
endif endif
.cu.lo: .cu.lo:
......
...@@ -87,7 +87,7 @@ the possible configure options. ...@@ -87,7 +87,7 @@ the possible configure options.
## Using *ELPA* ## Using *ELPA*
Please have a look at the "**USERS_GUIDE**" file, to get a documentation or at the [online] Please have a look at the "**USERS_GUIDE**" file, to get a documentation or at the [online]
(http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2015.11.001/html/index.html) doxygen (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2016.05.001/html/index.html) doxygen
documentation, where you find the definition of the interfaces. documentation, where you find the definition of the interfaces.
## Contributing to *ELPA* ## Contributing to *ELPA*
......
This file contains the release notes for the ELPA 2015.11.001 version This file contains the release notes for the ELPA 2016.05.001 version
What is new? What is new?
------------- -------------
For changes since release ELPA 2015.11.001 please have a look at the Changelog file
a) ABI change a) ABI change
--------------------- ---------------------
Most importantly, the ABI of the ELPA library changed! Most importantly, the ABI of the ELPA library changed!
A rebuild/relink of the user code using the ELPA library is mandatory! New interfaces have been added, the old ones still exist, thus a rebuild
Hopefully, this will be the last ABI change for some time. of your code (build with ELPA 2015.11.001) should not be necessary. However,
it is not guaranteed whether your Fortran compiler produces compatible
b) C interface Fortran modules if content is added to these modules. To be on the safe side,
---------------------- re-compiling your application with the new ELPA version is not a bad idea.
ELPA now is shipped with a C interface to directly call the ELPA library (written in Fortran) For details of the ABI changes (including the history of changes) please have a
from C code. Header files are provided to declare the c functions. look at the "LIBRARY_INTERFACE" file
Since ELPA is still a Fortran library it might be necessary to link it together with the needed
Fortran runtime libraries in your C code.
Any incompatibilities to previous version?
Any incompatibilities to previous version?
--------------------------------------- ---------------------------------------
As mentioned before, the ABI of ELPA has changed! It will be necessary As mentioned before, the ABI of ELPA was changed such, that there should be no
to rebuild the programs using ELPA, if this new version should be used. incompatibility with the previous version ELPA 2015.11.001. There is an
Beware, using the new library with code which was built with an older version incompatibility with older versions than ELPA 2015.11.001 !
should not even run. If it does, the results will be wrong !
...@@ -9,7 +9,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst ...@@ -9,7 +9,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example "man get_elpa_communicators" should provide the documentation for the *ELPA* function which sets For example "man get_elpa_communicators" should provide the documentation for the *ELPA* function which sets
the necessary communicators. the necessary communicators.
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2015.11.001/html/index.html) Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2016.05.001/html/index.html)
for each *ELPA* release is available. for each *ELPA* release is available.
### General concept of the *ELPA* library ### ### General concept of the *ELPA* library ###
...@@ -20,7 +20,7 @@ The *ELPA* library consists of two main parts: ...@@ -20,7 +20,7 @@ The *ELPA* library consists of two main parts:
Both variants of the *ELPA* solvers are available for real or complex valued matrices. Both variants of the *ELPA* solvers are available for real or complex valued matrices.
Thus *ELPA* provides the following user functions (see man pages or [online] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2015.11.001/html/index.html) for details): Thus *ELPA* provides the following user functions (see man pages or [online] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2016.05.001/html/index.html) for details):
- get_elpa_communicators : set the row / column communicators for *ELPA* - get_elpa_communicators : set the row / column communicators for *ELPA*
- solve_evp_complex_1stage : solve a complex valued eigenvalue problem with the *ELPA 1stage* solver - solve_evp_complex_1stage : solve a complex valued eigenvalue problem with the *ELPA 1stage* solver
......
...@@ -732,6 +732,15 @@ DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-avx-block4-kernel]-only,[real-avx-block ...@@ -732,6 +732,15 @@ DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-avx-block4-kernel]-only,[real-avx-block
dnl real-avx-block6 kernel dnl real-avx-block6 kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-avx-block6-kernel-only],[real-avx-block6-kernel],[install_real_avx_block6]) DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-avx-block6-kernel-only],[real-avx-block6-kernel],[install_real_avx_block6])
dnl Kernel-specific options for the AVX2 real kernels. Every argument is
dnl fully quoted so the complete option name is passed as one m4 token.
dnl NOTE review: argument roles are presumed from naming to be option name
dnl then kernel name then install variable. Confirm against the macro definition.
dnl real-avx2-block2 kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-avx2-block2-kernel-only],[real-avx2-block2-kernel],[install_real_avx2_block2])
dnl real-avx2-block4 kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-avx2-block4-kernel-only],[real-avx2-block4-kernel],[install_real_avx2_block4])
dnl real-avx2-block6 kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-avx2-block6-kernel-only],[real-avx2-block6-kernel],[install_real_avx2_block6])
dnl complex kernels dnl complex kernels
dnl generic kernel dnl generic kernel
...@@ -761,6 +770,13 @@ DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-avx-block1-kernel-only],[complex- ...@@ -761,6 +770,13 @@ DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-avx-block1-kernel-only],[complex-
dnl complex-avx-block2 kernel dnl complex-avx-block2 kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-avx-block2-kernel-only],[complex-avx-block2-kernel],[install_complex_avx_block2]) DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-avx-block2-kernel-only],[complex-avx-block2-kernel],[install_complex_avx_block2])
dnl complex-avx2-block1 kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-avx2-block1-kernel-only],[complex-avx2-block1-kernel],[install_complex_avx2_block1])
dnl complex-avx2-block2 kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-avx2-block2-kernel-only],[complex-avx2-block2-kernel],[install_complex_avx2_block2])
dnl set the conditionals according to the previous tests dnl set the conditionals according to the previous tests
if test x"${can_use_iso_fortran_env}" = x"yes" ; then if test x"${can_use_iso_fortran_env}" = x"yes" ; then
......
...@@ -69,6 +69,7 @@ AC_DEFUN([AX_ELPA_OPENMP], ...@@ -69,6 +69,7 @@ AC_DEFUN([AX_ELPA_OPENMP],
dnl GCC >= 4.2 -fopenmp dnl GCC >= 4.2 -fopenmp
dnl SunPRO C -xopenmp dnl SunPRO C -xopenmp
dnl Intel C -openmp dnl Intel C -openmp
dnl Intel > 14.0 -qopenmp
dnl SGI C, PGI C -mp dnl SGI C, PGI C -mp
dnl Tru64 Compaq C -omp dnl Tru64 Compaq C -omp
dnl IBM C (AIX, Linux) -qsmp=omp dnl IBM C (AIX, Linux) -qsmp=omp
...@@ -77,7 +78,7 @@ AC_DEFUN([AX_ELPA_OPENMP], ...@@ -77,7 +78,7 @@ AC_DEFUN([AX_ELPA_OPENMP],
dnl will fail (since we know that it failed without the option), dnl will fail (since we know that it failed without the option),
dnl therefore the loop will continue searching for an option, and dnl therefore the loop will continue searching for an option, and
dnl no output file called 'penmp' or 'mp' is created. dnl no output file called 'penmp' or 'mp' is created.
for ac_option in -openmp -fopenmp -xopenmp -mp -omp -qsmp=omp; do for ac_option in -qopenmp -openmp -fopenmp -xopenmp -mp -omp -qsmp=omp; do
ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option" _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
AC_LINK_IFELSE([AC_LANG_SOURCE([_AX_ELPA_LANG_OPENMP])], AC_LINK_IFELSE([AC_LANG_SOURCE([_AX_ELPA_LANG_OPENMP])],
......
...@@ -88,7 +88,7 @@ static __forceinline void hh_trafo_complex_kernel_8_AVX_1hv_double(double compl ...@@ -88,7 +88,7 @@ static __forceinline void hh_trafo_complex_kernel_8_AVX_1hv_double(double compl
static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv_double(double complex* q, double complex* hh, int nb, int ldq); static __forceinline void hh_trafo_complex_kernel_4_AVX_1hv_double(double complex* q, double complex* hh, int nb, int ldq);
/* /*
!f>#ifdef HAVE_AVX !f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface !f> interface
!f> subroutine single_hh_trafo_complex_avx_avx2_1hv_double(q, hh, pnb, pnq, pldq) & !f> subroutine single_hh_trafo_complex_avx_avx2_1hv_double(q, hh, pnb, pnq, pldq) &
!f> bind(C, name="single_hh_trafo_complex_avx_avx2_1hv_double") !f> bind(C, name="single_hh_trafo_complex_avx_avx2_1hv_double")
......
...@@ -89,7 +89,7 @@ static __forceinline void hh_trafo_complex_kernel_4_AVX_2hv_double(double comple ...@@ -89,7 +89,7 @@ static __forceinline void hh_trafo_complex_kernel_4_AVX_2hv_double(double comple
static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv_double(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s); static __forceinline void hh_trafo_complex_kernel_2_AVX_2hv_double(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
/* /*
!f>#ifdef HAVE_AVX !f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface !f> interface
!f> subroutine double_hh_trafo_complex_avx_avx2_2hv_double(q, hh, pnb, pnq, pldq, pldh) & !f> subroutine double_hh_trafo_complex_avx_avx2_2hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_complex_avx_avx2_2hv_double") !f> bind(C, name="double_hh_trafo_complex_avx_avx2_2hv_double")
......
...@@ -88,7 +88,7 @@ __forceinline void hh_trafo_kernel_24_AVX_2hv_double(double* q, double* hh, int ...@@ -88,7 +88,7 @@ __forceinline void hh_trafo_kernel_24_AVX_2hv_double(double* q, double* hh, int
void double_hh_trafo_real_avx_avx2_2hv_double(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh); void double_hh_trafo_real_avx_avx2_2hv_double(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
/* /*
!f>#ifdef HAVE_AVX !f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface !f> interface
!f> subroutine double_hh_trafo_real_avx_avx2_2hv_double(q, hh, pnb, pnq, pldq, pldh) & !f> subroutine double_hh_trafo_real_avx_avx2_2hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_real_avx_avx2_2hv_double") !f> bind(C, name="double_hh_trafo_real_avx_avx2_2hv_double")
......
...@@ -91,7 +91,7 @@ __forceinline void hh_trafo_kernel_12_AVX_4hv_double(double* q, double* hh, int ...@@ -91,7 +91,7 @@ __forceinline void hh_trafo_kernel_12_AVX_4hv_double(double* q, double* hh, int
void quad_hh_trafo_real_avx_avx2_4hv_double(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh); void quad_hh_trafo_real_avx_avx2_4hv_double(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
/* /*
!f>#ifdef HAVE_AVX !f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface !f> interface
!f> subroutine quad_hh_trafo_real_avx_avx2_4hv_double(q, hh, pnb, pnq, pldq, pldh) & !f> subroutine quad_hh_trafo_real_avx_avx2_4hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="quad_hh_trafo_real_avx_avx2_4hv_double") !f> bind(C, name="quad_hh_trafo_real_avx_avx2_4hv_double")
......
...@@ -90,7 +90,7 @@ static void hh_trafo_kernel_8_AVX_6hv_double(double* q, double* hh, int nb, int ...@@ -90,7 +90,7 @@ static void hh_trafo_kernel_8_AVX_6hv_double(double* q, double* hh, int nb, int
void hexa_hh_trafo_real_avx_avx2_6hv_double(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh); void hexa_hh_trafo_real_avx_avx2_6hv_double(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
/* /*
!f>#ifdef HAVE_AVX !f>#if defined(HAVE_AVX) || defined(HAVE_AVX2)
!f> interface !f> interface
!f> subroutine hexa_hh_trafo_real_avx_avx2_6hv_double(q, hh, pnb, pnq, pldq, pldh) & !f> subroutine hexa_hh_trafo_real_avx_avx2_6hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="hexa_hh_trafo_real_avx_avx2_6hv_double") !f> bind(C, name="hexa_hh_trafo_real_avx_avx2_6hv_double")
......
...@@ -164,6 +164,20 @@ module ELPA2_utilities ...@@ -164,6 +164,20 @@ module ELPA2_utilities
#endif #endif
#endif /* #if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined(WITH_REAL_AVX_BLOCK4_KERNEL) || defined(WITH_REAL_AVX_BLOCK6_KERNEL) */ #endif /* #if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined(WITH_REAL_AVX_BLOCK4_KERNEL) || defined(WITH_REAL_AVX_BLOCK6_KERNEL) */
#if defined(WITH_REAL_AVX2_BLOCK2_KERNEL) || defined(WITH_REAL_AVX2_BLOCK4_KERNEL) || defined(WITH_REAL_AVX2_BLOCK6_KERNEL)
! Default real kernel when at least one AVX2 kernel is enabled.
! Precedence follows the preprocessor nesting: BLOCK6, then BLOCK4, then BLOCK2.
! The constant is declared with kind=ik to match the other DEFAULT_REAL_ELPA_KERNEL declarations.
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX2_BLOCK6
#else
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX2_BLOCK4
#else
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX2_BLOCK2
#endif
#endif
#endif
#endif /* #if defined(WITH_REAL_AVX2_BLOCK2_KERNEL) || defined(WITH_REAL_AVX2_BLOCK4_KERNEL) || defined(WITH_REAL_AVX2_BLOCK6_KERNEL) */
#ifdef WITH_REAL_BGP_KERNEL #ifdef WITH_REAL_BGP_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BGP integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BGP
#endif #endif
...@@ -220,6 +234,21 @@ module ELPA2_utilities ...@@ -220,6 +234,21 @@ module ELPA2_utilities
#endif #endif
#endif /* #if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined(WITH_REAL_AVX_BLOCK4_KERNEL) || defined(WITH_REAL_AVX_BLOCK6_KERNEL) */ #endif /* #if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined(WITH_REAL_AVX_BLOCK4_KERNEL) || defined(WITH_REAL_AVX_BLOCK6_KERNEL) */
#if defined(WITH_REAL_AVX2_BLOCK2_KERNEL) || defined(WITH_REAL_AVX2_BLOCK4_KERNEL) || defined(WITH_REAL_AVX2_BLOCK6_KERNEL)
! Default real kernel when at least one AVX2 kernel is enabled.
! Precedence follows the preprocessor nesting: BLOCK6, then BLOCK4, then BLOCK2.
! The constant is declared with kind=ik to match the other DEFAULT_REAL_ELPA_KERNEL declarations.
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX2_BLOCK6
#else
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX2_BLOCK4
#else
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX2_BLOCK2
#endif
#endif
#endif
#endif /* #if defined(WITH_REAL_AVX2_BLOCK2_KERNEL) || defined(WITH_REAL_AVX2_BLOCK4_KERNEL) || defined(WITH_REAL_AVX2_BLOCK6_KERNEL) */
#ifdef WITH_REAL_BGP_KERNEL #ifdef WITH_REAL_BGP_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BGP integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BGP
#endif #endif
...@@ -302,6 +331,17 @@ module ELPA2_utilities ...@@ -302,6 +331,17 @@ module ELPA2_utilities
#endif #endif
#endif /* defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) */ #endif /* defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) */
#if defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL)
! Default complex kernel when at least one AVX2 kernel is enabled.
! Precedence follows the preprocessor nesting: BLOCK2, then BLOCK1.
! The constant is declared with kind=ik to match the other DEFAULT_COMPLEX_ELPA_KERNEL declarations.
#ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_AVX2_BLOCK2
#else
#ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_AVX2_BLOCK1
#endif
#endif
#endif /* defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL) */
#ifdef WITH_GPU_VERSION #ifdef WITH_GPU_VERSION
integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GPU integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GPU
#endif #endif
...@@ -346,6 +386,16 @@ module ELPA2_utilities ...@@ -346,6 +386,16 @@ module ELPA2_utilities
#endif #endif
#endif /* defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) */ #endif /* defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) */
#if defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL)
! Default complex kernel when at least one AVX2 kernel is enabled.
! Precedence follows the preprocessor nesting: BLOCK2, then BLOCK1.
! The constant is declared with kind=ik to match the other DEFAULT_COMPLEX_ELPA_KERNEL declarations.
#ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_AVX2_BLOCK2
#else
#ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_AVX2_BLOCK1
#endif
#endif
#endif /* defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL) */
#ifdef WITH_GPU_VERSION #ifdef WITH_GPU_VERSION
integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GPU integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GPU
#endif #endif
......
...@@ -365,6 +365,14 @@ program test_complex2_choose_kernel_with_api_double_precision ...@@ -365,6 +365,14 @@ program test_complex2_choose_kernel_with_api_double_precision
#endif #endif
#endif #endif
#ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK2)
#else
#ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK1)
#endif
#endif
#ifdef WITH_GPU_VERSION #ifdef WITH_GPU_VERSION
COMPLEX_ELPA_KERNEL_GPU) COMPLEX_ELPA_KERNEL_GPU)
#endif #endif
......
...@@ -368,6 +368,15 @@ program test_complex2_choose_kernel_with_api_single_precision ...@@ -368,6 +368,15 @@ program test_complex2_choose_kernel_with_api_single_precision
#endif #endif
#endif #endif
#ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK2)
#else
#ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK1)
#endif
#endif
#else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */ #else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
#ifdef WITH_COMPLEX_SSE_BLOCK1_KERNEL #ifdef WITH_COMPLEX_SSE_BLOCK1_KERNEL
...@@ -386,6 +395,15 @@ program test_complex2_choose_kernel_with_api_single_precision ...@@ -386,6 +395,15 @@ program test_complex2_choose_kernel_with_api_single_precision
COMPLEX_ELPA_KERNEL_AVX_BLOCK2) COMPLEX_ELPA_KERNEL_AVX_BLOCK2)
#endif #endif
#ifdef WITH_COMPLEX_AVX2_BLOCK1_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK1)
#endif
#ifdef WITH_COMPLEX_AVX2_BLOCK2_KERNEL
COMPLEX_ELPA_KERNEL_AVX2_BLOCK2)
#endif
#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */ #endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
#ifdef WITH_GPU_VERSION #ifdef WITH_GPU_VERSION
......
...@@ -368,6 +368,19 @@ program test_real2_choose_kernel_with_api_double_precision ...@@ -368,6 +368,19 @@ program test_real2_choose_kernel_with_api_double_precision
#endif #endif
#endif #endif
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK6)
#else
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK4)
#else
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK2)
#endif
#endif
#endif
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */ #else /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_SSE_BLOCK2_KERNEL #ifdef WITH_REAL_SSE_BLOCK2_KERNEL
...@@ -394,6 +407,19 @@ program test_real2_choose_kernel_with_api_double_precision ...@@ -394,6 +407,19 @@ program test_real2_choose_kernel_with_api_double_precision
REAL_ELPA_KERNEL_AVX_BLOCK6) REAL_ELPA_KERNEL_AVX_BLOCK6)
#endif #endif
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK2)
#endif
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK4)
#endif
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK6)
#endif
#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */ #endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_BGP_KERNEL #ifdef WITH_REAL_BGP_KERNEL
......
...@@ -356,6 +356,7 @@ program test_real2_choose_kernel_with_api_single_precision ...@@ -356,6 +356,7 @@ program test_real2_choose_kernel_with_api_single_precision
#endif #endif
#endif #endif
#endif #endif
#ifdef WITH_REAL_AVX_BLOCK6_KERNEL #ifdef WITH_REAL_AVX_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX_BLOCK6) REAL_ELPA_KERNEL_AVX_BLOCK6)
#else #else
...@@ -368,6 +369,18 @@ program test_real2_choose_kernel_with_api_single_precision ...@@ -368,6 +369,18 @@ program test_real2_choose_kernel_with_api_single_precision
#endif #endif
#endif #endif
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK6)
#else
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK4)
#else
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK2)
#endif
#endif
#endif
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */ #else /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_SSE_BLOCK2_KERNEL #ifdef WITH_REAL_SSE_BLOCK2_KERNEL
...@@ -394,6 +407,17 @@ program test_real2_choose_kernel_with_api_single_precision ...@@ -394,6 +407,17 @@ program test_real2_choose_kernel_with_api_single_precision
REAL_ELPA_KERNEL_AVX_BLOCK6) REAL_ELPA_KERNEL_AVX_BLOCK6)
#endif #endif
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK2)
#endif /* WITH_REAL_AVX2_BLOCK2_KERNEL */
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK4)
#endif /* WITH_REAL_AVX2_BLOCK4_KERNEL */
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
REAL_ELPA_KERNEL_AVX2_BLOCK6)
#endif /* WITH_REAL_AVX2_BLOCK6_KERNEL */
#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */ #endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
......