Commit 53f2f2c6 authored by Andreas Marek's avatar Andreas Marek

ELPA_2014.06 prepare release

Now it is possible
- to choose the kernel (real and complex independently) at runtime
  via environment variables, or
- to specify the kernel (real and complex independently) at runtime
  by passing the kernel as an argument in the call to ELPA

This has a few implications
1) The ELPA 2014.06 release has a change in the API and is thus not
   binary compatible with previous versions
2) if no kernel is specified, a default kernel is chosen
3) if a wrong kernel is specified, a default kernel is chosen

For the sake of simplicity it is still possible to build ELPA with
support for only one kernel, as in previous versions. However, such a
build is still not binary compatible with previous versions
parent c090a89f
This diff is collapsed.
This diff is collapsed.
......@@ -3,6 +3,9 @@
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Fortran can query environment variables */
#undef HAVE_ENVIRONMENT_CHECKING
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
......@@ -67,45 +70,47 @@
/* Version number of package */
#undef VERSION
/* use kernel tuned for AVX on AMD Bulldozer (written in gcc assembler) */
#undef WITH_AMD_BULLDOZER
/* can use complex_avx_block1 kernel */
#undef WITH_COMPLEX_AVX_BLOCK1_KERNEL
/* use AVX optimized complex kernel with blocking 1 (written in gcc assembler)
*/
#undef WITH_AVX_COMPLEX_BLOCK1
/* can use complex_avx_block2 kernel */
#undef WITH_COMPLEX_AVX_BLOCK2_KERNEL
/* use AVX optimized complex kernel with blocking 2 (written in gcc assembler)
*/
#undef WITH_AVX_COMPLEX_BLOCK2
/* can use complex generic kernel */
#undef WITH_COMPLEX_GENERIC_KERNEL
/* use AVX optimized real kernel with blocking 2 (written in gcc assembler) */
#undef WITH_AVX_REAL_BLOCK2
/* can use complex generic-simple kernel */
#undef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
/* use AVX optimized real kernel with blocking 4 (written in gcc assembler) */
#undef WITH_AVX_REAL_BLOCK4
/* can use complex SSE kernel */
#undef WITH_COMPLEX_SSE_KERNEL
/* use AVX optimized real kernel with blocking 6 (written in gcc assembler) */
#undef WITH_AVX_REAL_BLOCK6
/* use OpenMP threading */
#undef WITH_OPENMP
/* use kernel tuned for AVX on Intel Sandybridge (written in gcc assembler) */
#undef WITH_AVX_SANDYBRIDGE
/* can use real_avx_block2 kernel */
#undef WITH_REAL_AVX_BLOCK2_KERNEL
/* use optimized kernel for IBM BG/P */
#undef WITH_BGP
/* can use real_avx_block4 kernel */
#undef WITH_REAL_AVX_BLOCK4_KERNEL
/* use optimized kernel for IBM BG/Q */
#undef WITH_BGQ
/* can use real_avx_block6 kernel */
#undef WITH_REAL_AVX_BLOCK6_KERNEL
/* use generic kernel for all architectures (with some hand-coded
optimizations) */
#undef WITH_GENERIC
/* can use real BGP kernel */
#undef WITH_REAL_BGP_KERNEL
/* use generic simple kernel for all architectures (without any hand-coded
optimizations) */
#undef WITH_GENERIC_SIMPLE
/* can use real BGQ kernel */
#undef WITH_REAL_BGQ_KERNEL
/* use OpenMP threading */
#undef WITH_OPENMP
/* can use real generic kernel */
#undef WITH_REAL_GENERIC_KERNEL
/* can use real generic-simple kernel */
#undef WITH_REAL_GENERIC_SIMPLE_KERNEL
/* can use real SSE kernel */
#undef WITH_REAL_SSE_KERNEL
/* use kernel tuned for SSE (written in gcc assembler) */
#undef WITH_SSE_AS
/* use specific complex kernel */
#undef WITH_SPECIFIC_COMPLEX_KERNEL
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -130,7 +130,7 @@ static __forceinline void hh_trafo_complex_kernel_4_C_1hv(std::complex<double>*
}
#endif // if 0
void single_hh_trafo_complex_(std::complex<double>* q, std::complex<double>* hh, int* pnb, int* pnq, int* pldq)
void single_hh_trafo_complex_sse_avx_1hv_(std::complex<double>* q, std::complex<double>* hh, int* pnb, int* pnq, int* pldq)
{
int i;
int nb = *pnb;
......
......@@ -179,7 +179,7 @@ static __forceinline void hh_trafo_complex_kernel_4_C_2hv(std::complex<double>*
}
#endif
void double_hh_trafo_complex_(std::complex<double>* q, std::complex<double>* hh, int* pnb, int* pnq, int* pldq, int* pldh)
void double_hh_trafo_complex_sse_avx_2hv_(std::complex<double>* q, std::complex<double>* hh, int* pnb, int* pnq, int* pldq, int* pldh)
{
int i;
int nb = *pnb;
......
......@@ -77,12 +77,12 @@ __forceinline void hh_trafo_kernel_8_SSE_2hv(double* q, double* hh, int nb, int
__forceinline void hh_trafo_kernel_12_SSE_2hv(double* q, double* hh, int nb, int ldq, int ldh, double s);
#endif
void double_hh_trafo_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
void double_hh_trafo_real_sse_avx_2hv_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
#if 0
void double_hh_trafo_fast_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
#endif
void double_hh_trafo_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh)
void double_hh_trafo_real_sse_avx_2hv_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh)
{
int i;
int nb = *pnb;
......
......@@ -75,12 +75,12 @@ __forceinline void hh_trafo_kernel_4_SSE_4hv(double* q, double* hh, int nb, int
__forceinline void hh_trafo_kernel_6_SSE_4hv(double* q, double* hh, int nb, int ldq, int ldh, double s_1_2, double s_1_3, double s_2_3, double s_1_4, double s_2_4, double s_3_4);
#endif
void quad_hh_trafo_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
void quad_hh_trafo_real_sse_avx_4hv_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
#if 0
void quad_hh_trafo_fast_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
#endif
void quad_hh_trafo_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh)
void quad_hh_trafo_real_sse_avx_4hv_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh)
{
int i;
int nb = *pnb;
......
......@@ -73,12 +73,12 @@ static void hh_trafo_kernel_2_SSE_6hv(double* q, double* hh, int nb, int ldq, in
static void hh_trafo_kernel_4_SSE_6hv(double* q, double* hh, int nb, int ldq, int ldh, double* scalarprods);
#endif
void hexa_hh_trafo_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
void hexa_hh_trafo_real_sse_avx_6hv_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
#if 0
void hexa_hh_trafo_fast_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
#endif
void hexa_hh_trafo_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh)
void hexa_hh_trafo_real_sse_avx_6hv_(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh)
{
int i;
int nb = *pnb;
......
This diff is collapsed.
......@@ -161,30 +161,23 @@ program test_complex2
if (myid .eq. 0) then
print *," "
print *,"This ELPA2 is build with"
#ifdef WITH_AVX_COMPLEX_BLOCK2
#ifdef WITH_COMPLEX_AVX_BLOCK2_KERNEL
print *,"AVX optimized kernel (2 blocking) for complex matrices"
#endif
#ifdef WITH_AVX_COMPLEX_BLOCK1
#ifdef WITH_COMPLEX_AVX_BLOCK1_KERNEL
print *,"AVX optimized kernel (1 blocking) for complex matrices"
#endif
#ifdef WITH_AVX_SANDYBRIDGE
print *,"AVX SANDYBRIDGE optimized kernel for complex matrices"
#endif
#ifdef WITH_GENERIC
#ifdef WITH_COMPLEX_GENERIC_KERNEL
print *,"GENERIC kernel for complex matrices"
#endif
#ifdef WITH_GENERIC_SIMPLE
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
print *,"GENERIC SIMPLE kernel for complex matrices"
#endif
#ifdef WITH_SSE_AS
#ifdef WITH_COMPLEX_SSE_KERNEL
print *,"SSE ASSEMBLER kernel for complex matrices"
#endif
#ifdef WITH_BGP
print *,"BGP kernel for complex matrices"
#endif
#ifdef WITH_BGQ
print *,"BGQ kernel for complex matrices"
#endif
endif
if (arg4 .eq. "output") then
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment