Commit 09d13e2b authored by Andreas Marek

Merge branch 'master' into ELPA_GPU

parents 62fe6edc 0d256c1b
......@@ -12,6 +12,7 @@ autom4te.cache
compile
config.guess
config.h.in
config.h.in~
config.sub
configure
depcomp
......@@ -19,3 +20,8 @@ install-sh
ltmain.sh
missing
test-driver
m4/libtool.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
jobs:
script: ./autogen.sh && ./configure && make && make check TEST_FLAGS='1500 50 16'
script:
- export LANG=C
- module load impi intel gcc mkl autotools
- ./autogen.sh
- ./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64"
- make -j 8
- make check TEST_FLAGS='1500 50 16'
......@@ -82,18 +82,18 @@ if WITH_REAL_BGQ_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_bgq.f90
endif
if WITH_REAL_SSE_KERNEL
if WITH_REAL_SSE_ASSEMBLY_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s
endif
else
if WITH_COMPLEX_SSE_KERNEL
if WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s
endif
endif
endif
......
......@@ -202,55 +202,77 @@ if test x"${with_ftimings}" = x"yes"; then
fi
AM_CONDITIONAL([HAVE_DETAILED_TIMINGS],[test x"$with_ftimings" = x"yes"])
AC_MSG_CHECKING(whether double-precision SSE assembler kernel can be compiled)
AC_MSG_CHECKING(whether double-precision SSE assembly kernel can be compiled)
$CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s -o test.o 2>/dev/null
if test "$?" == 0; then
can_compile_sse=yes
install_real_sse=yes
can_compile_sse_assembly=yes
install_real_sse_assembly=yes
install_complex_sse_assembly=yes
else
can_compile_sse_assembly=no
install_real_sse_assembly=no
install_complex_sse_assembly=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse_assembly}])
dnl Single-precision SSE assembly kernel check. Only performed when
dnl want_single_precision is "yes". The outcome is kept in its own variable
dnl (can_compile_sse_assembly_single) so that a successful single-precision
dnl compile cannot switch the SSE assembly kernels back on after the
dnl double-precision check has already set can_compile_sse_assembly=no.
dnl A failing single-precision compile still disables all SSE assembly kernels.
if test x"${want_single_precision}" = x"yes" ; then
AC_MSG_CHECKING([whether single-precision SSE assembly kernel can be compiled])
dnl Branch directly on the compiler exit status; this avoids the non-portable
dnl "==" operator of test, which is not accepted by every POSIX shell.
if $CC -c "$srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s" -o test.o 2>/dev/null; then
can_compile_sse_assembly_single=yes
else
can_compile_sse_assembly_single=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse_assembly_single}])
if test x"${can_compile_sse_assembly_single}" = x"no" ; then
dnl Downgrade only: the flags are never raised to "yes" in this block.
can_compile_sse_assembly=no
install_real_sse_assembly=no
install_complex_sse_assembly=no
AC_MSG_WARN([Cannot compile single-precision SSE assembly kernel: disabling SSE assembly kernels alltogether])
fi
fi
dnl check whether one can compile with SSE gcc intrinsics
AC_MSG_CHECKING(whether we can compile SSE with gcc intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m128d h1 = _mm_loaddup_pd(q);
return 0;
}
])],
[can_compile_sse_intrinsics=yes],
[can_compile_sse_intrinsics=no]
)
AC_MSG_RESULT([${can_compile_sse_intrinsics}])
if test "${can_compile_sse_intrinsics}" = "yes"; then
install_real_sse_intrinsics=yes
install_real_sse_block2=yes
install_real_sse_block4=yes
install_real_sse_block6=yes
install_complex_sse=yes
install_complex_sse_intrinsics=yes
install_complex_sse_block1=yes
install_complex_sse_block2=yes
else
can_compile_sse=no
install_real_sse=no
install_real_sse_intrinsics=no
install_real_sse_block2=no
install_real_sse_block4=no
install_real_sse_block6=no
install_complex_sse=no
install_complex_sse_intrinsics=no
install_complex_sse_block1=no
install_complex_sse_block2=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse}])
if test x"${want_single_precision}" = x"yes" ; then
AC_MSG_CHECKING(whether single-precision SSE assembler kernel can be compiled)
$CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s -o test.o 2>/dev/null
if test "$?" == 0; then
can_compile_sse=yes
install_real_sse=yes
install_complex_sse=yes
else
can_compile_sse=no
install_real_sse=no
install_complex_sse=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse}])
if test x"${can_compile_sse}" = x"no" ; then
AC_MSG_WARN([Cannot compile single-precision SSE kernel: disabling SSE kernels alltogether])
fi
fi
dnl check whether one can compile with avx - gcc intrinsics
dnl first pass: try with specified CFLAGS and CXXFLAGS
......@@ -356,10 +378,16 @@ else
install_complex_avx2_block1=no
install_complex_avx2_block2=no
fi
AM_CONDITIONAL([HAVE_SSE],[test x"$can_compile_sse" = x"yes"])
if test x"${can_compile_sse}" = x"yes" ; then
AC_DEFINE([HAVE_SSE],[1],[SSE is supported on this CPU])
AM_CONDITIONAL([HAVE_SSE_ASSEMBLY],[test x"$can_compile_sse_assembly" = x"yes"])
if test x"${can_compile_sse_assembly}" = x"yes" ; then
AC_DEFINE([HAVE_SSE_ASSEMBLY],[1],[assembly SSE is supported on this CPU])
fi
AM_CONDITIONAL([HAVE_SSE_INTRINSICS],[test x"$can_compile_sse_intrinsics" = x"yes"])
if test x"${can_compile_sse_intrinsics}" = x"yes" ; then
AC_DEFINE([HAVE_SSE_INTRINSICS],[1],[gcc intrinsics SSE is supported on this CPU])
fi
AM_CONDITIONAL([HAVE_AVX],[test x"$can_compile_avx" = x"yes"])
if test x"${can_compile_avx}" = x"yes" ; then
AC_DEFINE([HAVE_AVX],[1],[AVX is supported on this CPU])
......@@ -671,7 +699,7 @@ dnl generic-simple kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-generic-simple-kernel-only],[generic-simple-kernel],[install_real_generic_simple])
dnl sse kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-sse-kernel-only],[sse-kernel],[install_real_sse])
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-sse-assembly-kernel-only],[sse-assembly-kernel],[install_real_sse_assembly])
dnl bgp kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-bgp-kernel-only],[bgp-kernel],[install_real_bgp])
......@@ -706,7 +734,7 @@ dnl generic-simple kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-generic-simple-kernel-only],[generic-simple-kernel],[install_complex_generic_simple])
dnl sse kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-sse-kernel-only],[sse-kernel],[install_complex_sse])
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-sse-assembly-kernel-only],[sse-assembly-kernel],[install_complex_sse_assembly])
dnl complex-bgp kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-bgp-kernel-only],[bgp-kernel],[install_complex_bgp])
......@@ -757,14 +785,14 @@ if test x"${install_complex_generic_simple}" = x"yes" ; then
AC_DEFINE([WITH_COMPLEX_GENERIC_SIMPLE_KERNEL],[1],[can use complex generic-simple kernel])
fi
AM_CONDITIONAL([WITH_REAL_SSE_KERNEL],[test x"$install_real_sse" = x"yes"])
if test x"${install_real_sse}" = x"yes" ; then
AC_DEFINE([WITH_REAL_SSE_KERNEL],[1],[can use real SSE kernel])
AM_CONDITIONAL([WITH_REAL_SSE_ASSEMBLY_KERNEL],[test x"$install_real_sse_assembly" = x"yes"])
if test x"${install_real_sse_assembly}" = x"yes" ; then
AC_DEFINE([WITH_REAL_SSE_ASSEMBLY_KERNEL],[1],[can use real SSE assembly kernel])
fi
AM_CONDITIONAL([WITH_COMPLEX_SSE_KERNEL],[test x"$install_complex_sse" = x"yes"])
if test x"${install_complex_sse}" = x"yes" ; then
AC_DEFINE([WITH_COMPLEX_SSE_KERNEL],[1],[can use complex SSE kernel])
AM_CONDITIONAL([WITH_COMPLEX_SSE_ASSEMBLY_KERNEL],[test x"$install_complex_sse_assembly" = x"yes"])
if test x"${install_complex_sse_assembly}" = x"yes" ; then
AC_DEFINE([WITH_COMPLEX_SSE_ASSEMBLY_KERNEL],[1],[can use complex SSE assembly kernel])
fi
AM_CONDITIONAL([WITH_REAL_SSE_BLOCK2_KERNEL],[test x"$install_real_sse_block2" = x"yes"])
......
......@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE_INTRINSICS
#undef __AVX__
#endif
......@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv_double(double comple
static __forceinline void hh_trafo_complex_kernel_2_SSE_1hv_double(double complex* q, double complex* hh, int nb, int ldq);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE_INTRINSICS
!f> interface
!f> subroutine single_hh_trafo_complex_sse_1hv_double(q, hh, pnb, pnq, pldq) &
!f> bind(C, name="single_hh_trafo_complex_sse_1hv_double")
......
......@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE_INTRINSICS
#undef __AVX__
#endif
......@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv_single(complex* q, c
static __forceinline void hh_trafo_complex_kernel_2_SSE_1hv_single(complex* q, complex* hh, int nb, int ldq);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE_INTRINSICS
!f> interface
!f> subroutine single_hh_trafo_complex_sse_1hv_single(q, hh, pnb, pnq, pldq) &
!f> bind(C, name="single_hh_trafo_complex_sse_1hv_single")
......
......@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE_INTRINSICS
#undef __AVX__
#endif
......@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_2_SSE_2hv_double(double comple
static __forceinline void hh_trafo_complex_kernel_1_SSE_2hv_double(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE_INTRINSICS
!f> interface
!f> subroutine double_hh_trafo_complex_sse_2hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_complex_sse_2hv_double")
......
......@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE_INTRINSICS
#undef __AVX__
#endif
......@@ -74,7 +74,7 @@
static __forceinline void hh_trafo_complex_kernel_4_SSE_2hv_single(complex* q, complex* hh, int nb, int ldq, int ldh, complex s, complex s1);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE_INTRINSICS
!f> interface
!f> subroutine double_hh_trafo_complex_sse_2hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_complex_sse_2hv_single")
......
......@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE_INTRINSICS
#undef __AVX__
#endif
......@@ -79,7 +79,7 @@ __forceinline void hh_trafo_kernel_12_SSE_2hv_double(double* q, double* hh, int
void double_hh_trafo_real_sse_2hv_double(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE_INTRINSICS
!f> interface
!f> subroutine double_hh_trafo_real_sse_2hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_real_sse_2hv_double")
......
......@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE_INTRINSICS
#undef __AVX__
#endif
......@@ -79,7 +79,7 @@ __forceinline void hh_trafo_kernel_12_SSE_2hv_single(float* q, float* hh, int nb
void double_hh_trafo_real_sse_2hv_single_(float* q, float* hh, int* pnb, int* pnq, int* pldq, int* pldh);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE_INTRINSICS
!f> interface
!f> subroutine double_hh_trafo_real_sse_2hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_real_sse_2hv_single")
......
......@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE_INTRINSICS
#undef __AVX__
#endif
......@@ -78,7 +78,7 @@ __forceinline void hh_trafo_kernel_6_SSE_4hv_double(double* q, double* hh, int n
void quad_hh_trafo_real_sse_4hv_double(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE_INTRINSICS
!f> interface
!f> subroutine quad_hh_trafo_real_sse_4hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="quad_hh_trafo_real_sse_4hv_double")
......
......@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE_INTRINSICS
#undef __AVX__
#endif
__forceinline void hh_trafo_kernel_4_SSE_4hv_single(float* q, float* hh, int nb, int ldq, int ldh, float s_1_2, float s_1_3, float s_2_3, float s_1_4, float s_2_4, float s_3_4);
......@@ -76,7 +76,7 @@ __forceinline void hh_trafo_kernel_12_SSE_4hv_single(float* q, float* hh, int nb
void quad_hh_trafo_real_sse_4hv_single_(float* q, float* hh, int* pnb, int* pnq, int* pldq, int* pldh);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE_INTRINSICS
!f> interface
!f> subroutine quad_hh_trafo_real_sse_4hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="quad_hh_trafo_real_sse_4hv_single")
......
......@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE_INTRINSICS
#undef __AVX__
#endif
......@@ -76,7 +76,7 @@ static void hh_trafo_kernel_4_SSE_6hv_double(double* q, double* hh, int nb, int
void hexa_hh_trafo_real_sse_6hv_double(double* q, double* hh, int* pnb, int* pnq, int* pldq, int* pldh);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE_INTRINSICS
!f> interface
!f> subroutine hexa_hh_trafo_real_sse_6hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="hexa_hh_trafo_real_sse_6hv_double")
......
......@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE_INTRINSICS
#undef __AVX__
#endif
......@@ -80,7 +80,7 @@ static void hh_trafo_kernel_8_SSE_6hv_single(float* q, float* hh, int nb, int ld
void hexa_hh_trafo_real_sse_6hv_single_(float* q, float* hh, int* pnb, int* pnq, int* pldq, int* pldh);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE_INTRINSICS
!f> interface
!f> subroutine hexa_hh_trafo_real_sse_6hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="hexa_hh_trafo_real_sse_6hv_single")
......
......@@ -132,7 +132,7 @@ module ELPA2_utilities
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_REAL_SSE_KERNEL
#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_SSE
#endif
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
......@@ -168,7 +168,7 @@ module ELPA2_utilities
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_REAL_SSE_KERNEL
#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_SSE
#endif
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
......@@ -238,7 +238,7 @@ module ELPA2_utilities
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_COMPLEX_SSE_KERNEL
#ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_SSE
#endif
#ifdef WITH_COMPLEX_AVX1_BLOCK1_KERNEL
......@@ -267,7 +267,7 @@ module ELPA2_utilities
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_COMPLEX_SSE_KERNEL
#ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
integer(kind=ik), parameter :: DEFAULT_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_SSE
#endif
#ifdef WITH_COMPLEX_AVX1_BLOCK1_KERNEL
......@@ -321,7 +321,7 @@ module ELPA2_utilities
#else
,0 &
#endif
#if WITH_REAL_SSE_KERNEL
#if WITH_REAL_SSE_ASSEMBLY_KERNEL
,1 &
#else
,0 &
......@@ -402,7 +402,7 @@ module ELPA2_utilities
#else
,0 &
#endif
#if WITH_COMPLEX_SSE_KERNEL
#if WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
,1 &
#else
,0 &
......
......@@ -87,7 +87,7 @@ module compute_hh_trafo_complex
#endif
use iso_c_binding
#if defined(HAVE_AVX) || defined(HAVE_SSE)
#if defined(HAVE_AVX) || defined(HAVE_SSE_INTRINSICS) || defined(HAVE_SSE_ASSEMBLY)
use kernel_interfaces
#endif
implicit none
......@@ -271,7 +271,7 @@ module compute_hh_trafo_complex
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_GENERIC_KERNEL */
#if defined(WITH_COMPLEX_SSE_KERNEL)
#if defined(WITH_COMPLEX_SSE_ASSEMBLY_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_SSE) then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
......@@ -288,7 +288,7 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_SSE_KERNEL */
#endif /* WITH_COMPLEX_SSE_ASSEMBLY_KERNEL */
!#if defined(WITH_AVX_SANDYBRIDGE)
......@@ -386,7 +386,7 @@ module compute_hh_trafo_complex
use timings
#endif
#if defined(HAVE_AVX) || defined(HAVE_SSE)
#if defined(HAVE_AVX) || defined(HAVE_SSE_INTRINSICS) || defined(HAVE_SSE_ASSEMBLY)
use kernel_interfaces
#endif
use iso_c_binding
......@@ -569,7 +569,7 @@ module compute_hh_trafo_complex
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_GENERIC_KERNEL */
#if defined(WITH_COMPLEX_SSE_KERNEL)
#if defined(WITH_COMPLEX_SSE_ASSEMBLY_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_SSE) then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
......@@ -586,7 +586,7 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_SSE_KERNEL */
#endif /* WITH_COMPLEX_SSE_ASSEMBLY_KERNEL */
!#if defined(WITH_AVX_SANDYBRIDGE)
......
......@@ -104,7 +104,7 @@ module compute_hh_trafo_real
use timings
#endif
#if defined(HAVE_AVX) || defined(HAVE_SSE)
#if defined(HAVE_AVX) || defined(HAVE_SSE_INTRINSICS) || defined(HAVE_SSE_ASSEMBLY)
use kernel_interfaces
#endif
implicit none
......@@ -275,7 +275,7 @@ module compute_hh_trafo_real
#endif /* WITH_REAL_GENERIC_SIMPLE_KERNEL */
#if defined(WITH_REAL_SSE_KERNEL)
#if defined(WITH_REAL_SSE_ASSEMBLY_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
if (THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_SSE) then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
......@@ -293,7 +293,7 @@ module compute_hh_trafo_real
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_SSE_KERNEL */
#endif /* WITH_REAL_SSE_ASSEMBLY_KERNEL */
#if defined(WITH_REAL_SSE_BLOCK2_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
......@@ -661,7 +661,7 @@ module compute_hh_trafo_real
use timings
#endif
#if defined(HAVE_AVX) || defined(HAVE_SSE)
#if defined(HAVE_AVX) || defined(HAVE_SSE_INTRINSICS) || defined(HAVE_SSE_ASSEMBLY)
use kernel_interfaces
#endif
implicit none
......@@ -831,7 +831,7 @@ module compute_hh_trafo_real
#endif /* WITH_REAL_GENERIC_SIMPLE_KERNEL */
#if defined(WITH_REAL_SSE_KERNEL)
#if defined(WITH_REAL_SSE_ASSEMBLY_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
if (THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_SSE) then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
......@@ -849,7 +849,7 @@ module compute_hh_trafo_real
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_SSE_KERNEL */
#endif /* WITH_REAL_SSE_ASSEMBLY_KERNEL */
#if defined(WITH_REAL_SSE_BLOCK2_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment