Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
09d13e2b
Commit
09d13e2b
authored
Apr 20, 2016
by
Andreas Marek
Browse files
Merge branch 'master' into ELPA_GPU
parents
62fe6edc
0d256c1b
Changes
17
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
.gitignore
View file @
09d13e2b
...
@@ -12,6 +12,7 @@ autom4te.cache
...
@@ -12,6 +12,7 @@ autom4te.cache
compile
compile
config.guess
config.guess
config.h.in
config.h.in
config.h.in~
config.sub
config.sub
configure
configure
depcomp
depcomp
...
@@ -19,3 +20,8 @@ install-sh
...
@@ -19,3 +20,8 @@ install-sh
ltmain.sh
ltmain.sh
missing
missing
test-driver
test-driver
m4/libtool.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
.gitlab-ci.yml
View file @
09d13e2b
jobs
:
jobs
:
script
:
./autogen.sh && ./configure && make && make check TEST_FLAGS='1500 50 16'
script
:
-
export LANG=C
-
module load impi intel gcc mkl autotools
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64"
-
make -j
8
-
make check TEST_FLAGS='1500 50 16'
Makefile.am
View file @
09d13e2b
...
@@ -82,18 +82,18 @@ if WITH_REAL_BGQ_KERNEL
...
@@ -82,18 +82,18 @@ if WITH_REAL_BGQ_KERNEL
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_real_bgq.f90
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_real_bgq.f90
endif
endif
if
WITH_REAL_SSE_KERNEL
if
WITH_REAL_SSE_
ASSEMBLY_
KERNEL
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s
if
WANT_SINGLE_PRECISION_REAL
if
WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s
endif
endif
else
else
if
WITH_COMPLEX_SSE_KERNEL
if
WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s
if
WANT_SINGLE_PRECISION_COMPLEX
if
WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s
endif
endif
endif
endif
endif
endif
...
...
configure.ac
View file @
09d13e2b
...
@@ -202,55 +202,77 @@ if test x"${with_ftimings}" = x"yes"; then
...
@@ -202,55 +202,77 @@ if test x"${with_ftimings}" = x"yes"; then
fi
fi
AM_CONDITIONAL([HAVE_DETAILED_TIMINGS],[test x"$with_ftimings" = x"yes"])
AM_CONDITIONAL([HAVE_DETAILED_TIMINGS],[test x"$with_ftimings" = x"yes"])
AC_MSG_CHECKING(whether double-precision SSE assembl
er
kernel can be compiled)
AC_MSG_CHECKING(whether double-precision SSE assembl
y
kernel can be compiled)
$CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s -o test.o 2>/dev/null
$CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s -o test.o 2>/dev/null
if test "$?" == 0; then
if test "$?" == 0; then
can_compile_sse=yes
can_compile_sse_assembly=yes
install_real_sse=yes
install_real_sse_assembly=yes
install_complex_sse_assembly=yes
else
can_compile_sse_assembly=no
install_real_sse_assembly=no
install_complex_sse_assembly=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse_assembly}])
if test x"${want_single_precision}" = x"yes" ; then
AC_MSG_CHECKING(whether single-precision SSE assembly kernel can be compiled)
$CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s -o test.o 2>/dev/null
if test "$?" == 0; then
can_compile_sse_assembly=yes
install_real_sse_assembly=yes
install_complex_sse_assembly=yes
else
can_compile_sse_assembly=no
install_real_sse_assembly=no
install_complex_sse_assembly=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse_assembly}])
if test x"${can_compile_sse_assembly}" = x"no" ; then
AC_MSG_WARN([Cannot compile single-precision SSE assembly kernel: disabling SSE assembly kernels alltogether])
fi
fi
dnl check whether one can compile with sse-gcc intrinsics
AC_MSG_CHECKING(whether we can compile SSE with gcc intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m128d h1 = _mm_loaddup_pd(q);
return 0;
}
])],
[can_compile_sse_intrinsics=yes],
[can_compile_sse_intrinsics=no]
)
AC_MSG_RESULT([${can_compile_sse_intrinsics}])
if test "${can_compile_sse_intrinsics}" = "yes"; then
install_real_sse_intrinsics=yes
install_real_sse_block2=yes
install_real_sse_block2=yes
install_real_sse_block4=yes
install_real_sse_block4=yes
install_real_sse_block6=yes
install_real_sse_block6=yes
install_complex_sse=yes
install_complex_sse
_intrinsics
=yes
install_complex_sse_block1=yes
install_complex_sse_block1=yes
install_complex_sse_block2=yes
install_complex_sse_block2=yes
else
else
can_compile_sse=no
install_real_sse_intrinsics=no
install_real_sse=no
install_real_sse_block2=no
install_real_sse_block2=no
install_real_sse_block4=no
install_real_sse_block4=no
install_real_sse_block6=no
install_real_sse_block6=no
install_complex_sse=no
install_complex_sse
_intrinsics
=no
install_complex_sse_block1=no
install_complex_sse_block1=no
install_complex_sse_block2=no
install_complex_sse_block2=no
fi
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse}])
if test x"${want_single_precision}" = x"yes" ; then
AC_MSG_CHECKING(whether single-precision SSE assembler kernel can be compiled)
$CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s -o test.o 2>/dev/null
if test "$?" == 0; then
can_compile_sse=yes
install_real_sse=yes
install_complex_sse=yes
else
can_compile_sse=no
install_real_sse=no
install_complex_sse=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse}])
if test x"${can_compile_sse}" = x"no" ; then
AC_MSG_WARN([Cannot compile single-precision SSE kernel: disabling SSE kernels alltogether])
fi
fi
dnl check whether one can compile with avx - gcc intrinsics
dnl check whether one can compile with avx - gcc intrinsics
dnl first pass: try with specified CFLAGS and CXXFLAGS
dnl first pass: try with specified CFLAGS and CXXFLAGS
...
@@ -356,10 +378,16 @@ else
...
@@ -356,10 +378,16 @@ else
install_complex_avx2_block1=no
install_complex_avx2_block1=no
install_complex_avx2_block2=no
install_complex_avx2_block2=no
fi
fi
AM_CONDITIONAL([HAVE_SSE],[test x"$can_compile_sse" = x"yes"])
if test x"${can_compile_sse}" = x"yes" ; then
AM_CONDITIONAL([HAVE_SSE_ASSEMBLY],[test x"$can_compile_sse_assembly" = x"yes"])
AC_DEFINE([HAVE_SSE],[1],[SSE is supported on this CPU])
if test x"${can_compile_sse_assembly}" = x"yes" ; then
AC_DEFINE([HAVE_SSE_ASSEMBLY],[1],[assembly SSE is supported on this CPU])
fi
AM_CONDITIONAL([HAVE_SSE_INTRINSICS],[test x"$can_compile_sse_intrinsics" = x"yes"])
if test x"${can_compile_sse_intrinsics}" = x"yes" ; then
AC_DEFINE([HAVE_SSE_INTRINSICS],[1],[gcc intrinsics SSE is supported on this CPU])
fi
fi
AM_CONDITIONAL([HAVE_AVX],[test x"$can_compile_avx" = x"yes"])
AM_CONDITIONAL([HAVE_AVX],[test x"$can_compile_avx" = x"yes"])
if test x"${can_compile_avx}" = x"yes" ; then
if test x"${can_compile_avx}" = x"yes" ; then
AC_DEFINE([HAVE_AVX],[1],[AVX is supported on this CPU])
AC_DEFINE([HAVE_AVX],[1],[AVX is supported on this CPU])
...
@@ -671,7 +699,7 @@ dnl generic-simple kernel
...
@@ -671,7 +699,7 @@ dnl generic-simple kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-generic-simple-kernel-only],[generic-simple-kernel],[install_real_generic_simple])
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-generic-simple-kernel-only],[generic-simple-kernel],[install_real_generic_simple])
dnl sse kernel
dnl sse kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-sse-kernel-only],[sse-kernel],[install_real_sse])
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-sse-
assembly-
kernel-only],[sse-
assembly-
kernel],[install_real_sse
_assembly
])
dnl bgp kernel
dnl bgp kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-bgp-kernel-only],[bgp-kernel],[install_real_bgp])
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-bgp-kernel-only],[bgp-kernel],[install_real_bgp])
...
@@ -706,7 +734,7 @@ dnl generic-simple kernel
...
@@ -706,7 +734,7 @@ dnl generic-simple kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-generic-simple-kernel-only],[generic-simple-kernel],[install_complex_generic_simple])
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-generic-simple-kernel-only],[generic-simple-kernel],[install_complex_generic_simple])
dnl sse kernel
dnl sse kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-sse-kernel-only],[sse-kernel],[install_complex_sse])
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-sse-
assembly-
kernel-only],[sse-
assembly-
kernel],[install_complex_sse
_assembly
])
dnl complex-bgp kernel
dnl complex-bgp kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-bgp-kernel-only],[bgp-kernel],[install_complex_bgp])
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-bgp-kernel-only],[bgp-kernel],[install_complex_bgp])
...
@@ -757,14 +785,14 @@ if test x"${install_complex_generic_simple}" = x"yes" ; then
...
@@ -757,14 +785,14 @@ if test x"${install_complex_generic_simple}" = x"yes" ; then
AC_DEFINE([WITH_COMPLEX_GENERIC_SIMPLE_KERNEL],[1],[can use complex generic-simple kernel])
AC_DEFINE([WITH_COMPLEX_GENERIC_SIMPLE_KERNEL],[1],[can use complex generic-simple kernel])
fi
fi
AM_CONDITIONAL([WITH_REAL_SSE_KERNEL],[test x"$install_real_sse" = x"yes"])
AM_CONDITIONAL([WITH_REAL_SSE_
ASSEMBLY_
KERNEL],[test x"$install_real_sse
_assembly
" = x"yes"])
if test x"${install_real_sse}" = x"yes" ; then
if test x"${install_real_sse
_assembly
}" = x"yes" ; then
AC_DEFINE([WITH_REAL_SSE_KERNEL],[1],[can use real SSE kernel])
AC_DEFINE([WITH_REAL_SSE_
ASSEMBLY_
KERNEL],[1],[can use real SSE
assembly
kernel])
fi
fi
AM_CONDITIONAL([WITH_COMPLEX_SSE_KERNEL],[test x"$install_complex_sse" = x"yes"])
AM_CONDITIONAL([WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL],[test x"$install_complex_sse
_assembly
" = x"yes"])
if test x"${install_complex_sse}" = x"yes" ; then
if test x"${install_complex_sse
_assembly
}" = x"yes" ; then
AC_DEFINE([WITH_COMPLEX_SSE_KERNEL],[1],[can use complex SSE kernel])
AC_DEFINE([WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL],[1],[can use complex SSE
assembly
kernel])
fi
fi
AM_CONDITIONAL([WITH_REAL_SSE_BLOCK2_KERNEL],[test x"$install_real_sse_block2" = x"yes"])
AM_CONDITIONAL([WITH_REAL_SSE_BLOCK2_KERNEL],[test x"$install_real_sse_block2" = x"yes"])
...
...
src/elpa2_kernels/elpa2_kernels_complex_sse_1hv_double_precision.c
View file @
09d13e2b
...
@@ -67,7 +67,7 @@
...
@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline))
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#undef __AVX__
#endif
#endif
...
@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv_double(double comple
...
@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv_double(double comple
static
__forceinline
void
hh_trafo_complex_kernel_2_SSE_1hv_double
(
double
complex
*
q
,
double
complex
*
hh
,
int
nb
,
int
ldq
);
static
__forceinline
void
hh_trafo_complex_kernel_2_SSE_1hv_double
(
double
complex
*
q
,
double
complex
*
hh
,
int
nb
,
int
ldq
);
/*
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> interface
!f> subroutine single_hh_trafo_complex_sse_1hv_double(q, hh, pnb, pnq, pldq) &
!f> subroutine single_hh_trafo_complex_sse_1hv_double(q, hh, pnb, pnq, pldq) &
!f> bind(C, name="single_hh_trafo_complex_sse_1hv_double")
!f> bind(C, name="single_hh_trafo_complex_sse_1hv_double")
...
...
src/elpa2_kernels/elpa2_kernels_complex_sse_1hv_single_precision.c
View file @
09d13e2b
...
@@ -67,7 +67,7 @@
...
@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline))
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#undef __AVX__
#endif
#endif
...
@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv_single(complex* q, c
...
@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv_single(complex* q, c
static
__forceinline
void
hh_trafo_complex_kernel_2_SSE_1hv_single
(
complex
*
q
,
complex
*
hh
,
int
nb
,
int
ldq
);
static
__forceinline
void
hh_trafo_complex_kernel_2_SSE_1hv_single
(
complex
*
q
,
complex
*
hh
,
int
nb
,
int
ldq
);
/*
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> interface
!f> subroutine single_hh_trafo_complex_sse_1hv_single(q, hh, pnb, pnq, pldq) &
!f> subroutine single_hh_trafo_complex_sse_1hv_single(q, hh, pnb, pnq, pldq) &
!f> bind(C, name="single_hh_trafo_complex_sse_1hv_single")
!f> bind(C, name="single_hh_trafo_complex_sse_1hv_single")
...
...
src/elpa2_kernels/elpa2_kernels_complex_sse_2hv_double_precision.c
View file @
09d13e2b
...
@@ -66,7 +66,7 @@
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline))
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#undef __AVX__
#endif
#endif
...
@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_2_SSE_2hv_double(double comple
...
@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_2_SSE_2hv_double(double comple
static
__forceinline
void
hh_trafo_complex_kernel_1_SSE_2hv_double
(
double
complex
*
q
,
double
complex
*
hh
,
int
nb
,
int
ldq
,
int
ldh
,
double
complex
s
);
static
__forceinline
void
hh_trafo_complex_kernel_1_SSE_2hv_double
(
double
complex
*
q
,
double
complex
*
hh
,
int
nb
,
int
ldq
,
int
ldh
,
double
complex
s
);
/*
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> interface
!f> subroutine double_hh_trafo_complex_sse_2hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> subroutine double_hh_trafo_complex_sse_2hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_complex_sse_2hv_double")
!f> bind(C, name="double_hh_trafo_complex_sse_2hv_double")
...
...
src/elpa2_kernels/elpa2_kernels_complex_sse_2hv_single_precision.c
View file @
09d13e2b
...
@@ -66,7 +66,7 @@
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline))
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#undef __AVX__
#endif
#endif
...
@@ -74,7 +74,7 @@
...
@@ -74,7 +74,7 @@
static
__forceinline
void
hh_trafo_complex_kernel_4_SSE_2hv_single
(
complex
*
q
,
complex
*
hh
,
int
nb
,
int
ldq
,
int
ldh
,
complex
s
,
complex
s1
);
static
__forceinline
void
hh_trafo_complex_kernel_4_SSE_2hv_single
(
complex
*
q
,
complex
*
hh
,
int
nb
,
int
ldq
,
int
ldh
,
complex
s
,
complex
s1
);
/*
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> interface
!f> subroutine double_hh_trafo_complex_sse_2hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> subroutine double_hh_trafo_complex_sse_2hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_complex_sse_2hv_single")
!f> bind(C, name="double_hh_trafo_complex_sse_2hv_single")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_2hv_double_precision.c
View file @
09d13e2b
...
@@ -67,7 +67,7 @@
...
@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline)) static
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#undef __AVX__
#endif
#endif
...
@@ -79,7 +79,7 @@ __forceinline void hh_trafo_kernel_12_SSE_2hv_double(double* q, double* hh, int
...
@@ -79,7 +79,7 @@ __forceinline void hh_trafo_kernel_12_SSE_2hv_double(double* q, double* hh, int
void
double_hh_trafo_real_sse_2hv_double
(
double
*
q
,
double
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
void
double_hh_trafo_real_sse_2hv_double
(
double
*
q
,
double
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> interface
!f> subroutine double_hh_trafo_real_sse_2hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> subroutine double_hh_trafo_real_sse_2hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_real_sse_2hv_double")
!f> bind(C, name="double_hh_trafo_real_sse_2hv_double")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_2hv_single_precision.c
View file @
09d13e2b
...
@@ -67,7 +67,7 @@
...
@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline)) static
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#undef __AVX__
#endif
#endif
...
@@ -79,7 +79,7 @@ __forceinline void hh_trafo_kernel_12_SSE_2hv_single(float* q, float* hh, int nb
...
@@ -79,7 +79,7 @@ __forceinline void hh_trafo_kernel_12_SSE_2hv_single(float* q, float* hh, int nb
void
double_hh_trafo_real_sse_2hv_single_
(
float
*
q
,
float
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
void
double_hh_trafo_real_sse_2hv_single_
(
float
*
q
,
float
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> interface
!f> subroutine double_hh_trafo_real_sse_2hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> subroutine double_hh_trafo_real_sse_2hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_real_sse_2hv_single")
!f> bind(C, name="double_hh_trafo_real_sse_2hv_single")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_4hv_double_precision.c
View file @
09d13e2b
...
@@ -66,7 +66,7 @@
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#undef __AVX__
#endif
#endif
...
@@ -78,7 +78,7 @@ __forceinline void hh_trafo_kernel_6_SSE_4hv_double(double* q, double* hh, int n
...
@@ -78,7 +78,7 @@ __forceinline void hh_trafo_kernel_6_SSE_4hv_double(double* q, double* hh, int n
void
quad_hh_trafo_real_sse_4hv_double
(
double
*
q
,
double
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
void
quad_hh_trafo_real_sse_4hv_double
(
double
*
q
,
double
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> interface
!f> subroutine quad_hh_trafo_real_sse_4hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> subroutine quad_hh_trafo_real_sse_4hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="quad_hh_trafo_real_sse_4hv_double")
!f> bind(C, name="quad_hh_trafo_real_sse_4hv_double")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_4hv_single_precision.c
View file @
09d13e2b
...
@@ -66,7 +66,7 @@
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#undef __AVX__
#endif
#endif
__forceinline
void
hh_trafo_kernel_4_SSE_4hv_single
(
float
*
q
,
float
*
hh
,
int
nb
,
int
ldq
,
int
ldh
,
float
s_1_2
,
float
s_1_3
,
float
s_2_3
,
float
s_1_4
,
float
s_2_4
,
float
s_3_4
);
__forceinline
void
hh_trafo_kernel_4_SSE_4hv_single
(
float
*
q
,
float
*
hh
,
int
nb
,
int
ldq
,
int
ldh
,
float
s_1_2
,
float
s_1_3
,
float
s_2_3
,
float
s_1_4
,
float
s_2_4
,
float
s_3_4
);
...
@@ -76,7 +76,7 @@ __forceinline void hh_trafo_kernel_12_SSE_4hv_single(float* q, float* hh, int nb
...
@@ -76,7 +76,7 @@ __forceinline void hh_trafo_kernel_12_SSE_4hv_single(float* q, float* hh, int nb
void
quad_hh_trafo_real_sse_4hv_single_
(
float
*
q
,
float
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
void
quad_hh_trafo_real_sse_4hv_single_
(
float
*
q
,
float
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> interface
!f> subroutine quad_hh_trafo_real_sse_4hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> subroutine quad_hh_trafo_real_sse_4hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="quad_hh_trafo_real_sse_4hv_single")
!f> bind(C, name="quad_hh_trafo_real_sse_4hv_single")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_6hv_double_precision.c
View file @
09d13e2b
...
@@ -66,7 +66,7 @@
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#undef __AVX__
#endif
#endif
...
@@ -76,7 +76,7 @@ static void hh_trafo_kernel_4_SSE_6hv_double(double* q, double* hh, int nb, int
...
@@ -76,7 +76,7 @@ static void hh_trafo_kernel_4_SSE_6hv_double(double* q, double* hh, int nb, int
void
hexa_hh_trafo_real_sse_6hv_double
(
double
*
q
,
double
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
void
hexa_hh_trafo_real_sse_6hv_double
(
double
*
q
,
double
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> interface
!f> subroutine hexa_hh_trafo_real_sse_6hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> subroutine hexa_hh_trafo_real_sse_6hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="hexa_hh_trafo_real_sse_6hv_double")
!f> bind(C, name="hexa_hh_trafo_real_sse_6hv_double")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_6hv_single_precision.c
View file @
09d13e2b
...
@@ -66,7 +66,7 @@
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#undef __AVX__
#endif
#endif
...
@@ -80,7 +80,7 @@ static void hh_trafo_kernel_8_SSE_6hv_single(float* q, float* hh, int nb, int ld
...
@@ -80,7 +80,7 @@ static void hh_trafo_kernel_8_SSE_6hv_single(float* q, float* hh, int nb, int ld
void
hexa_hh_trafo_real_sse_6hv_single_
(
float
*
q
,
float
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
void
hexa_hh_trafo_real_sse_6hv_single_
(
float
*
q
,
float
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> interface
!f> subroutine hexa_hh_trafo_real_sse_6hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> subroutine hexa_hh_trafo_real_sse_6hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="hexa_hh_trafo_real_sse_6hv_single")
!f> bind(C, name="hexa_hh_trafo_real_sse_6hv_single")
...
...
src/elpa2_utilities.F90
View file @
09d13e2b
...
@@ -132,7 +132,7 @@ module ELPA2_utilities
...
@@ -132,7 +132,7 @@ module ELPA2_utilities
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC_SIMPLE
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#endif
#ifdef WITH_REAL_SSE_KERNEL
#ifdef WITH_REAL_SSE_
ASSEMBLY_
KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE
#endif
#endif
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
...
@@ -168,7 +168,7 @@ module ELPA2_utilities
...
@@ -168,7 +168,7 @@ module ELPA2_utilities
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC_SIMPLE
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#endif
#ifdef WITH_REAL_SSE_KERNEL
#ifdef WITH_REAL_SSE_
ASSEMBLY_
KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE
#endif
#endif
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
...
@@ -238,7 +238,7 @@ module ELPA2_utilities
...
@@ -238,7 +238,7 @@ module ELPA2_utilities
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#endif
#ifdef WITH_COMPLEX_SSE_KERNEL
#ifdef WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE
#endif
#endif
#ifdef WITH_COMPLEX_AVX1_BLOCK1_KERNEL
#ifdef WITH_COMPLEX_AVX1_BLOCK1_KERNEL
...
@@ -267,7 +267,7 @@ module ELPA2_utilities
...
@@ -267,7 +267,7 @@ module ELPA2_utilities
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#endif
#ifdef WITH_COMPLEX_SSE_KERNEL
#ifdef WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE
#endif
#endif
#ifdef WITH_COMPLEX_AVX1_BLOCK1_KERNEL
#ifdef WITH_COMPLEX_AVX1_BLOCK1_KERNEL
...
@@ -321,7 +321,7 @@ module ELPA2_utilities
...
@@ -321,7 +321,7 @@ module ELPA2_utilities
#else
#else
,
0
&
,
0
&
#endif
#endif
#if WITH_REAL_SSE_KERNEL
#if WITH_REAL_SSE_
ASSEMBLY_
KERNEL
,
1
&
,
1
&
#else
#else
,
0
&
,
0
&
...
@@ -402,7 +402,7 @@ module ELPA2_utilities
...
@@ -402,7 +402,7 @@ module ELPA2_utilities
#else
#else
,
0
&
,
0
&
#endif
#endif
#if WITH_COMPLEX_SSE_KERNEL