Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
elpa
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
15
Issues
15
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Environments
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
elpa
elpa
Commits
09d13e2b
Commit
09d13e2b
authored
Apr 20, 2016
by
Andreas Marek
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into ELPA_GPU
parents
62fe6edc
0d256c1b
Changes
17
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
124 additions
and
84 deletions
+124
-84
.gitignore
.gitignore
+6
-0
.gitlab-ci.yml
.gitlab-ci.yml
+7
-1
Makefile.am
Makefile.am
+3
-3
configure.ac
configure.ac
+70
-42
src/elpa2_kernels/elpa2_kernels_complex_sse_1hv_double_precision.c
..._kernels/elpa2_kernels_complex_sse_1hv_double_precision.c
+2
-2
src/elpa2_kernels/elpa2_kernels_complex_sse_1hv_single_precision.c
..._kernels/elpa2_kernels_complex_sse_1hv_single_precision.c
+2
-2
src/elpa2_kernels/elpa2_kernels_complex_sse_2hv_double_precision.c
..._kernels/elpa2_kernels_complex_sse_2hv_double_precision.c
+2
-2
src/elpa2_kernels/elpa2_kernels_complex_sse_2hv_single_precision.c
..._kernels/elpa2_kernels_complex_sse_2hv_single_precision.c
+2
-2
src/elpa2_kernels/elpa2_kernels_real_sse_2hv_double_precision.c
...pa2_kernels/elpa2_kernels_real_sse_2hv_double_precision.c
+2
-2
src/elpa2_kernels/elpa2_kernels_real_sse_2hv_single_precision.c
...pa2_kernels/elpa2_kernels_real_sse_2hv_single_precision.c
+2
-2
src/elpa2_kernels/elpa2_kernels_real_sse_4hv_double_precision.c
...pa2_kernels/elpa2_kernels_real_sse_4hv_double_precision.c
+2
-2
src/elpa2_kernels/elpa2_kernels_real_sse_4hv_single_precision.c
...pa2_kernels/elpa2_kernels_real_sse_4hv_single_precision.c
+2
-2
src/elpa2_kernels/elpa2_kernels_real_sse_6hv_double_precision.c
...pa2_kernels/elpa2_kernels_real_sse_6hv_double_precision.c
+2
-2
src/elpa2_kernels/elpa2_kernels_real_sse_6hv_single_precision.c
...pa2_kernels/elpa2_kernels_real_sse_6hv_single_precision.c
+2
-2
src/elpa2_utilities.F90
src/elpa2_utilities.F90
+6
-6
src/mod_compute_hh_trafo_complex.F90
src/mod_compute_hh_trafo_complex.F90
+6
-6
src/mod_compute_hh_trafo_real.F90
src/mod_compute_hh_trafo_real.F90
+6
-6
No files found.
.gitignore
View file @
09d13e2b
...
...
@@ -12,6 +12,7 @@ autom4te.cache
compile
config.guess
config.h.in
config.h.in~
config.sub
configure
depcomp
...
...
@@ -19,3 +20,8 @@ install-sh
ltmain.sh
missing
test-driver
m4/libtool.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
.gitlab-ci.yml
View file @
09d13e2b
jobs
:
script
:
./autogen.sh && ./configure && make && make check TEST_FLAGS='1500 50 16'
script
:
-
export LANG=C
-
module load impi intel gcc mkl autotools
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64"
-
make -j
8
-
make check TEST_FLAGS='1500 50 16'
Makefile.am
View file @
09d13e2b
...
...
@@ -82,18 +82,18 @@ if WITH_REAL_BGQ_KERNEL
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_real_bgq.f90
endif
if
WITH_REAL_SSE_KERNEL
if
WITH_REAL_SSE_
ASSEMBLY_
KERNEL
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s
if
WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s
endif
else
if
WITH_COMPLEX_SSE_KERNEL
if
WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s
if
WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@
_la_SOURCES
+=
src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s
endif
endif
endif
...
...
configure.ac
View file @
09d13e2b
...
...
@@ -202,55 +202,77 @@ if test x"${with_ftimings}" = x"yes"; then
fi
AM_CONDITIONAL([HAVE_DETAILED_TIMINGS],[test x"$with_ftimings" = x"yes"])
AC_MSG_CHECKING(whether double-precision SSE assembl
er
kernel can be compiled)
AC_MSG_CHECKING(whether double-precision SSE assembl
y
kernel can be compiled)
$CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s -o test.o 2>/dev/null
if test "$?" == 0; then
can_compile_sse=yes
install_real_sse=yes
can_compile_sse_assembly=yes
install_real_sse_assembly=yes
install_complex_sse_assembly=yes
else
can_compile_sse_assembly=no
install_real_sse_assembly=no
install_complex_sse_assembly=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse_assembly}])
if test x"${want_single_precision}" = x"yes" ; then
AC_MSG_CHECKING(whether single-precision SSE assembly kernel can be compiled)
$CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_single_precision.s -o test.o 2>/dev/null
if test "$?" == 0; then
can_compile_sse_assembly=yes
install_real_sse_assembly=yes
install_complex_sse_assembly=yes
else
can_compile_sse_assembly=no
install_real_sse_assembly=no
install_complex_sse_assembly=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse_assembly}])
if test x"${can_compile_sse_assembly}" = x"no" ; then
AC_MSG_WARN([Cannot compile single-precision SSE assembly kernel: disabling SSE assembly kernels alltogether])
fi
fi
dnl check whether one can compile with sse-gcc intrinsics
AC_MSG_CHECKING(whether we can compile SSE with gcc intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m128d h1 = _mm_loaddup_pd(q);
return 0;
}
])],
[can_compile_sse_intrinsics=yes],
[can_compile_sse_intrinsics=no]
)
AC_MSG_RESULT([${can_compile_sse_intrinsics}])
if test "${can_compile_sse_intrinsics}" = "yes"; then
install_real_sse_intrinsics=yes
install_real_sse_block2=yes
install_real_sse_block4=yes
install_real_sse_block6=yes
install_complex_sse=yes
install_complex_sse
_intrinsics
=yes
install_complex_sse_block1=yes
install_complex_sse_block2=yes
else
can_compile_sse=no
install_real_sse=no
install_real_sse_intrinsics=no
install_real_sse_block2=no
install_real_sse_block4=no
install_real_sse_block6=no
install_complex_sse=no
install_complex_sse
_intrinsics
=no
install_complex_sse_block1=no
install_complex_sse_block2=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse}])
if test x"${want_single_precision}" = x"yes" ; then
AC_MSG_CHECKING(whether single-precision SSE assembler kernel can be compiled)
$CC -c $srcdir/src/elpa2_kernels/elpa2_kernels_asm_x86_64_double_precision.s -o test.o 2>/dev/null
if test "$?" == 0; then
can_compile_sse=yes
install_real_sse=yes
install_complex_sse=yes
else
can_compile_sse=no
install_real_sse=no
install_complex_sse=no
fi
rm -f ./test.o
AC_MSG_RESULT([${can_compile_sse}])
if test x"${can_compile_sse}" = x"no" ; then
AC_MSG_WARN([Cannot compile single-precision SSE kernel: disabling SSE kernels alltogether])
fi
fi
dnl check whether one can compile with avx - gcc intrinsics
dnl first pass: try with specified CFLAGS and CXXFLAGS
...
...
@@ -356,10 +378,16 @@ else
install_complex_avx2_block1=no
install_complex_avx2_block2=no
fi
AM_CONDITIONAL([HAVE_SSE],[test x"$can_compile_sse" = x"yes"])
if test x"${can_compile_sse}" = x"yes" ; then
AC_DEFINE([HAVE_SSE],[1],[SSE is supported on this CPU])
AM_CONDITIONAL([HAVE_SSE_ASSEMBLY],[test x"$can_compile_sse_assembly" = x"yes"])
if test x"${can_compile_sse_assembly}" = x"yes" ; then
AC_DEFINE([HAVE_SSE_ASSEMBLY],[1],[assembly SSE is supported on this CPU])
fi
AM_CONDITIONAL([HAVE_SSE_INTRINSICS],[test x"$can_compile_sse_intrinsics" = x"yes"])
if test x"${can_compile_sse_intrinsics}" = x"yes" ; then
AC_DEFINE([HAVE_SSE_INTRINSICS],[1],[gcc intrinsics SSE is supported on this CPU])
fi
AM_CONDITIONAL([HAVE_AVX],[test x"$can_compile_avx" = x"yes"])
if test x"${can_compile_avx}" = x"yes" ; then
AC_DEFINE([HAVE_AVX],[1],[AVX is supported on this CPU])
...
...
@@ -671,7 +699,7 @@ dnl generic-simple kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-generic-simple-kernel-only],[generic-simple-kernel],[install_real_generic_simple])
dnl sse kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-sse-
kernel-only],[sse-kernel],[install_real_sse
])
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-sse-
assembly-kernel-only],[sse-assembly-kernel],[install_real_sse_assembly
])
dnl bgp kernel
DEFINE_OPTION_SPECIFIC_REAL_KERNEL([real-bgp-kernel-only],[bgp-kernel],[install_real_bgp])
...
...
@@ -706,7 +734,7 @@ dnl generic-simple kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-generic-simple-kernel-only],[generic-simple-kernel],[install_complex_generic_simple])
dnl sse kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-sse-
kernel-only],[sse-kernel],[install_complex_sse
])
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-sse-
assembly-kernel-only],[sse-assembly-kernel],[install_complex_sse_assembly
])
dnl complex-bgp kernel
DEFINE_OPTION_SPECIFIC_COMPLEX_KERNEL([complex-bgp-kernel-only],[bgp-kernel],[install_complex_bgp])
...
...
@@ -757,14 +785,14 @@ if test x"${install_complex_generic_simple}" = x"yes" ; then
AC_DEFINE([WITH_COMPLEX_GENERIC_SIMPLE_KERNEL],[1],[can use complex generic-simple kernel])
fi
AM_CONDITIONAL([WITH_REAL_SSE_
KERNEL],[test x"$install_real_sse
" = x"yes"])
if test x"${install_real_sse}" = x"yes" ; then
AC_DEFINE([WITH_REAL_SSE_
KERNEL],[1],[can use real SSE
kernel])
AM_CONDITIONAL([WITH_REAL_SSE_
ASSEMBLY_KERNEL],[test x"$install_real_sse_assembly
" = x"yes"])
if test x"${install_real_sse
_assembly
}" = x"yes" ; then
AC_DEFINE([WITH_REAL_SSE_
ASSEMBLY_KERNEL],[1],[can use real SSE assembly
kernel])
fi
AM_CONDITIONAL([WITH_COMPLEX_SSE_
KERNEL],[test x"$install_complex_sse
" = x"yes"])
if test x"${install_complex_sse}" = x"yes" ; then
AC_DEFINE([WITH_COMPLEX_SSE_
KERNEL],[1],[can use complex SSE
kernel])
AM_CONDITIONAL([WITH_COMPLEX_SSE_
ASSEMBLY_KERNEL],[test x"$install_complex_sse_assembly
" = x"yes"])
if test x"${install_complex_sse
_assembly
}" = x"yes" ; then
AC_DEFINE([WITH_COMPLEX_SSE_
ASSEMBLY_KERNEL],[1],[can use complex SSE assembly
kernel])
fi
AM_CONDITIONAL([WITH_REAL_SSE_BLOCK2_KERNEL],[test x"$install_real_sse_block2" = x"yes"])
...
...
src/elpa2_kernels/elpa2_kernels_complex_sse_1hv_double_precision.c
View file @
09d13e2b
...
...
@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#endif
...
...
@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv_double(double comple
static
__forceinline
void
hh_trafo_complex_kernel_2_SSE_1hv_double
(
double
complex
*
q
,
double
complex
*
hh
,
int
nb
,
int
ldq
);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> subroutine single_hh_trafo_complex_sse_1hv_double(q, hh, pnb, pnq, pldq) &
!f> bind(C, name="single_hh_trafo_complex_sse_1hv_double")
...
...
src/elpa2_kernels/elpa2_kernels_complex_sse_1hv_single_precision.c
View file @
09d13e2b
...
...
@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#endif
...
...
@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_4_SSE_1hv_single(complex* q, c
static
__forceinline
void
hh_trafo_complex_kernel_2_SSE_1hv_single
(
complex
*
q
,
complex
*
hh
,
int
nb
,
int
ldq
);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> subroutine single_hh_trafo_complex_sse_1hv_single(q, hh, pnb, pnq, pldq) &
!f> bind(C, name="single_hh_trafo_complex_sse_1hv_single")
...
...
src/elpa2_kernels/elpa2_kernels_complex_sse_2hv_double_precision.c
View file @
09d13e2b
...
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#endif
...
...
@@ -78,7 +78,7 @@ static __forceinline void hh_trafo_complex_kernel_2_SSE_2hv_double(double comple
static
__forceinline
void
hh_trafo_complex_kernel_1_SSE_2hv_double
(
double
complex
*
q
,
double
complex
*
hh
,
int
nb
,
int
ldq
,
int
ldh
,
double
complex
s
);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> subroutine double_hh_trafo_complex_sse_2hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_complex_sse_2hv_double")
...
...
src/elpa2_kernels/elpa2_kernels_complex_sse_2hv_single_precision.c
View file @
09d13e2b
...
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline))
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#endif
...
...
@@ -74,7 +74,7 @@
static
__forceinline
void
hh_trafo_complex_kernel_4_SSE_2hv_single
(
complex
*
q
,
complex
*
hh
,
int
nb
,
int
ldq
,
int
ldh
,
complex
s
,
complex
s1
);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> subroutine double_hh_trafo_complex_sse_2hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_complex_sse_2hv_single")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_2hv_double_precision.c
View file @
09d13e2b
...
...
@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#endif
...
...
@@ -79,7 +79,7 @@ __forceinline void hh_trafo_kernel_12_SSE_2hv_double(double* q, double* hh, int
void
double_hh_trafo_real_sse_2hv_double
(
double
*
q
,
double
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> subroutine double_hh_trafo_real_sse_2hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_real_sse_2hv_double")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_2hv_single_precision.c
View file @
09d13e2b
...
...
@@ -67,7 +67,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#endif
...
...
@@ -79,7 +79,7 @@ __forceinline void hh_trafo_kernel_12_SSE_2hv_single(float* q, float* hh, int nb
void
double_hh_trafo_real_sse_2hv_single_
(
float
*
q
,
float
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> subroutine double_hh_trafo_real_sse_2hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="double_hh_trafo_real_sse_2hv_single")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_4hv_double_precision.c
View file @
09d13e2b
...
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#endif
...
...
@@ -78,7 +78,7 @@ __forceinline void hh_trafo_kernel_6_SSE_4hv_double(double* q, double* hh, int n
void
quad_hh_trafo_real_sse_4hv_double
(
double
*
q
,
double
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> subroutine quad_hh_trafo_real_sse_4hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="quad_hh_trafo_real_sse_4hv_double")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_4hv_single_precision.c
View file @
09d13e2b
...
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#endif
__forceinline
void
hh_trafo_kernel_4_SSE_4hv_single
(
float
*
q
,
float
*
hh
,
int
nb
,
int
ldq
,
int
ldh
,
float
s_1_2
,
float
s_1_3
,
float
s_2_3
,
float
s_1_4
,
float
s_2_4
,
float
s_3_4
);
...
...
@@ -76,7 +76,7 @@ __forceinline void hh_trafo_kernel_12_SSE_4hv_single(float* q, float* hh, int nb
void
quad_hh_trafo_real_sse_4hv_single_
(
float
*
q
,
float
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> subroutine quad_hh_trafo_real_sse_4hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="quad_hh_trafo_real_sse_4hv_single")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_6hv_double_precision.c
View file @
09d13e2b
...
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#endif
...
...
@@ -76,7 +76,7 @@ static void hh_trafo_kernel_4_SSE_6hv_double(double* q, double* hh, int nb, int
void
hexa_hh_trafo_real_sse_6hv_double
(
double
*
q
,
double
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> subroutine hexa_hh_trafo_real_sse_6hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="hexa_hh_trafo_real_sse_6hv_double")
...
...
src/elpa2_kernels/elpa2_kernels_real_sse_6hv_single_precision.c
View file @
09d13e2b
...
...
@@ -66,7 +66,7 @@
#define __forceinline __attribute__((always_inline)) static
#ifdef HAVE_SSE
#ifdef HAVE_SSE
_INTRINSICS
#undef __AVX__
#endif
...
...
@@ -80,7 +80,7 @@ static void hh_trafo_kernel_8_SSE_6hv_single(float* q, float* hh, int nb, int ld
void
hexa_hh_trafo_real_sse_6hv_single_
(
float
*
q
,
float
*
hh
,
int
*
pnb
,
int
*
pnq
,
int
*
pldq
,
int
*
pldh
);
/*
!f>#ifdef HAVE_SSE
!f>#ifdef HAVE_SSE
_INTRINSICS
!f> interface
!f> subroutine hexa_hh_trafo_real_sse_6hv_single(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="hexa_hh_trafo_real_sse_6hv_single")
...
...
src/elpa2_utilities.F90
View file @
09d13e2b
...
...
@@ -132,7 +132,7 @@ module ELPA2_utilities
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_REAL_SSE_KERNEL
#ifdef WITH_REAL_SSE_
ASSEMBLY_
KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE
#endif
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
...
...
@@ -168,7 +168,7 @@ module ELPA2_utilities
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_REAL_SSE_KERNEL
#ifdef WITH_REAL_SSE_
ASSEMBLY_
KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE
#endif
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
...
...
@@ -238,7 +238,7 @@ module ELPA2_utilities
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_COMPLEX_SSE_KERNEL
#ifdef WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE
#endif
#ifdef WITH_COMPLEX_AVX1_BLOCK1_KERNEL
...
...
@@ -267,7 +267,7 @@ module ELPA2_utilities
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_COMPLEX_SSE_KERNEL
#ifdef WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL
integer
(
kind
=
ik
),
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE
#endif
#ifdef WITH_COMPLEX_AVX1_BLOCK1_KERNEL
...
...
@@ -321,7 +321,7 @@ module ELPA2_utilities
#else
,
0
&
#endif
#if WITH_REAL_SSE_KERNEL
#if WITH_REAL_SSE_
ASSEMBLY_
KERNEL
,
1
&
#else
,
0
&
...
...
@@ -402,7 +402,7 @@ module ELPA2_utilities
#else
,
0
&
#endif
#if WITH_COMPLEX_SSE_KERNEL
#if WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL
,
1
&
#else
,
0
&
...
...
src/mod_compute_hh_trafo_complex.F90
View file @
09d13e2b
...
...
@@ -87,7 +87,7 @@ module compute_hh_trafo_complex
#endif
use
iso_c_binding
#if defined(HAVE_AVX) || defined(HAVE_SSE)
#if defined(HAVE_AVX) || defined(HAVE_SSE
_INTRINSICS) || defined(HAVE_SSE_ASSEMBLY
)
use
kernel_interfaces
#endif
implicit
none
...
...
@@ -271,7 +271,7 @@ module compute_hh_trafo_complex
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_GENERIC_KERNEL */
#if defined(WITH_COMPLEX_SSE_KERNEL)
#if defined(WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE
)
then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
...
...
@@ -288,7 +288,7 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_SSE_KERNEL */
#endif /* WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL */
!#if defined(WITH_AVX_SANDYBRIDGE)
...
...
@@ -386,7 +386,7 @@ module compute_hh_trafo_complex
use
timings
#endif
#if defined(HAVE_AVX) || defined(HAVE_SSE)
#if defined(HAVE_AVX) || defined(HAVE_SSE
_INTRINSICS) || defined(HAVE_SSE_ASSEMBLY
)
use
kernel_interfaces
#endif
use
iso_c_binding
...
...
@@ -569,7 +569,7 @@ module compute_hh_trafo_complex
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_GENERIC_KERNEL */
#if defined(WITH_COMPLEX_SSE_KERNEL)
#if defined(WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE
)
then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
...
...
@@ -586,7 +586,7 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_SSE_KERNEL */
#endif /* WITH_COMPLEX_SSE_
ASSEMBLY_
KERNEL */
!#if defined(WITH_AVX_SANDYBRIDGE)
...
...
src/mod_compute_hh_trafo_real.F90
View file @
09d13e2b
...
...
@@ -104,7 +104,7 @@ module compute_hh_trafo_real
use
timings
#endif
#if defined(HAVE_AVX) || defined(HAVE_SSE)
#if defined(HAVE_AVX) || defined(HAVE_SSE
_INTRINSICS) || defined(HAVE_SSE_ASSEMBLY
)
use
kernel_interfaces
#endif
implicit
none
...
...
@@ -275,7 +275,7 @@ module compute_hh_trafo_real
#endif /* WITH_REAL_GENERIC_SIMPLE_KERNEL */
#if defined(WITH_REAL_SSE_KERNEL)
#if defined(WITH_REAL_SSE_
ASSEMBLY_
KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
if
(
THIS_REAL_ELPA_KERNEL
.eq.
REAL_ELPA_KERNEL_SSE
)
then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
...
...
@@ -293,7 +293,7 @@ module compute_hh_trafo_real
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_SSE_KERNEL */
#endif /* WITH_REAL_SSE_
ASSEMBLY_
KERNEL */
#if defined(WITH_REAL_SSE_BLOCK2_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
...
...
@@ -661,7 +661,7 @@ module compute_hh_trafo_real
use
timings
#endif
#if defined(HAVE_AVX) || defined(HAVE_SSE)
#if defined(HAVE_AVX) || defined(HAVE_SSE
_INTRINSICS) || defined(HAVE_SSE_ASSEMBLY
)
use
kernel_interfaces
#endif
implicit
none
...
...
@@ -831,7 +831,7 @@ module compute_hh_trafo_real
#endif /* WITH_REAL_GENERIC_SIMPLE_KERNEL */
#if defined(WITH_REAL_SSE_KERNEL)
#if defined(WITH_REAL_SSE_
ASSEMBLY_
KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
if
(
THIS_REAL_ELPA_KERNEL
.eq.
REAL_ELPA_KERNEL_SSE
)
then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
...
...
@@ -849,7 +849,7 @@ module compute_hh_trafo_real
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_SSE_KERNEL */
#endif /* WITH_REAL_SSE_
ASSEMBLY_
KERNEL */
#if defined(WITH_REAL_SSE_BLOCK2_KERNEL)
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment