Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
a2bd28d3
Commit
a2bd28d3
authored
Jan 12, 2018
by
Andreas Marek
Browse files
Merge branch 'Skylake-master' into master_pre_stage
parents
8596c33e
6e5fd307
Changes
3
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
configure.ac
View file @
a2bd28d3
...
@@ -853,12 +853,12 @@ if test x"${need_avx512}" = x"yes"; then
...
@@ -853,12 +853,12 @@ if test x"${need_avx512}" = x"yes"; then
AC_DEFINE([HAVE_AVX512],[1],[AVX512 is supported on this CPU])
AC_DEFINE([HAVE_AVX512],[1],[AVX512 is supported on this CPU])
if test x"$can_compile_avx512" = x"yes"; then
if test x"$can_compile_avx512" = x"yes"; then
AC_MSG_CHECKING([whether we compile for Xeon PHI or Xeon])
AC_MSG_CHECKING([whether we compile for Xeon])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
AC_RUN_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
#include <x86intrin.h>
int main(int argc, char **argv){
int main(int argc, char **argv){
__m512d sign;
__m512d sign;
__m512d h1;
__m512d h1_real;
__m512d x1 = _mm512_xor_pd(h1_real, sign);
__m512d x1 = _mm512_xor_pd(h1_real, sign);
return 0;
return 0;
...
@@ -867,19 +867,37 @@ if test x"${need_avx512}" = x"yes"; then
...
@@ -867,19 +867,37 @@ if test x"${need_avx512}" = x"yes"; then
[can_compile_avx512_xeon=yes],
[can_compile_avx512_xeon=yes],
[can_compile_avx512_xeon=no]
[can_compile_avx512_xeon=no]
)
)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
AC_MSG_RESULT([${can_compile_avx512_xeon}])
AC_MSG_CHECKING([whether we compile for Xeon PHI])
AC_RUN_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
#include <x86intrin.h>
int main(int argc, char **argv){
int main(int argc, char **argv){
__m512d sign;
__m512d sign;
__m512d h1;
__m512d h1;
__m512d h2_real;
__m512d x1 = (__mm512d) _mm512_xor_epi64((__m512i) h2_real, (__m512i) sign);
__m512d x1 = (__m512d) _mm512_xor_epi64((__m512i) h2_real, (__m512i) sign);
return 0;
return 0;
}
}
])],
])],
[can_compile_avx512_xeon_phi=yes],
[can_compile_avx512_xeon_phi=yes],
[can_compile_avx512_xeon_phi=no]
[can_compile_avx512_xeon_phi=no]
)
)
AC_MSG_RESULT([${can_compile_avx512_xeon_phi}])
# this is needed for the intel compiler
if test x"$can_compile_avx512_xeon" = x"yes" ; then
if test x"$can_compile_avx512_xeon_phi" = x"yes" ; then
# we want only one to be true; this is ugly but could not come up with a better way
grep Phi /proc/cpuinfo > /dev/null
if test x"$?" = x"0" ; then
echo "Xeon PHI found ... disabling AVX512 Xeon"
can_compile_avx512_xeon=no
fi
fi
fi
if test x"$can_compile_avx512_xeon" = x"yes"; then
if test x"$can_compile_avx512_xeon" = x"yes"; then
AC_DEFINE([HAVE_AVX512_XEON],[1],[AVX512 for Xeon is supported on this CPU])
AC_DEFINE([HAVE_AVX512_XEON],[1],[AVX512 for Xeon is supported on this CPU])
else
else
...
...
src/elpa2/kernels/complex_avx512_2hv_template.c
View file @
a2bd28d3
XEON_PHI/ This file is part of ELPA.
// This file is part of ELPA.
//
//
// The ELPA library was originally created by the ELPA consortium,
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
// consisting of the following organizations:
...
...
src/elpa2/kernels/real_avx512_2hv_template.c
View file @
a2bd28d3
...
@@ -345,6 +345,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -345,6 +345,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
#ifdef SINGLE_PRECISION_REAL
#ifdef SINGLE_PRECISION_REAL
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#endif
#ifdef HAVE_AVX512_XEON
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
...
@@ -364,6 +365,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -364,6 +365,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#endif
#endif
#ifdef HAVE_AVX512_XEON
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
h1 = _AVX512_XOR(tau2, sign);
...
@@ -441,7 +443,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -441,7 +443,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
q4 = _AVX512_LOAD(&q[(nb*ldq)+3*offset]);
q4 = _AVX512_LOAD(&q[(nb*ldq)+3*offset]);
q4 = _AVX512_FMA(x4, h1, q4);
q4 = _AVX512_FMA(x4, h1, q4);
_AVX512_STORE(&q[(nb*ldq)+3*offset],q4);
_AVX512_STORE(&q[(nb*ldq)+3*offset],q4);
>>>>>>> Skylake
}
}
...
@@ -531,6 +532,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -531,6 +532,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#endif
#endif
#ifdef HAVE_AVX512_XEON
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau1, sign);
h1 = _AVX512_XOR(tau1, sign);
...
@@ -549,6 +551,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -549,6 +551,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#endif
#endif
#ifdef HAVE_AVX512_XEON
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
h1 = _AVX512_XOR(tau2, sign);
...
@@ -613,7 +616,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -613,7 +616,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
q3 = _AVX512_LOAD(&q[(nb*ldq)+2*offset]);
q3 = _AVX512_LOAD(&q[(nb*ldq)+2*offset]);
q3 = _AVX512_FMA(x3, h1, q3);
q3 = _AVX512_FMA(x3, h1, q3);
_AVX512_STORE(&q[(nb*ldq)+2*offset],q3);
_AVX512_STORE(&q[(nb*ldq)+2*offset],q3);
>>>>>>> Skylake
}
}
...
@@ -693,6 +695,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -693,6 +695,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#endif
#endif
#ifdef HAVE_AVX512_XEON
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau1, sign);
h1 = _AVX512_XOR(tau1, sign);
...
@@ -708,6 +711,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -708,6 +711,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#endif
#endif
#ifdef HAVE_AVX512_XEON
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
h1 = _AVX512_XOR(tau2, sign);
...
@@ -758,8 +762,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -758,8 +762,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
q2 = _AVX512_LOAD(&q[(nb*ldq)+offset]);
q2 = _AVX512_LOAD(&q[(nb*ldq)+offset]);
q2 = _AVX512_FMA(x2, h1, q2);
q2 = _AVX512_FMA(x2, h1, q2);
_AVX512_STORE(&q[(nb*ldq)+offset],q2);
_AVX512_STORE(&q[(nb*ldq)+offset],q2);
>>>>>>> Skylake
}
}
/**
/**
...
@@ -830,6 +832,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -830,6 +832,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#endif
#endif
#ifdef HAVE_AVX512_XEON
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau1, sign);
h1 = _AVX512_XOR(tau1, sign);
...
@@ -837,6 +840,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -837,6 +840,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
#endif
#endif
x1 = _AVX512_MUL(x1, h1);
x1 = _AVX512_MUL(x1, h1);
#ifdef HAVE_AVX512_XEON_PHI
#ifdef HAVE_AVX512_XEON_PHI
#ifdef DOUBLE_PRECISION_REAL
#ifdef DOUBLE_PRECISION_REAL
h1 = (__AVX512_DATATYPE) _mm512_xor_epi64((__AVX512i) tau2, (__AVX512i) sign);
h1 = (__AVX512_DATATYPE) _mm512_xor_epi64((__AVX512i) tau2, (__AVX512i) sign);
...
@@ -845,6 +849,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
...
@@ -845,6 +849,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#endif
#endif
#ifdef HAVE_AVX512_XEON
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
h1 = _AVX512_XOR(tau2, sign);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment