Commit 96543d13 authored by Andreas Marek

Merge branch 'master_pre_stage' into QR_and_GPU

parents c7f9adec a2bd28d3
......@@ -853,12 +853,12 @@ if test x"${need_avx512}" = x"yes"; then
AC_DEFINE([HAVE_AVX512],[1],[AVX512 is supported on this CPU])
if test x"$can_compile_avx512" = x"yes"; then
AC_MSG_CHECKING([whether we compile for Xeon PHI or Xeon])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
AC_MSG_CHECKING([whether we compile for Xeon])
AC_RUN_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
__m512d sign;
__m512d h1;
__m512d h1_real;
__m512d x1 = _mm512_xor_pd(h1_real, sign);
return 0;
......@@ -867,19 +867,37 @@ if test x"${need_avx512}" = x"yes"; then
[can_compile_avx512_xeon=yes],
[can_compile_avx512_xeon=no]
)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
AC_MSG_RESULT([${can_compile_avx512_xeon}])
AC_MSG_CHECKING([whether we compile for Xeon PHI])
AC_RUN_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
__m512d sign;
__m512d h1;
__m512d h2_real;
__m512d x1 = (__mm512d) _mm512_xor_epi64((__m512i) h2_real, (__m512i) sign);
__m512d x1 = (__m512d) _mm512_xor_epi64((__m512i) h2_real, (__m512i) sign);
return 0;
}
])],
[can_compile_avx512_xeon_phi=yes],
[can_compile_avx512_xeon_phi=no]
)
AC_MSG_RESULT([${can_compile_avx512_xeon_phi}])
# This is needed for the Intel compiler (presumably because both the Xeon and
# the Xeon PHI test programs above can succeed with it - confirm).
# Only one of the two variants may stay enabled.
if test x"$can_compile_avx512_xeon" = x"yes" && test x"$can_compile_avx512_xeon_phi" = x"yes" ; then
  # Ugly but we could not come up with a better way: look for the string Phi
  # in /proc/cpuinfo to decide which variant wins.
  # stderr is discarded as well so that hosts without /proc/cpuinfo do not
  # print a grep error; on such hosts the Xeon result is kept unchanged.
  if grep Phi /proc/cpuinfo > /dev/null 2>&1 ; then
    echo "Xeon PHI found ... disabling AVX512 Xeon"
    can_compile_avx512_xeon=no
  fi
fi
if test x"$can_compile_avx512_xeon" = x"yes"; then
AC_DEFINE([HAVE_AVX512_XEON],[1],[AVX512 for Xeon is supported on this CPU])
else
......
XEON_PHI/ This file is part of ELPA.
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
......
......@@ -345,6 +345,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
#ifdef SINGLE_PRECISION_REAL
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
......@@ -364,6 +365,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
......@@ -441,7 +443,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
q4 = _AVX512_LOAD(&q[(nb*ldq)+3*offset]);
q4 = _AVX512_FMA(x4, h1, q4);
_AVX512_STORE(&q[(nb*ldq)+3*offset],q4);
>>>>>>> Skylake
}
......@@ -531,6 +532,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau1, sign);
......@@ -549,6 +551,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
......@@ -613,7 +616,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
q3 = _AVX512_LOAD(&q[(nb*ldq)+2*offset]);
q3 = _AVX512_FMA(x3, h1, q3);
_AVX512_STORE(&q[(nb*ldq)+2*offset],q3);
>>>>>>> Skylake
}
......@@ -693,6 +695,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau1, sign);
......@@ -708,6 +711,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
......@@ -758,8 +762,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
q2 = _AVX512_LOAD(&q[(nb*ldq)+offset]);
q2 = _AVX512_FMA(x2, h1, q2);
_AVX512_STORE(&q[(nb*ldq)+offset],q2);
>>>>>>> Skylake
}
/**
......@@ -830,6 +832,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau1, sign);
......@@ -837,6 +840,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
#endif
x1 = _AVX512_MUL(x1, h1);
#ifdef HAVE_AVX512_XEON_PHI
#ifdef DOUBLE_PRECISION_REAL
h1 = (__AVX512_DATATYPE) _mm512_xor_epi64((__AVX512i) tau2, (__AVX512i) sign);
......@@ -845,6 +849,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment