Commit 354ae8ec authored by Andreas Marek

Fix merge error in AVX512 kernel

parent e9698a6c
......@@ -852,15 +852,13 @@ if test x"${need_avx512}" = x"yes"; then
fi
AC_DEFINE([HAVE_AVX512],[1],[AVX512 is supported on this CPU])
echo "AVX512 allgemein " $can_compile_avx512
if test x"$can_compile_avx512" = x"yes"; then
AC_MSG_CHECKING([whether we compile for Xeon PHI or Xeon])
AC_MSG_CHECKING([whether we compile for Xeon])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
__m512d sign;
__m512d h1;
__m512d h1_real;
__m512d x1 = _mm512_xor_pd(h1_real, sign);
return 0;
......@@ -869,6 +867,9 @@ if test x"${need_avx512}" = x"yes"; then
[can_compile_avx512_xeon=yes],
[can_compile_avx512_xeon=no]
)
AC_MSG_RESULT([${can_compile_avx512_xeon}])
AC_MSG_CHECKING([whether we compile for Xeon PHI])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
......@@ -883,8 +884,8 @@ if test x"${need_avx512}" = x"yes"; then
[can_compile_avx512_xeon_phi=yes],
[can_compile_avx512_xeon_phi=no]
)
echo $can_compile_avx512_xeon
echo $can_compile_avx512_xeon_phi
AC_MSG_RESULT([${can_compile_avx512_xeon_phi}])
if test x"$can_compile_avx512_xeon" = x"yes"; then
AC_DEFINE([HAVE_AVX512_XEON],[1],[AVX512 for Xeon is supported on this CPU])
else
......
XEON_PHI/ This file is part of ELPA.
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
......
......@@ -345,6 +345,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
#ifdef SINGLE_PRECISION_REAL
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
......@@ -364,6 +365,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
......@@ -441,7 +443,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
q4 = _AVX512_LOAD(&q[(nb*ldq)+3*offset]);
q4 = _AVX512_FMA(x4, h1, q4);
_AVX512_STORE(&q[(nb*ldq)+3*offset],q4);
>>>>>>> Skylake
}
......@@ -531,6 +532,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau1, sign);
......@@ -549,6 +551,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
......@@ -613,7 +616,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
q3 = _AVX512_LOAD(&q[(nb*ldq)+2*offset]);
q3 = _AVX512_FMA(x3, h1, q3);
_AVX512_STORE(&q[(nb*ldq)+2*offset],q3);
>>>>>>> Skylake
}
......@@ -693,6 +695,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau1, sign);
......@@ -708,6 +711,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
......@@ -758,8 +762,6 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
q2 = _AVX512_LOAD(&q[(nb*ldq)+offset]);
q2 = _AVX512_FMA(x2, h1, q2);
_AVX512_STORE(&q[(nb*ldq)+offset],q2);
>>>>>>> Skylake
}
/**
......@@ -830,6 +832,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau1, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau1, sign);
......@@ -837,6 +840,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
#endif
x1 = _AVX512_MUL(x1, h1);
#ifdef HAVE_AVX512_XEON_PHI
#ifdef DOUBLE_PRECISION_REAL
h1 = (__AVX512_DATATYPE) _mm512_xor_epi64((__AVX512i) tau2, (__AVX512i) sign);
......@@ -845,6 +849,7 @@ void double_hh_trafo_real_avx512_2hv_single(float* q, float* hh, int* pnb, int*
h1 = (__AVX512_DATATYPE) _mm512_xor_epi32((__AVX512i) tau2, (__AVX512i) sign);
#endif
#endif
#ifdef HAVE_AVX512_XEON
#if defined(DOUBLE_PRECISION_REAL) || defined(SINGLE_PRECISION_REAL)
h1 = _AVX512_XOR(tau2, sign);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment