Commit 554d2785 authored by Andreas Marek

Remove SSE references from AVX kernels

parent 1e1812bc
......@@ -860,6 +860,8 @@ void double_hh_trafo_real_avx_avx2_2hv_single(float* q, float* hh, int* pnb, int
#endif
}
#if 0
/**
* Unrolled kernel that computes
* 4 rows of Q simultaneously, a
......@@ -958,6 +960,7 @@ __forceinline void hh_trafo_kernel_4_sse_instead_of_avx_2hv_single(float* q, flo
q1 = _mm_add_ps(q1, _mm_mul_ps(x1, h1));
_mm_store_ps(&q[nb*ldq],q1);
}
#endif
/**
* Unrolled kernel that computes
......
......@@ -196,7 +196,7 @@ void hexa_hh_trafo_real_avx_avx2_6hv_single(float* q, float* hh, int* pnb, int*
}
// Production level kernel calls with padding
#ifdef __AVX__
//#ifdef __AVX__
for (i = 0; i < nq-4; i+=8)
{
hh_trafo_kernel_8_AVX_6hv_single(&q[i], hh, nb, ldq, ldh, scalarprods);
......@@ -209,20 +209,20 @@ void hexa_hh_trafo_real_avx_avx2_6hv_single(float* q, float* hh, int* pnb, int*
{
hh_trafo_kernel_4_AVX_6hv_single(&q[i], hh, nb, ldq, ldh, scalarprods);
}
#else
for (i = 0; i < nq-2; i+=4)
{
hh_trafo_kernel_4_SSE_6hv_single(&q[i], hh, nb, ldq, ldh, scalarprods);
}
if (nq == i)
{
return;
}
else
{
hh_trafo_kernel_2_SSE_6hv_single(&q[i], hh, nb, ldq, ldh, scalarprods);
}
#endif
//#else
// for (i = 0; i < nq-2; i+=4)
// {
// hh_trafo_kernel_4_SSE_6hv_single(&q[i], hh, nb, ldq, ldh, scalarprods);
// }
// if (nq == i)
// {
// return;
// }
// else
// {
// hh_trafo_kernel_2_SSE_6hv_single(&q[i], hh, nb, ldq, ldh, scalarprods);
// }
//#endif
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment