Commit 150880a4 authored by Andreas Marek

Comment out unused code paths in double-precision SSE/AVX kernels

parent 207febce
......@@ -115,7 +115,7 @@ void double_hh_trafo_complex_avx_avx2_2hv(double complex* q, double complex* hh,
s += hh[i-1] * conj(hh[(i+ldh)]);
}
#if 1
//#if 1
for (i = 0; i < nq-4; i+=8)
{
hh_trafo_complex_kernel_8_AVX_2hv(&q[i], hh, nb, ldq, ldh, s);
......@@ -124,20 +124,20 @@ void double_hh_trafo_complex_avx_avx2_2hv(double complex* q, double complex* hh,
{
hh_trafo_complex_kernel_4_AVX_2hv(&q[i], hh, nb, ldq, ldh, s);
}
#else
for (i = 0; i < nq-4; i+=6)
{
hh_trafo_complex_kernel_6_AVX_2hv(&q[i], hh, nb, ldq, ldh, s);
}
if (nq-i > 2)
{
hh_trafo_complex_kernel_4_AVX_2hv(&q[i], hh, nb, ldq, ldh, s);
}
else if (nq-i > 0)
{
hh_trafo_complex_kernel_2_AVX_2hv(&q[i], hh, nb, ldq, ldh, s);
}
#endif
//#else
// for (i = 0; i < nq-4; i+=6)
// {
// hh_trafo_complex_kernel_6_AVX_2hv(&q[i], hh, nb, ldq, ldh, s);
// }
// if (nq-i > 2)
// {
// hh_trafo_complex_kernel_4_AVX_2hv(&q[i], hh, nb, ldq, ldh, s);
// }
// else if (nq-i > 0)
// {
// hh_trafo_complex_kernel_2_AVX_2hv(&q[i], hh, nb, ldq, ldh, s);
// }
//#endif
}
static __forceinline void hh_trafo_complex_kernel_8_AVX_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
......
......@@ -104,25 +104,25 @@ void double_hh_trafo_complex_sse_2hv(double complex* q, double complex* hh, int*
s += hh[i-1] * conj(hh[(i+ldh)]);
}
#if 1
//#if 1
for (i = 0; i < nq; i+=4)
{
hh_trafo_complex_kernel_4_SSE_2hv(&q[i], hh, nb, ldq, ldh, s);
}
#else
for (i = 0; i < nq-2; i+=3)
{
hh_trafo_complex_kernel_3_SSE_2hv(&q[i], hh, nb, ldq, ldh, s);
}
if (nq-i > 1)
{
hh_trafo_complex_kernel_2_SSE_2hv(&q[i], hh, nb, ldq, ldh, s);
}
else if (nq-i > 0)
{
hh_trafo_complex_kernel_1_SSE_2hv(&q[i], hh, nb, ldq, ldh, s);
}
#endif
//#else
// for (i = 0; i < nq-2; i+=3)
// {
// hh_trafo_complex_kernel_3_SSE_2hv(&q[i], hh, nb, ldq, ldh, s);
// }
// if (nq-i > 1)
// {
// hh_trafo_complex_kernel_2_SSE_2hv(&q[i], hh, nb, ldq, ldh, s);
// }
// else if (nq-i > 0)
// {
// hh_trafo_complex_kernel_1_SSE_2hv(&q[i], hh, nb, ldq, ldh, s);
// }
//#endif
}
static __forceinline void hh_trafo_complex_kernel_4_SSE_2hv(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment