Commit fa9926fc authored by Andreas Marek

Comment out unused code paths in the double-precision SSE/AVX kernels

parent 731f3def
......@@ -116,7 +116,7 @@ void double_hh_trafo_complex_avx_avx2_2hv_double(double complex* q, double compl
s += hh[i-1] * conj(hh[(i+ldh)]);
}
#if 1
//#if 1
for (i = 0; i < nq-4; i+=8)
{
hh_trafo_complex_kernel_8_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
......@@ -125,20 +125,20 @@ void double_hh_trafo_complex_avx_avx2_2hv_double(double complex* q, double compl
{
hh_trafo_complex_kernel_4_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
}
#else
for (i = 0; i < nq-4; i+=6)
{
hh_trafo_complex_kernel_6_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
}
if (nq-i > 2)
{
hh_trafo_complex_kernel_4_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
}
else if (nq-i > 0)
{
hh_trafo_complex_kernel_2_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
}
#endif
//#else
// for (i = 0; i < nq-4; i+=6)
// {
// hh_trafo_complex_kernel_6_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
// }
// if (nq-i > 2)
// {
// hh_trafo_complex_kernel_4_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
// }
// else if (nq-i > 0)
// {
// hh_trafo_complex_kernel_2_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
// }
//#endif
}
static __forceinline void hh_trafo_complex_kernel_8_AVX_2hv_double(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
......
......@@ -105,25 +105,25 @@ void double_hh_trafo_complex_sse_2hv_double(double complex* q, double complex* h
s += hh[i-1] * conj(hh[(i+ldh)]);
}
#if 1
//#if 1
for (i = 0; i < nq; i+=4)
{
hh_trafo_complex_kernel_4_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
}
#else
for (i = 0; i < nq-2; i+=3)
{
hh_trafo_complex_kernel_3_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
}
if (nq-i > 1)
{
hh_trafo_complex_kernel_2_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
}
else if (nq-i > 0)
{
hh_trafo_complex_kernel_1_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
}
#endif
//#else
// for (i = 0; i < nq-2; i+=3)
// {
// hh_trafo_complex_kernel_3_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
// }
// if (nq-i > 1)
// {
// hh_trafo_complex_kernel_2_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
// }
// else if (nq-i > 0)
// {
// hh_trafo_complex_kernel_1_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
// }
//#endif
}
static __forceinline void hh_trafo_complex_kernel_4_SSE_2hv_double(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment