Commit fa9926fc authored by Andreas Marek
Browse files

Comment out unused code paths in double precision SSE/AVX kernels

parent 731f3def
...@@ -116,7 +116,7 @@ void double_hh_trafo_complex_avx_avx2_2hv_double(double complex* q, double compl ...@@ -116,7 +116,7 @@ void double_hh_trafo_complex_avx_avx2_2hv_double(double complex* q, double compl
s += hh[i-1] * conj(hh[(i+ldh)]); s += hh[i-1] * conj(hh[(i+ldh)]);
} }
#if 1 //#if 1
for (i = 0; i < nq-4; i+=8) for (i = 0; i < nq-4; i+=8)
{ {
hh_trafo_complex_kernel_8_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s); hh_trafo_complex_kernel_8_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
...@@ -125,20 +125,20 @@ void double_hh_trafo_complex_avx_avx2_2hv_double(double complex* q, double compl ...@@ -125,20 +125,20 @@ void double_hh_trafo_complex_avx_avx2_2hv_double(double complex* q, double compl
{ {
hh_trafo_complex_kernel_4_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s); hh_trafo_complex_kernel_4_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
} }
#else //#else
for (i = 0; i < nq-4; i+=6) // for (i = 0; i < nq-4; i+=6)
{ // {
hh_trafo_complex_kernel_6_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s); // hh_trafo_complex_kernel_6_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
} // }
if (nq-i > 2) // if (nq-i > 2)
{ // {
hh_trafo_complex_kernel_4_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s); // hh_trafo_complex_kernel_4_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
} // }
else if (nq-i > 0) // else if (nq-i > 0)
{ // {
hh_trafo_complex_kernel_2_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s); // hh_trafo_complex_kernel_2_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
} // }
#endif //#endif
} }
static __forceinline void hh_trafo_complex_kernel_8_AVX_2hv_double(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s) static __forceinline void hh_trafo_complex_kernel_8_AVX_2hv_double(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
......
...@@ -105,25 +105,25 @@ void double_hh_trafo_complex_sse_2hv_double(double complex* q, double complex* h ...@@ -105,25 +105,25 @@ void double_hh_trafo_complex_sse_2hv_double(double complex* q, double complex* h
s += hh[i-1] * conj(hh[(i+ldh)]); s += hh[i-1] * conj(hh[(i+ldh)]);
} }
#if 1 //#if 1
for (i = 0; i < nq; i+=4) for (i = 0; i < nq; i+=4)
{ {
hh_trafo_complex_kernel_4_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s); hh_trafo_complex_kernel_4_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
} }
#else //#else
for (i = 0; i < nq-2; i+=3) // for (i = 0; i < nq-2; i+=3)
{ // {
hh_trafo_complex_kernel_3_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s); // hh_trafo_complex_kernel_3_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
} // }
if (nq-i > 1) // if (nq-i > 1)
{ // {
hh_trafo_complex_kernel_2_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s); // hh_trafo_complex_kernel_2_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
} // }
else if (nq-i > 0) // else if (nq-i > 0)
{ // {
hh_trafo_complex_kernel_1_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s); // hh_trafo_complex_kernel_1_SSE_2hv_double(&q[i], hh, nb, ldq, ldh, s);
} // }
#endif //#endif
} }
static __forceinline void hh_trafo_complex_kernel_4_SSE_2hv_double(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s) static __forceinline void hh_trafo_complex_kernel_4_SSE_2hv_double(double complex* q, double complex* hh, int nb, int ldq, int ldh, double complex s)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment