Commit 0278642d authored by Andreas Marek's avatar Andreas Marek
Browse files

Fix error in AVX512 real Block 4 and 6 kernels

parent e5d03b59
...@@ -227,28 +227,28 @@ void quad_hh_trafo_real_avx512_4hv_single(float* q, float* hh, int* pnb, int* pn ...@@ -227,28 +227,28 @@ void quad_hh_trafo_real_avx512_4hv_single(float* q, float* hh, int* pnb, int* pn
{ {
hh_trafo_kernel_24_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4); hh_trafo_kernel_24_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
} }
#endif
#ifdef DOUBLE_PRECISION_REAL else
if (nq-i == 16) {
{ if (nq-i == 16)
{
hh_trafo_kernel_16_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4); hh_trafo_kernel_16_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
} }
else
{
hh_trafo_kernel_8_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
}
#endif #endif
#ifdef SINGLE_PRECISION_REAL #ifdef SINGLE_PRECISION_REAL
if (nq-i == 32) if (nq-i == 32)
{ {
hh_trafo_kernel_32_AVX_4hv_single(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4); hh_trafo_kernel_32_AVX_4hv_single(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
} }
#endif else
else
#ifdef DOUBLE_PRECISION_REAL
{
hh_trafo_kernel_8_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
#endif
#ifdef SINGLE_PRECISION_REAL
{ {
hh_trafo_kernel_16_AVX_4hv_single(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4); hh_trafo_kernel_16_AVX_4hv_single(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
} }
#endif #endif
} }
......
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#include "config-f90.h" #include "config-f90.h"
#include <x86intrin.h> #include <x86intrin.h>
#include <stdio.h>
#define __forceinline __attribute__((always_inline)) static #define __forceinline __attribute__((always_inline)) static
...@@ -289,23 +290,25 @@ void hexa_hh_trafo_real_avx512_6hv_single(float* q, float* hh, int* pnb, int* pn ...@@ -289,23 +290,25 @@ void hexa_hh_trafo_real_avx512_6hv_single(float* q, float* hh, int* pnb, int* pn
{ {
hh_trafo_kernel_24_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods); hh_trafo_kernel_24_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
} }
if (nq-i == 16)
{
hh_trafo_kernel_16_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
#endif
else else
{ {
#ifdef DOUBLE_PRECISION_REAL if (nq-i == 16)
{
hh_trafo_kernel_8_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods); hh_trafo_kernel_16_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
else
{
hh_trafo_kernel_8_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
}
#endif #endif
#ifdef SINGLE_PRECISION_REAL #ifdef SINGLE_PRECISION_REAL
if (nq-i == 16) {
hh_trafo_kernel_16_AVX512_6hv_single(&q[i], hh, nb, ldq, ldh, scalarprods); hh_trafo_kernel_16_AVX512_6hv_single(&q[i], hh, nb, ldq, ldh, scalarprods);
} else {
printf("ERROR in avx512 kernel\n");
}
#endif #endif
}
} }
/** /**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment