Commit 0278642d authored by Andreas Marek

Fix error in AVX512 real Block 4 and 6 kernels

parent e5d03b59
......@@ -227,28 +227,28 @@ void quad_hh_trafo_real_avx512_4hv_single(float* q, float* hh, int* pnb, int* pn
{
hh_trafo_kernel_24_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
#endif
#ifdef DOUBLE_PRECISION_REAL
if (nq-i == 16)
{
else
{
if (nq-i == 16)
{
hh_trafo_kernel_16_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
}
else
{
hh_trafo_kernel_8_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
}
#endif
#ifdef SINGLE_PRECISION_REAL
if (nq-i == 32)
{
hh_trafo_kernel_32_AVX_4hv_single(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
#endif
else
#ifdef DOUBLE_PRECISION_REAL
{
hh_trafo_kernel_8_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
#endif
#ifdef SINGLE_PRECISION_REAL
else
{
hh_trafo_kernel_16_AVX_4hv_single(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
hh_trafo_kernel_16_AVX_4hv_single(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
#endif
}
......
......@@ -47,6 +47,7 @@
#include "config-f90.h"
#include <x86intrin.h>
#include <stdio.h>
#define __forceinline __attribute__((always_inline)) static
......@@ -289,23 +290,25 @@ void hexa_hh_trafo_real_avx512_6hv_single(float* q, float* hh, int* pnb, int* pn
{
hh_trafo_kernel_24_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
if (nq-i == 16)
{
hh_trafo_kernel_16_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
#endif
else
{
#ifdef DOUBLE_PRECISION_REAL
hh_trafo_kernel_8_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
{
if (nq-i == 16)
{
hh_trafo_kernel_16_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
else
{
hh_trafo_kernel_8_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
}
#endif
#ifdef SINGLE_PRECISION_REAL
if (nq-i == 16) {
hh_trafo_kernel_16_AVX512_6hv_single(&q[i], hh, nb, ldq, ldh, scalarprods);
} else {
printf("ERROR in avx512 kernel\n");
}
#endif
}
}
/**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment