Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
On Thursday, 7th July, from 1 to 3 pm, GitLab will undergo maintenance with a short downtime.
Open sidebar
elpa
elpa
Commits
0278642d
Commit
0278642d
authored
Jul 31, 2017
by
Andreas Marek
Browse files
Fix error in AVX512 real Block 4 and 6 kernels
parent
e5d03b59
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/elpa2/kernels/real_avx512_4hv_template.Xc
View file @
0278642d
...
...
@@ -227,28 +227,28 @@ void quad_hh_trafo_real_avx512_4hv_single(float* q, float* hh, int* pnb, int* pn
{
hh_trafo_kernel_24_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
#endif
#ifdef DOUBLE_PRECISION_REAL
if (nq-i == 16)
{
else
{
if (nq-i == 16)
{
hh_trafo_kernel_16_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
}
else
{
hh_trafo_kernel_8_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
}
#endif
#ifdef SINGLE_PRECISION_REAL
if (nq-i == 32)
{
hh_trafo_kernel_32_AVX_4hv_single(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
#endif
else
#ifdef DOUBLE_PRECISION_REAL
{
hh_trafo_kernel_8_AVX512_4hv_double(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
#endif
#ifdef SINGLE_PRECISION_REAL
else
{
hh_trafo_kernel_16_AVX_4hv_single(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
hh_trafo_kernel_16_AVX_4hv_single(&q[i], hh, nb, ldq, ldh, s_1_2, s_1_3, s_2_3, s_1_4, s_2_4, s_3_4);
}
#endif
}
...
...
src/elpa2/kernels/real_avx512_6hv_template.Xc
View file @
0278642d
...
...
@@ -47,6 +47,7 @@
#include "config-f90.h"
#include <x86intrin.h>
#include <stdio.h>
#define __forceinline __attribute__((always_inline)) static
...
...
@@ -289,23 +290,25 @@ void hexa_hh_trafo_real_avx512_6hv_single(float* q, float* hh, int* pnb, int* pn
{
hh_trafo_kernel_24_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
if (nq-i == 16)
{
hh_trafo_kernel_16_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
#endif
else
{
#ifdef DOUBLE_PRECISION_REAL
hh_trafo_kernel_8_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
{
if (nq-i == 16)
{
hh_trafo_kernel_16_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
else
{
hh_trafo_kernel_8_AVX512_6hv_double(&q[i], hh, nb, ldq, ldh, scalarprods);
}
}
#endif
#ifdef SINGLE_PRECISION_REAL
if (nq-i == 16) {
hh_trafo_kernel_16_AVX512_6hv_single(&q[i], hh, nb, ldq, ldh, scalarprods);
} else {
printf("ERROR in avx512 kernel\n");
}
#endif
}
}
/**
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment