Commit fdd2cca4 authored by Andreas Marek
Browse files

Error in real single precision AVX2 block4 kernel

parent 0a06af6a
......@@ -397,7 +397,7 @@ __forceinline void hh_trafo_kernel_24_AVX_4hv_single(float* q, float* hh, int nb
#ifdef __ELPA_USE_FMA__
y1 = _mm256_FMA_ps(q1, h2, y1);
y2 = _mm256_FMA_ps(q2, h2, y2);
// y3 = _mm256_FMA_ps(q3, h2, y3);
y3 = _mm256_FMA_ps(q3, h2, y3);
y1 = _mm256_add_ps(y1, _mm256_mul_ps(q1,h2));
y2 = _mm256_add_ps(y2, _mm256_mul_ps(q2,h2));
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment