Commit 565de5df authored by Lorenz Huedepohl

Fix AVX512 complex kernels

There was an error in the stripe_width calculation leading to wrong
results for certain values of 'nev'. The root cause has still not been
properly investigated and should be checked again.
parent 4e7dbc7b
...@@ -402,7 +402,7 @@ ...@@ -402,7 +402,7 @@
if (kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK1 .or. & if (kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK1 .or. &
kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK2) then kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK2) then
stripe_width = ((stripe_width+3)/4)*4 ! Must be a multiple of 4 because of AVX-512 memory alignment of 64 bytes stripe_width = ((stripe_width+7)/8)*8 ! Must be a multiple of 4 because of AVX-512 memory alignment of 64 bytes
! (4 * sizeof(double complex) == 64) ! (4 * sizeof(double complex) == 64)
else else
...@@ -504,7 +504,7 @@ ...@@ -504,7 +504,7 @@
if (kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK1 .or. & if (kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK1 .or. &
kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK2) then kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK2) then
stripe_width = ((stripe_width+3)/4)*4 ! Must be a multiple of 4 because of AVX-512 memory alignment of 64 bytes stripe_width = ((stripe_width+7)/8)*8 ! Must be a multiple of 4 because of AVX-512 memory alignment of 64 bytes
! (4 * sizeof(double complex) == 64) ! (4 * sizeof(double complex) == 64)
else else
...@@ -517,7 +517,7 @@ ...@@ -517,7 +517,7 @@
if (kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK1 .or. & if (kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK1 .or. &
kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK2) then kernel .eq. ELPA_2STAGE_COMPLEX_AVX512_BLOCK2) then
stripe_width = ((stripe_width+7)/8)*8 ! Must be a multiple of 8 because of AVX-512 memory alignment of 64 bytes stripe_width = ((stripe_width+15)/16)*16 ! Must be a multiple of 8 because of AVX-512 memory alignment of 64 bytes
! (8 * sizeof(float complex) == 64) ! (8 * sizeof(float complex) == 64)
else else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment