Commit 6788de0b authored by Andreas Marek
Browse files

Start to implement complex AVX-512 kernels

parent e36e3f50
......@@ -3156,7 +3156,7 @@
if (useGPU) then
stripe_width = 256
else
#ifdef DOUBLE_PRECISION_REAL
#ifdef DOUBLE_PRECISION_COMPLEX
stripe_width = 48 ! Must be a multiple of 2
#else
stripe_width = 48 ! Must be a multiple of 4
......@@ -3190,12 +3190,31 @@
#endif /* WITH_OPENMP */
if (.not.(useGPU)) then
#ifdef DOUBLE_PRECISION_REAL
stripe_width = ((stripe_width+1)/2)*2 ! Must be a multiple of 2 because of AVX/SSE memory alignment of 32 bytes
! (2 * sizeof(double complex) == 32)
#ifdef DOUBLE_PRECISION_COMPLEX
! Round stripe_width UP to the next multiple of the vector width required by
! the selected complex kernel. Adding (n-1) before the truncating integer
! division by n is what makes the result round up; a smaller offset rounds
! down for some inputs and can even produce 0 for small stripe_width.
if (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK1 .or. &
    THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK2) then
  ! Must be a multiple of 4 because of AVX-512 memory alignment of 64 bytes
  ! (4 * sizeof(double complex) == 64)
  stripe_width = ((stripe_width+3)/4)*4
else
  ! Must be a multiple of 2 because of AVX/SSE memory alignment of 32 bytes
  ! (2 * sizeof(double complex) == 32)
  stripe_width = ((stripe_width+1)/2)*2
endif
#else
stripe_width = ((stripe_width+3)/4)*4 ! Must be a multiple of 4 because of AVX/SSE memory alignment of 32 bytes
! (4 * sizeof(float complex) == 32)
! Round stripe_width UP to the next multiple of the vector width required by
! the selected complex kernel. Adding (n-1) before the truncating integer
! division by n is what makes the result round up; a smaller offset rounds
! down for some inputs and can even produce 0 for small stripe_width.
if (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK1 .or. &
    THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_AVX512_BLOCK2) then
  ! Must be a multiple of 8 because of AVX-512 memory alignment of 64 bytes
  ! (8 * sizeof(float complex) == 64)
  stripe_width = ((stripe_width+7)/8)*8
else
  ! Must be a multiple of 4 because of AVX/SSE memory alignment of 32 bytes
  ! (4 * sizeof(float complex) == 32)
  stripe_width = ((stripe_width+3)/4)*4
endif
#endif
endif
#ifndef WITH_OPENMP
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment