Commit 95597586 authored by Andreas Marek's avatar Andreas Marek
Browse files

Define new VSE512 kernels

parent b40ac827
......@@ -825,6 +825,14 @@ m4_define(elpa_m4_avx512_kernels, [
complex_avx512_block2
])
m4_define(elpa_m4_vse512_kernels, [
real_vse512_block2
real_vse512_block4
real_vse512_block6
complex_vse512_block1
complex_vse512_block2
])
m4_define(elpa_m4_bgp_kernels, [
real_bgp
complex_bgp
......@@ -840,7 +848,7 @@ m4_define(elpa_m4_gpu_kernels, [
complex_gpu
])
m4_define(elpa_m4_kernel_types, [generic sparc64 neon_arch64 vsx sse sse_assembly avx avx2 avx512 bgp bgq gpu])
m4_define(elpa_m4_kernel_types, [generic sparc64 neon_arch64 vsx sse sse_assembly avx avx2 avx512 vse512 bgp bgq gpu])
m4_define(elpa_m4_all_kernels,
m4_foreach_w([elpa_m4_type],
......@@ -881,6 +889,7 @@ ELPA_SELECT_KERNELS([sse_assembly],[enable])
ELPA_SELECT_KERNELS([avx],[enable])
ELPA_SELECT_KERNELS([avx2],[enable])
ELPA_SELECT_KERNELS([avx512],[enable])
ELPA_SELECT_KERNELS([vse512],[disable])
ELPA_SELECT_KERNELS([gpu],[disable])
ELPA_SELECT_KERNELS([bgp],[disable])
ELPA_SELECT_KERNELS([bgq],[disable])
......@@ -890,7 +899,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
])
if test x"${enable_bgp}" = x"yes" -o x"$enable_bgq" = x"yes"; then
m4_foreach_w([elpa_m4_kernel], elpa_m4_sparc64_kernels elpa_m4_neon_arch64_kernels elpa_m4_vsx_kernels elpa_m4_sse_kernels elpa_m4_avx_kernels elpa_m4_avx2_kernels elpa_m4_avx512_kernels, [
m4_foreach_w([elpa_m4_kernel], elpa_m4_sparc64_kernels elpa_m4_neon_arch64_kernels elpa_m4_vsx_kernels elpa_m4_sse_kernels elpa_m4_avx_kernels elpa_m4_avx2_kernels elpa_m4_avx512_kernels elpa_m4_vse512_kernels, [
if x"$use_[]elpa_m4_kernel[]" = x"yes" ; then
echo "Disabling elpa_m4_kernel due to BGP/BGQ option"
fi
......@@ -950,7 +959,7 @@ AC_DEFUN([ELPA_KERNEL_DEPENDS],[
])
fi
])
m4_foreach_w([elpa_m4_arch],[sparc64 neon_arch64 vsx sse avx avx2 avx512],[
m4_foreach_w([elpa_m4_arch],[sparc64 neon_arch64 vsx sse avx avx2 avx512 vse512],[
ELPA_KERNEL_DEPENDS([real_]elpa_m4_arch[_block6], [real_]elpa_m4_arch[_block4 real_]elpa_m4_arch[_block2])
ELPA_KERNEL_DEPENDS([real_]elpa_m4_arch[_block4], [real_]elpa_m4_arch[_block2])
ELPA_KERNEL_DEPENDS([complex_]elpa_m4_arch[_block2], [complex_]elpa_m4_arch[_block1])
......@@ -1008,7 +1017,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
m4_foreach_w([elpa_m4_kind],[real complex],[
m4_foreach_w([elpa_m4_kernel],
m4_foreach_w([elpa_m4_cand_kernel],
elpa_m4_avx512_kernels elpa_m4_avx2_kernels elpa_m4_avx_kernels elpa_m4_sse_kernels elpa_m4_sse_assembly_kernels elpa_m4_sparc64_kernels elpa_m4_neon_arch64_kernels elpa_m4_vsx_kernels elpa_m4_generic_kernels elpa_m4_gpu_kernels,
elpa_m4_avx512_kernels elpa_m4_avx2_kernels elpa_m4_avx_kernels elpa_m4_sse_kernels elpa_m4_sse_assembly_kernels elpa_m4_vse512_kernels elpa_m4_sparc64_kernels elpa_m4_neon_arch64_kernels elpa_m4_vsx_kernels elpa_m4_generic_kernels elpa_m4_gpu_kernels,
[m4_bmatch(elpa_m4_cand_kernel,elpa_m4_kind,elpa_m4_cand_kernel)] ),
[
if test -z "$default_[]elpa_m4_kind[]_kernel"; then
......
......@@ -60,8 +60,11 @@ enum ELPA_SOLVERS {
X(ELPA_2STAGE_REAL_VSX_BLOCK2, 25, @ELPA_2STAGE_REAL_VSX_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK4, 26, @ELPA_2STAGE_REAL_VSX_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK6, 27, @ELPA_2STAGE_REAL_VSX_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 28, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6, 29, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6_COMPILED@, __VA_ARGS__)
X(ELPA_2STAGE_REAL_VSE512_BLOCK2, 28, @ELPA_2STAGE_REAL_VSE512_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSE512_BLOCK4, 29, @ELPA_2STAGE_REAL_VSE512_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSE512_BLOCK6, 30, @ELPA_2STAGE_REAL_VSE512_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 31, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6, 32, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6_COMPILED@, __VA_ARGS__)
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
......
......@@ -9,5 +9,6 @@
#define VSX_INSTR 9
#define ARCH64_INSTR 10
#define SPARC_INSTR 11
#define VSE512_INSTR 12
#define NUMBER_OF_INSTR 12
#define NUMBER_OF_INSTR 13
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment