Commit 1aa7170a authored by Andreas Marek
Browse files

Introduce an AMD_GPU_INSTR constant

parent b0232235
......@@ -6,11 +6,12 @@
#define AVX2_INSTR 6
#define AVX512_INSTR 7
#define NVIDIA_INSTR 8
#define VSX_INSTR 9
#define ARCH64_INSTR 10
#define SPARC_INSTR 11
#define SVE128_INSTR 12
#define SVE256_INSTR 13
#define SVE512_INSTR 14
#define AMD_GPU_INSTR 9
#define VSX_INSTR 10
#define ARCH64_INSTR 11
#define SPARC_INSTR 12
#define SVE128_INSTR 13
#define SVE256_INSTR 14
#define SVE512_INSTR 15
#define NUMBER_OF_INSTR 15
#define NUMBER_OF_INSTR 16
......@@ -90,7 +90,8 @@ module simd_kernel
realKernels_to_simdTable(ELPA_2STAGE_REAL_SVE512_BLOCK2) = SVE512_INSTR
realKernels_to_simdTable(ELPA_2STAGE_REAL_SVE512_BLOCK4) = SVE512_INSTR
realKernels_to_simdTable(ELPA_2STAGE_REAL_SVE512_BLOCK6) = SVE512_INSTR
realKernels_to_simdTable(ELPA_2STAGE_REAL_GPU) = NVIDIA_INSTR
realKernels_to_simdTable(ELPA_2STAGE_REAL_NVIDIA_GPU) = NVIDIA_INSTR
! Map the AMD GPU real kernel to the AMD GPU instruction-set slot
realKernels_to_simdTable(ELPA_2STAGE_REAL_AMD_GPU) = AMD_GPU_INSTR
realKernels_to_simdTable(ELPA_2STAGE_REAL_SPARC64_BLOCK2) = SPARC_INSTR
realKernels_to_simdTable(ELPA_2STAGE_REAL_SPARC64_BLOCK4) = SPARC_INSTR
realKernels_to_simdTable(ELPA_2STAGE_REAL_SPARC64_BLOCK6) = SPARC_INSTR
......@@ -123,7 +124,8 @@ module simd_kernel
simdTable_to_realKernels(AVX_INSTR) = ELPA_2STAGE_REAL_AVX_BLOCK2
simdTable_to_realKernels(AVX2_INSTR) = ELPA_2STAGE_REAL_AVX2_BLOCK2
simdTable_to_realKernels(AVX512_INSTR) = ELPA_2STAGE_REAL_AVX512_BLOCK2
simdTable_to_realKernels(NVIDIA_INSTR) = ELPA_2STAGE_REAL_GPU
simdTable_to_realKernels(NVIDIA_INSTR) = ELPA_2STAGE_REAL_NVIDIA_GPU
simdTable_to_realKernels(AMD_GPU_INSTR) = ELPA_2STAGE_REAL_AMD_GPU
simdTable_to_realKernels(SPARC_INSTR) = ELPA_2STAGE_REAL_SPARC64_BLOCK2
simdTable_to_realKernels(ARCH64_INSTR) = ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2
simdTable_to_realKernels(VSX_INSTR) = ELPA_2STAGE_REAL_VSX_BLOCK2
......@@ -163,7 +165,8 @@ module simd_kernel
complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_SVE512_BLOCK2) = SVE512_INSTR
complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK1) = ARCH64_INSTR
complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK2) = ARCH64_INSTR
complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_GPU) = NVIDIA_INSTR
complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_NVIDIA_GPU) = NVIDIA_INSTR
complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_AMD_GPU) = AMD_GPU_INSTR
simd_set_index = complexKernels_to_simdTable(kernel)
......@@ -187,7 +190,8 @@ module simd_kernel
simdTable_to_complexKernels(SVE256_INSTR) = ELPA_2STAGE_COMPLEX_SVE256_BLOCK1
simdTable_to_complexKernels(SVE512_INSTR) = ELPA_2STAGE_COMPLEX_SVE512_BLOCK1
simdTable_to_complexKernels(ARCH64_INSTR) = ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK1
simdTable_to_complexKernels(NVIDIA_INSTR) = ELPA_2STAGE_COMPLEX_GPU
simdTable_to_complexKernels(NVIDIA_INSTR) = ELPA_2STAGE_COMPLEX_NVIDIA_GPU
! Map the AMD GPU instruction-set slot to the AMD GPU complex kernel
simdTable_to_complexKernels(AMD_GPU_INSTR) = ELPA_2STAGE_COMPLEX_AMD_GPU
kernel = simdTable_to_complexKernels(simd_set_index)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment