Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
b40c7d47
Commit
b40c7d47
authored
Mar 08, 2021
by
Andreas Marek
Browse files
Merge branch 'ELPA_ROCm' into oneAPI
parents
ee776ac8
9407142d
Changes
12
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
b40c7d47
This diff is collapsed.
Click to expand it.
Makefile.am
View file @
b40c7d47
...
...
@@ -789,6 +789,7 @@ CLEANFILES = \
clean-local
:
-
rm
-rf
modules/
*
private_modules/
*
test_modules/
*
.fortran_dependencies/
*
-
rm
-rf
validate_
*
.sh
-
rm
-rf
test_
*
.sh
-
rm
-rf
real_2stage
*
.sh
-
rm
-rf
complex_2stage
*
.sh
-
rm
-rf
single_complex_2stage
*
.sh
...
...
ci_test_scripts/generate_gitlab_ci_tests.py
View file @
b40c7d47
...
...
@@ -346,7 +346,7 @@ ilp64_no_omp_tests = [
'SCALAPACK_LDFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_NOMPI_NOOMP_ILP64
\\
" '
'SCALAPACK_FCFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_NOMPI_NOOMP_ILP64
\\
" '
'--enable-option-checking=fatal --with-mpi=no --disable-openmp '
'--disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }'
'--disable-
nvidia-
gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }'
'" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE '
'-s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM'
,
"
\n
"
,
...
...
@@ -364,7 +364,7 @@ ilp64_no_omp_tests = [
'SCALAPACK_LDFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_NOMPI_OMP_ILP64
\\
" '
'SCALAPACK_FCFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_NOMPI_OMP_ILP64
\\
" '
'--enable-option-checking=fatal --with-mpi=no --enable-openmp '
'--disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }'
'--disable-
nvidia-
gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }'
'" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE '
'-s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM'
,
"
\n
"
,
...
...
@@ -388,7 +388,7 @@ ilp64_no_omp_mpi_tests = [
'SCALAPACK_LDFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_MPI_NOOMP_ILP64
\\
" '
'SCALAPACK_FCFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_MPI_NOOMP_ILP64
\\
" '
'--enable-option-checking=fatal --with-mpi=yes --disable-openmp '
'--disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }'
'--disable-
nvidia-
gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }'
'" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE '
'-s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM'
,
"
\n
"
,
...
...
@@ -406,7 +406,7 @@ ilp64_no_omp_mpi_tests = [
'SCALAPACK_LDFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_MPI_OMP_ILP64
\\
" '
'SCALAPACK_FCFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_MPI_OMP_ILP64
\\
" '
'--enable-option-checking=fatal --with-mpi=yes --enable-openmp '
'--disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }'
'--disable-
nvidia-
gpu --enable-avx --disable-avx2 --disable-avx512 --enable-64bit-integer-math-support || { cat config.log; exit 1; }'
'" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE '
'-s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM'
,
"
\n
"
,
...
...
@@ -430,7 +430,7 @@ matrix_redistribute_mpi_tests = [
'SCALAPACK_LDFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_MPI_NO_OMP
\\
" '
'SCALAPACK_FCFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_MPI_NO_OMP
\\
" '
'--enable-option-checking=fatal --with-mpi=yes --disable-openmp '
'--disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-scalapack-tests --enable-autotune-redistribute-matrix || { cat config.log; exit 1; }'
'--disable-
nvidia-
gpu --enable-avx --disable-avx2 --disable-avx512 --enable-scalapack-tests --enable-autotune-redistribute-matrix || { cat config.log; exit 1; }'
'" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE '
'-s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM'
,
"
\n
"
,
...
...
@@ -448,7 +448,7 @@ matrix_redistribute_mpi_tests = [
'SCALAPACK_LDFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_LDFLAGS_MPI_OMP
\\
" '
'SCALAPACK_FCFLAGS=
\\
"$MKL_GFORTRAN_SCALAPACK_FCFLAGS_MPI_OMP
\\
" '
'--enable-option-checking=fatal --with-mpi=yes --enable-openmp '
'--disable-gpu --enable-avx --disable-avx2 --disable-avx512 --enable-scalapack-tests --enable-autotune-redistribute-matrix || { cat config.log; exit 1; }'
'--disable-
nvidia-
gpu --enable-avx --disable-avx2 --disable-avx512 --enable-scalapack-tests --enable-autotune-redistribute-matrix || { cat config.log; exit 1; }'
'" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE '
'-s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM'
,
"
\n
"
,
...
...
@@ -472,7 +472,7 @@ python_ci_tests = [
'SCALAPACK_LDFLAGS=
\\
"$MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP
\\
" '
'SCALAPACK_FCFLAGS=
\\
"$MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP
\\
" '
'--enable-option-checking=fatal --with-mpi=yes --enable-openmp '
'--disable-gpu --enable-avx --enable-python --enable-python-tests || { cat config.log; exit 1; }'
'--disable-
nvidia-
gpu --enable-avx --enable-python --enable-python-tests || { cat config.log; exit 1; }'
'" -j 8 -t $MPI_TASKS -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE '
'-s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM'
,
"
\n
"
,
...
...
@@ -486,7 +486,7 @@ python_ci_tests = [
'SCALAPACK_LDFLAGS=
\\\"
$MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP
\\\"
'
'SCALAPACK_FCFLAGS=
\\\"
$MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP
\\\"
'
'--enable-option-checking=fatal --with-mpi=yes --enable-openmp '
'--disable-gpu --enable-avx --enable-python --enable-python-tests || { cat config.log; exit 1; }'
'--disable-
nvidia-
gpu --enable-avx --enable-python --enable-python-tests || { cat config.log; exit 1; }'
'" -j 8 -t $MPI_TASKS -m 150 -n 50 -b 16 '
'-s $SKIP_STEP -i $INTERACTIVE_RUN -S $SLURM'
,
"
\n
"
,
...
...
@@ -496,13 +496,13 @@ python_ci_tests = [
'SCALAPACK_LDFLAGS=
\\\"
$MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP
\\\"
'
'SCALAPACK_FCFLAGS=
\\\"
$MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP
\\\"
'
'--enable-option-checking=fatal --with-mpi=yes --enable-openmp '
'--disable-gpu --enable-avx --enable-python --enable-python-tests "'
'--disable-
nvidia-
gpu --enable-avx --enable-python --enable-python-tests "'
'-d " CC=
\\\\\\\"
mpiicc
\\\\\\\"
CFLAGS=
\\\\\\\"
-O3 -xAVX
\\\\\\\"
'
'FC=
\\\\\\\"
mpiifort
\\\\\\\"
FCFLAGS=
\\\\\\\"
-O3 -xAVX
\\\\\\\"
'
'SCALAPACK_LDFLAGS=
\\\"
$MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP
\\\"
'
'SCALAPACK_FCFLAGS=
\\\"
$MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP
\\\"
'
'--enable-option-checking=fatal --with-mpi=yes --enable-openmp '
'--disable-gpu --enable-avx --enable-python --enable-python-tests'
'--disable-
nvidia-
gpu --enable-avx --enable-python --enable-python-tests'
'" -t $MPI_TASKS -m 150 -n 50 -b 16 -S $SLURM || { chmod u+rwX -R . ; exit 1 ; }'
,
"
\n
"
,
]
...
...
@@ -597,8 +597,8 @@ band_to_full_blocking = {
}
gpu
=
{
"no-gpu"
:
"--disable-gpu"
,
"with-gpu"
:
"--enable-gpu --with-cuda-path=
\\
$CUDA_HOME/"
,
"no-gpu"
:
"--disable-
nvidia-
gpu"
,
"with-gpu"
:
"--enable-
nvidia-
gpu --with-cuda-path=
\\
$CUDA_HOME/"
,
}
...
...
elpa/elpa_constants.h.in
View file @
b40c7d47
...
...
@@ -52,26 +52,27 @@ enum ELPA_SOLVERS {
X(ELPA_2STAGE_REAL_AVX512_BLOCK6, 17, @ELPA_2STAGE_REAL_AVX512_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NVIDIA_GPU, 18, @ELPA_2STAGE_REAL_NVIDIA_GPU_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_AMD_GPU, 19, @ELPA_2STAGE_REAL_AMD_GPU_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK2, 20, @ELPA_2STAGE_REAL_SPARC64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK4, 21, @ELPA_2STAGE_REAL_SPARC64_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK6, 22, @ELPA_2STAGE_REAL_SPARC64_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2, 23, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK4, 24, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK6, 25, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK2, 26, @ELPA_2STAGE_REAL_VSX_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK4, 27, @ELPA_2STAGE_REAL_VSX_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK6, 28, @ELPA_2STAGE_REAL_VSX_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE128_BLOCK2, 29, @ELPA_2STAGE_REAL_SVE128_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE128_BLOCK4, 30, @ELPA_2STAGE_REAL_SVE128_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE128_BLOCK6, 31, @ELPA_2STAGE_REAL_SVE128_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE256_BLOCK2, 32, @ELPA_2STAGE_REAL_SVE256_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE256_BLOCK4, 33, @ELPA_2STAGE_REAL_SVE256_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE256_BLOCK6, 34, @ELPA_2STAGE_REAL_SVE256_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE512_BLOCK2, 35, @ELPA_2STAGE_REAL_SVE512_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE512_BLOCK4, 36, @ELPA_2STAGE_REAL_SVE512_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE512_BLOCK6, 37, @ELPA_2STAGE_REAL_SVE512_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 38, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6, 39, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6_COMPILED@, __VA_ARGS__)
X(ELPA_2STAGE_REAL_INTEL_GPU, 20, @ELPA_2STAGE_REAL_INTEL_GPU_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK2, 21, @ELPA_2STAGE_REAL_SPARC64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK4, 22, @ELPA_2STAGE_REAL_SPARC64_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK6, 23, @ELPA_2STAGE_REAL_SPARC64_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2, 24, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK4, 25, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK6, 26, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK2, 27, @ELPA_2STAGE_REAL_VSX_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK4, 28, @ELPA_2STAGE_REAL_VSX_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK6, 29, @ELPA_2STAGE_REAL_VSX_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE128_BLOCK2, 30, @ELPA_2STAGE_REAL_SVE128_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE128_BLOCK4, 31, @ELPA_2STAGE_REAL_SVE128_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE128_BLOCK6, 32, @ELPA_2STAGE_REAL_SVE128_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE256_BLOCK2, 33, @ELPA_2STAGE_REAL_SVE256_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE256_BLOCK4, 34, @ELPA_2STAGE_REAL_SVE256_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE256_BLOCK6, 35, @ELPA_2STAGE_REAL_SVE256_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE512_BLOCK2, 36, @ELPA_2STAGE_REAL_SVE512_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE512_BLOCK4, 37, @ELPA_2STAGE_REAL_SVE512_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SVE512_BLOCK6, 38, @ELPA_2STAGE_REAL_SVE512_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 39, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6, 40, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6_COMPILED@, __VA_ARGS__)
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
...
...
@@ -106,7 +107,8 @@ enum ELPA_REAL_KERNELS {
X(ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK1, 20, @ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK1_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK2, 21, @ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_NVIDIA_GPU, 22, @ELPA_2STAGE_COMPLEX_NVIDIA_GPU_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_AMD_GPU, 23, @ELPA_2STAGE_COMPLEX_AMD_GPU_COMPILED@, __VA_ARGS__)
X(ELPA_2STAGE_COMPLEX_AMD_GPU, 23, @ELPA_2STAGE_COMPLEX_AMD_GPU_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_INTEL_GPU, 24, @ELPA_2STAGE_COMPLEX_INTEL_GPU_COMPILED@, __VA_ARGS__)
#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(X) \
...
...
elpa/elpa_simd_constants.h
View file @
b40c7d47
...
...
@@ -6,11 +6,13 @@
#define AVX2_INSTR 6
#define AVX512_INSTR 7
#define NVIDIA_INSTR 8
#define VSX_INSTR 9
#define ARCH64_INSTR 10
#define SPARC_INSTR 11
#define SVE128_INSTR 12
#define SVE256_INSTR 13
#define SVE512_INSTR 14
#define AMD_GPU_INSTR 9
#define INTEL_GPU_INSTR 10
#define VSX_INSTR 11
#define ARCH64_INSTR 12
#define SPARC_INSTR 13
#define SVE128_INSTR 14
#define SVE256_INSTR 15
#define SVE512_INSTR 16
#define NUMBER_OF_INSTR 1
5
#define NUMBER_OF_INSTR 1
7
generate_automake_test_programs.py
View file @
b40c7d47
...
...
@@ -21,9 +21,10 @@ solver_flag = {
"scalapack_part"
:
"-DTEST_SCALAPACK_PART"
,
}
gpu_flag
=
{
"GPU_OFF"
:
"-DTEST_NVIDIA_GPU=0 -DTEST_INTEL_GPU=0"
,
"GPU_OFF"
:
"-DTEST_NVIDIA_GPU=0 -DTEST_INTEL_GPU=0
-DTEST_AMD_GPU=0
"
,
"NVIDIA_GPU_ON"
:
"-DTEST_NVIDIA_GPU=1"
,
"INTEL_GPU_ON"
:
"-DTEST_INTEL_GPU=1"
,
"AMD_GPU_ON"
:
"-DTEST_AMD_GPU=1"
,
}
gpu_id_flag
=
{
0
:
"-DTEST_GPU_SET_ID=0"
,
...
...
@@ -87,14 +88,14 @@ for lang, m, g, gid, q, t, p, d, s, lay, spl in product(sorted(language_flag.key
# exclude some test combinations
# analytic tests only for "eigenvectors" and not on GPU
if
(
m
==
"analytic"
and
(
g
==
"NVIDIA_GPU_ON"
or
g
==
"INTEL_GPU_ON"
or
t
!=
"eigenvectors"
)):
if
(
m
==
"analytic"
and
(
g
==
"NVIDIA_GPU_ON"
or
g
==
"INTEL_GPU_ON"
or
g
==
"AMD_GPU_ON"
or
t
!=
"eigenvectors"
)):
continue
# Frank tests only for "eigenvectors" and eigenvalues and real double precision case
if
(
m
==
"frank"
and
((
t
!=
"eigenvectors"
or
t
!=
"eigenvalues"
)
and
(
d
!=
"real"
or
p
!=
"double"
))):
continue
if
(
s
in
[
"scalapack_all"
,
"scalapack_part"
]
and
(
g
==
"NVIDIA_GPU_ON"
or
g
==
"INTEL_GPU_ON"
or
t
!=
"eigenvectors"
or
m
!=
"analytic"
)):
if
(
s
in
[
"scalapack_all"
,
"scalapack_part"
]
and
(
g
==
"NVIDIA_GPU_ON"
or
g
==
"INTEL_GPU_ON"
or
g
==
"AMD_GPU_ON"
or
t
!=
"eigenvectors"
or
m
!=
"analytic"
)):
continue
# do not test single-precision scalapack
...
...
@@ -128,7 +129,7 @@ for lang, m, g, gid, q, t, p, d, s, lay, spl in product(sorted(language_flag.key
continue
# qr only for 2stage real
if
(
q
==
1
and
(
s
!=
"2stage"
or
d
!=
"real"
or
t
!=
"eigenvectors"
or
g
==
"NVIDIA_GPU_ON"
or
"INTEL_GPU_ON"
or
m
!=
"random"
)):
if
(
q
==
1
and
(
s
!=
"2stage"
or
d
!=
"real"
or
t
!=
"eigenvectors"
or
g
==
"NVIDIA_GPU_ON"
or
"INTEL_GPU_ON"
or
g
==
"AMD_GPU_ON"
or
m
!=
"random"
)):
continue
if
(
spl
==
"myself"
and
(
d
!=
"real"
or
p
!=
"double"
or
q
!=
0
or
m
!=
"random"
or
(
t
!=
"eigenvectors"
and
t
!=
"cholesky"
)
or
lang
!=
"Fortran"
or
lay
!=
"square"
)):
...
...
@@ -163,6 +164,10 @@ for lang, m, g, gid, q, t, p, d, s, lay, spl in product(sorted(language_flag.key
print
(
"if WITH_INTEL_GPU_VERSION"
)
endifs
+=
1
if
(
g
==
"AMD_GPU_ON"
):
print
(
"if WITH_AMD_GPU_VERSION"
)
endifs
+=
1
if
(
lay
==
"all_layouts"
):
print
(
"if WITH_MPI"
)
endifs
+=
1
...
...
@@ -195,7 +200,7 @@ for lang, m, g, gid, q, t, p, d, s, lay, spl in product(sorted(language_flag.key
langsuffix
=
language_flag
[
lang
],
d
=
d
,
p
=
p
,
t
=
t
,
s
=
s
,
kernelsuffix
=
""
if
kernel
==
"nokernel"
else
"_"
+
kernel
,
gpusuffix
=
"gpu_"
if
(
g
==
"NVIDIA_GPU_ON"
or
g
==
"INTEL_GPU_ON"
)
else
""
,
gpusuffix
=
"gpu_"
if
(
g
==
"NVIDIA_GPU_ON"
or
g
==
"INTEL_GPU_ON"
or
g
==
"AMD_GPU_ON"
)
else
""
,
gpuidsuffix
=
"set_gpu_id_"
if
gid
else
""
,
qrsuffix
=
"qr_"
if
q
else
""
,
m
=
m
,
...
...
src/helpers/mod_simd_kernel.F90
View file @
b40c7d47
...
...
@@ -91,6 +91,8 @@ module simd_kernel
realKernels_to_simdTable
(
ELPA_2STAGE_REAL_SVE512_BLOCK4
)
=
SVE512_INSTR
realKernels_to_simdTable
(
ELPA_2STAGE_REAL_SVE512_BLOCK6
)
=
SVE512_INSTR
realKernels_to_simdTable
(
ELPA_2STAGE_REAL_NVIDIA_GPU
)
=
NVIDIA_INSTR
realKernels_to_simdTable
(
ELPA_2STAGE_REAL_AMD_GPU
)
=
AMD_GPU_INSTR
realKernels_to_simdTable
(
ELPA_2STAGE_REAL_INTEL_GPU
)
=
INTEL_GPU_INSTR
realKernels_to_simdTable
(
ELPA_2STAGE_REAL_SPARC64_BLOCK2
)
=
SPARC_INSTR
realKernels_to_simdTable
(
ELPA_2STAGE_REAL_SPARC64_BLOCK4
)
=
SPARC_INSTR
realKernels_to_simdTable
(
ELPA_2STAGE_REAL_SPARC64_BLOCK6
)
=
SPARC_INSTR
...
...
@@ -117,19 +119,21 @@ module simd_kernel
integer
(
kind
=
c_int
)
::
kernel
integer
(
kind
=
c_int
),
intent
(
in
)
::
simd_set_index
simdTable_to_realKernels
(
GENERIC_INSTR
)
=
ELPA_2STAGE_REAL_GENERIC
simdTable_to_realKernels
(
BLUEGENE_INSTR
)
=
ELPA_2STAGE_REAL_BGP
simdTable_to_realKernels
(
SSE_INSTR
)
=
ELPA_2STAGE_REAL_SSE_BLOCK2
simdTable_to_realKernels
(
AVX_INSTR
)
=
ELPA_2STAGE_REAL_AVX_BLOCK2
simdTable_to_realKernels
(
AVX2_INSTR
)
=
ELPA_2STAGE_REAL_AVX2_BLOCK2
simdTable_to_realKernels
(
AVX512_INSTR
)
=
ELPA_2STAGE_REAL_AVX512_BLOCK2
simdTable_to_realKernels
(
NVIDIA_INSTR
)
=
ELPA_2STAGE_REAL_NVIDIA_GPU
simdTable_to_realKernels
(
SPARC_INSTR
)
=
ELPA_2STAGE_REAL_SPARC64_BLOCK2
simdTable_to_realKernels
(
ARCH64_INSTR
)
=
ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2
simdTable_to_realKernels
(
VSX_INSTR
)
=
ELPA_2STAGE_REAL_VSX_BLOCK2
simdTable_to_realKernels
(
SVE128_INSTR
)
=
ELPA_2STAGE_REAL_SVE128_BLOCK2
simdTable_to_realKernels
(
SVE256_INSTR
)
=
ELPA_2STAGE_REAL_SVE256_BLOCK2
simdTable_to_realKernels
(
SVE512_INSTR
)
=
ELPA_2STAGE_REAL_SVE512_BLOCK2
simdTable_to_realKernels
(
GENERIC_INSTR
)
=
ELPA_2STAGE_REAL_GENERIC
simdTable_to_realKernels
(
BLUEGENE_INSTR
)
=
ELPA_2STAGE_REAL_BGP
simdTable_to_realKernels
(
SSE_INSTR
)
=
ELPA_2STAGE_REAL_SSE_BLOCK2
simdTable_to_realKernels
(
AVX_INSTR
)
=
ELPA_2STAGE_REAL_AVX_BLOCK2
simdTable_to_realKernels
(
AVX2_INSTR
)
=
ELPA_2STAGE_REAL_AVX2_BLOCK2
simdTable_to_realKernels
(
AVX512_INSTR
)
=
ELPA_2STAGE_REAL_AVX512_BLOCK2
simdTable_to_realKernels
(
NVIDIA_INSTR
)
=
ELPA_2STAGE_REAL_NVIDIA_GPU
simdTable_to_realKernels
(
AMD_GPU_INSTR
)
=
ELPA_2STAGE_REAL_AMD_GPU
simdTable_to_realKernels
(
INTEL_GPU_INSTR
)
=
ELPA_2STAGE_REAL_INTEL_GPU
simdTable_to_realKernels
(
SPARC_INSTR
)
=
ELPA_2STAGE_REAL_SPARC64_BLOCK2
simdTable_to_realKernels
(
ARCH64_INSTR
)
=
ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2
simdTable_to_realKernels
(
VSX_INSTR
)
=
ELPA_2STAGE_REAL_VSX_BLOCK2
simdTable_to_realKernels
(
SVE128_INSTR
)
=
ELPA_2STAGE_REAL_SVE128_BLOCK2
simdTable_to_realKernels
(
SVE256_INSTR
)
=
ELPA_2STAGE_REAL_SVE256_BLOCK2
simdTable_to_realKernels
(
SVE512_INSTR
)
=
ELPA_2STAGE_REAL_SVE512_BLOCK2
kernel
=
simdTable_to_realKernels
(
simd_set_index
)
...
...
@@ -164,6 +168,8 @@ module simd_kernel
complexKernels_to_simdTable
(
ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK1
)
=
ARCH64_INSTR
complexKernels_to_simdTable
(
ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK2
)
=
ARCH64_INSTR
complexKernels_to_simdTable
(
ELPA_2STAGE_COMPLEX_NVIDIA_GPU
)
=
NVIDIA_INSTR
complexKernels_to_simdTable
(
ELPA_2STAGE_COMPLEX_AMD_GPU
)
=
AMD_GPU_INSTR
complexKernels_to_simdTable
(
ELPA_2STAGE_COMPLEX_INTEL_GPU
)
=
INTEL_GPU_INSTR
simd_set_index
=
complexKernels_to_simdTable
(
kernel
)
...
...
@@ -177,17 +183,19 @@ module simd_kernel
integer
(
kind
=
c_int
)
::
kernel
integer
(
kind
=
c_int
),
intent
(
in
)
::
simd_set_index
simdTable_to_complexKernels
(
GENERIC_INSTR
)
=
ELPA_2STAGE_COMPLEX_GENERIC
simdTable_to_complexKernels
(
BLUEGENE_INSTR
)
=
ELPA_2STAGE_COMPLEX_BGP
simdTable_to_complexKernels
(
SSE_INSTR
)
=
ELPA_2STAGE_COMPLEX_SSE_BLOCK1
simdTable_to_complexKernels
(
AVX_INSTR
)
=
ELPA_2STAGE_COMPLEX_AVX_BLOCK1
simdTable_to_complexKernels
(
AVX2_INSTR
)
=
ELPA_2STAGE_COMPLEX_AVX2_BLOCK1
simdTable_to_complexKernels
(
AVX512_INSTR
)
=
ELPA_2STAGE_COMPLEX_AVX512_BLOCK1
simdTable_to_complexKernels
(
SVE128_INSTR
)
=
ELPA_2STAGE_COMPLEX_SVE128_BLOCK1
simdTable_to_complexKernels
(
SVE256_INSTR
)
=
ELPA_2STAGE_COMPLEX_SVE256_BLOCK1
simdTable_to_complexKernels
(
SVE512_INSTR
)
=
ELPA_2STAGE_COMPLEX_SVE512_BLOCK1
simdTable_to_complexKernels
(
ARCH64_INSTR
)
=
ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK1
simdTable_to_complexKernels
(
NVIDIA_INSTR
)
=
ELPA_2STAGE_COMPLEX_NVIDIA_GPU
simdTable_to_complexKernels
(
GENERIC_INSTR
)
=
ELPA_2STAGE_COMPLEX_GENERIC
simdTable_to_complexKernels
(
BLUEGENE_INSTR
)
=
ELPA_2STAGE_COMPLEX_BGP
simdTable_to_complexKernels
(
SSE_INSTR
)
=
ELPA_2STAGE_COMPLEX_SSE_BLOCK1
simdTable_to_complexKernels
(
AVX_INSTR
)
=
ELPA_2STAGE_COMPLEX_AVX_BLOCK1
simdTable_to_complexKernels
(
AVX2_INSTR
)
=
ELPA_2STAGE_COMPLEX_AVX2_BLOCK1
simdTable_to_complexKernels
(
AVX512_INSTR
)
=
ELPA_2STAGE_COMPLEX_AVX512_BLOCK1
simdTable_to_complexKernels
(
SVE128_INSTR
)
=
ELPA_2STAGE_COMPLEX_SVE128_BLOCK1
simdTable_to_complexKernels
(
SVE256_INSTR
)
=
ELPA_2STAGE_COMPLEX_SVE256_BLOCK1
simdTable_to_complexKernels
(
SVE512_INSTR
)
=
ELPA_2STAGE_COMPLEX_SVE512_BLOCK1
simdTable_to_complexKernels
(
ARCH64_INSTR
)
=
ELPA_2STAGE_COMPLEX_NEON_ARCH64_BLOCK1
simdTable_to_complexKernels
(
NVIDIA_INSTR
)
=
ELPA_2STAGE_COMPLEX_NVIDIA_GPU
simdTable_to_complexKernels
(
AMD_GPU_INSTR
)
=
ELPA_2STAGE_COMPLEX_AMD_GPU
simdTable_to_complexKernels
(
INTEL_GPU_INSTR
)
=
ELPA_2STAGE_COMPLEX_INTEL_GPU
kernel
=
simdTable_to_complexKernels
(
simd_set_index
)
...
...
test/C/test.c
View file @
b40c7d47
...
...
@@ -127,6 +127,14 @@
#define TEST_C_INT_MPI_TYPE int
#define C_INT_MPI_TYPE int
#endif
#define TEST_GPU 0
#if (TEST_NVIDIA_GPU == 1) || (TEST_AMD_GPU == 1) || (TEST_INTEL_GPU == 1)
#undef TEST_GPU
#define TEST_GPU 1
#endif
#include "test/shared/generated.h"
int
main
(
int
argc
,
char
**
argv
)
{
...
...
@@ -273,19 +281,18 @@ int main(int argc, char** argv) {
#endif
assert_elpa_ok
(
error_elpa
);
#if defined(TEST_NVIDIA_GPU)
elpa_set
(
handle
,
"nvidia-gpu"
,
TEST_NVIDIA_GPU
,
&
error_elpa
);
assert_elpa_ok
(
error_elpa
);
#else
elpa_set
(
handle
,
"nvidia-gpu"
,
0
,
&
error_elpa
);
#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0)
elpa_set
(
handle
,
"nvidia-gpu"
,
TEST_GPU
,
&
error_elpa
);
assert_elpa_ok
(
error_elpa
);
#endif
#if defined(TEST_INTEL_GPU)
printf
(
"Setting INTEL GPU
\n
"
);
elpa_set
(
handle
,
"
intel
-gpu"
,
TEST_
INTEL_
GPU
,
&
error_elpa
);
#if TEST_AMD_GPU == 1
elpa_set
(
handle
,
"
amd
-gpu"
,
TEST_GPU
,
&
error_elpa
);
assert_elpa_ok
(
error_elpa
);
#else
elpa_set
(
handle
,
"intel-gpu"
,
0
,
&
error_elpa
);
#endif
#if TEST_INTEL_GPU == 1
elpa_set
(
handle
,
"intel-gpu"
,
TEST_GPU
,
&
error_elpa
);
assert_elpa_ok
(
error_elpa
);
#endif
...
...
test/Fortran/test.F90
View file @
b40c7d47
...
...
@@ -47,6 +47,7 @@
! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE
! Define TEST_NVIDIA_GPU \in [0, 1]
! Define TEST_INTEL_GPU \in [0, 1]
! Define TEST_AMD_GPU \in [0, 1]
! Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel]
#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX))
...
...
@@ -117,6 +118,13 @@ error: define either TEST_ALL_KERNELS or a valid TEST_KERNEL
#define TEST_INT_MPI_TYPE integer(kind=c_int32_t)
#define INT_MPI_TYPE c_int32_t
#endif
#define TEST_GPU 0
#if (TEST_NVIDIA_GPU == 1) || (TEST_AMD_GPU == 1) || (TEST_INTEL_GPU == 1)
#undef TEST_GPU
#define TEST_GPU 1
#endif
#include "assert.h"
program
test
...
...
@@ -281,7 +289,7 @@ program test
#if TEST_QR_DECOMPOSITION == 1
#if (TEST_NVIDIA_GPU == 1) || (TEST_INTEL_GPU == 1)
#if (TEST_NVIDIA_GPU == 1) || (TEST_INTEL_GPU == 1)
|| (TEST_AMD_GPU == 1)
#ifdef WITH_MPI
call
mpi_finalize
(
mpierr
)
#endif
...
...
@@ -641,23 +649,19 @@ program test
#endif
assert_elpa_ok
(
error_elpa
)
#if (TEST_NVIDIA_GPU == 1)
call
e
%
set
(
"nvidia-gpu"
,
TEST_NVIDIA_GPU
,
error_elpa
)
#else
call
e
%
set
(
"nvidia-gpu"
,
0
,
error_elpa
)
#endif
#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0) && (TEST_INTEL_GPU == 0)
call
e
%
set
(
"nvidia-gpu"
,
TEST_GPU
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#if (TEST_INTEL_GPU == 1)
call
e
%
set
(
"intel-gpu"
,
TEST_INTEL_GPU
,
error_elpa
)
#else
call
e
%
set
(
"intel-gpu"
,
0
,
error_elpa
)
#endif
#if TEST_AMD_GPU == 1
call
e
%
set
(
"amd-gpu"
,
TEST_GPU
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#if (TEST_NVIDIA_GPU == 1)
stop
"AAAAAAAAAAAAAAA"
#endif
#if (TEST_INTEL_GPU == 1)
print
*
,
"Using intel gpu"
#if TEST_INTEL_GPU == 1
call
e
%
set
(
"intel-gpu"
,
TEST_GPU
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#endif
#if (TEST_GPU_SET_ID == 1) && (TEST_INTEL_GPU == 0)
...
...
@@ -685,11 +689,15 @@ program test
#ifdef TEST_ALL_KERNELS
do
i
=
0
,
elpa_option_cardinality
(
KERNEL_KEY
)
! kernels
#if (TEST_NVIDIA_GPU == 0) && (TEST_INTEL_GPU == 0)
#if (TEST_NVIDIA_GPU == 0) && (TEST_INTEL_GPU == 0)
&& (TEST_AMD_GPU == 0)
!if (TEST_GPU .eq. 0) then
kernel
=
elpa_option_enumerate
(
KERNEL_KEY
,
int
(
i
,
kind
=
c_int
))
if
(
kernel
.eq.
ELPA_2STAGE_REAL_NVIDIA_GPU
)
continue
if
(
kernel
.eq.
ELPA_2STAGE_COMPLEX_NVIDIA_GPU
)
continue
if
(
kernel
.eq.
ELPA_2STAGE_REAL_AMD_GPU
)
continue
if
(
kernel
.eq.
ELPA_2STAGE_COMPLEX_AMD_GPU
)
continue
if
(
kernel
.eq.
ELPA_2STAGE_REAL_INTEL_GPU
)
continue
if
(
kernel
.eq.
ELPA_2STAGE_COMPLEX_INTEL_GPU
)
continue
!endif
#endif
#endif
...
...
@@ -701,31 +709,29 @@ program test
#ifdef TEST_SOLVER_2STAGE
#if TEST_NVIDIA_GPU == 1
#if defined TEST_REAL
#if (TEST_NVIDIA_GPU == 1)
kernel
=
ELPA_2STAGE_REAL_NVIDIA_GPU
#endif
#if defined TEST_COMPLEX
kernel
=
ELPA_2STAGE_COMPLEX_NVIDIA_GPU
#endif
#endif /* TEST_NVIDIA_GPU == 1 */
#if TEST_AMD_GPU == 1
#if defined TEST_REAL
#if (TEST_AMD_GPU == 1)
kernel
=
ELPA_2STAGE_REAL_AMD_GPU
#endif
#if
defined TEST_COMPLEX
kernel
=
ELPA_2STAGE_
COMPLEX_AMD
_GPU
#if
(TEST_INTEL_GPU == 1)
kernel
=
ELPA_2STAGE_
REAL_INTEL
_GPU
#endif
#endif /* TEST_
AMD_GPU == 1
*/
#endif /* TEST_
REAL
*/
#if TEST_INTEL_GPU == 1
#if defined TEST_REAL
!kernel = ELPA_2STAGE_REAL_INTEL_GPU
#endif
#if defined TEST_COMPLEX
!kernel = ELPA_2STAGE_COMPLEX_INTEL_GPU
#if (TEST_NVIDIA_GPU == 1)
kernel
=
ELPA_2STAGE_COMPLEX_NVIDIA_GPU
#endif
#if (TEST_AMD_GPU == 1)
kernel
=
ELPA_2STAGE_COMPLEX_AMD_GPU
#endif
#if (TEST_INTEL_GPU == 1)
kernel
=
ELPA_2STAGE_COMPLEX_INTEL_GPU
#endif
#endif /* TEST_COMPLEX */
#endif /* TEST_GPU == 1 */
call
e
%
set
(
KERNEL_KEY
,
kernel
,
error_elpa
)
...
...
test/Fortran/test_autotune.F90
View file @
b40c7d47
...
...
@@ -47,6 +47,7 @@
! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE
! Define TEST_NVIDIA_GPU \in [0, 1]
! Define TEST_INTEL_GPU \in [0, 1]
! Define TEST_AMD_GPU \in [0, 1]
! Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel]
#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX))
...
...
@@ -95,6 +96,16 @@ error: define exactly one of TEST_SINGLE or TEST_DOUBLE
#define TEST_INT_MPI_TYPE integer(kind=c_int32_t)
#define INT_MPI_TYPE c_int32_t
#endif
#define TEST_GPU 0
#if (TEST_NVIDIA_GPU == 1) || (TEST_AMD_GPU == 1)
#undef TEST_GPU
#define TEST_GPU 1
#endif
#include "assert.h"
program
test
...
...
@@ -229,10 +240,19 @@ program test
call
e
%
set
(
"debug"
,
1
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0) && (TEST_INTEL_GPU == 0)
call
e
%
set
(
"nvidia-gpu"
,
0
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#endif
#if TEST_AMD_GPU == 1
call
e
%
set
(
"amd-gpu"
,
0
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#endif
#if TEST_INTEL_GPU == 1
call
e
%
set
(
"intel-gpu"
,
0
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#endif
!call e%set("max_stored_rows", 15, error_elpa)
assert_elpa_ok
(
e
%
setup
())
...
...
test/Fortran/test_multiple_objs.F90
View file @
b40c7d47
...
...
@@ -47,6 +47,7 @@
! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE
! Define TEST_NVIDIA_GPU \in [0, 1]
! Define TEST_INTEL_GPU \in [0, 1]
! Define TEST_AMD_GPU \in [0, 1]
! Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel]
#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX))
...
...
@@ -96,6 +97,11 @@ error: define exactly one of TEST_SINGLE or TEST_DOUBLE
#define INT_MPI_TYPE c_int32_t
#endif
#define TEST_GPU 0
#if (TEST_NVIDIA_GPU == 1) || (TEST_AMD_GPU == 1) || (TEST_INTEL_GPU == 1)
#undef TEST_GPU
#define TEST_GPU 1
#endif
#include "assert.h"
...
...
@@ -210,10 +216,20 @@ program test
call
e1
%
set
(
"debug"
,
1
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
call
e1
%
set
(
"nvidia-gpu"
,
0
,
error_elpa
)
#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0) && (TEST_INTEL_GPU == 0)
call
e1
%
set
(
"nvidia-gpu"
,
TEST_GPU
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#endif
#if TEST_AMD_GPU == 1
call
e1
%
set
(
"amd-gpu"
,
TEST_GPU
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
call
e1
%
set
(
"intel-gpu"
,
0
,
error_elpa
)
#endif
#if TEST_INTEL_GPU == 1
call
e1
%
set
(
"intel-gpu"
,
TEST_GPU
,
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#endif
!call e1%set("max_stored_rows", 15, error_elpa)
assert_elpa_ok
(
e1
%
setup
())
...
...
@@ -241,10 +257,18 @@ program test
assert_elpa_ok
(
error_elpa
)
call
e2
%
get
(
"debug"
,
int
(
debug
,
kind
=
c_int
),
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#if TEST_NVIDIA_GPU == 1 || (TEST_NVIDIA_GPU == 0) && (TEST_AMD_GPU == 0) && (TEST_INTEL_GPU == 0)
call
e2
%
get
(
"nvidia-gpu"
,
int
(
gpu
,
kind
=
c_int
),
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#endif
#if TEST_AMD_GPU == 1
call
e2
%
get
(
"amd-gpu"
,
int
(
gpu
,
kind
=
c_int
),
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#endif
#if TEST_INTEL_GPU == 1
call
e2
%
get
(
"intel-gpu"
,
int
(
gpu
,
kind
=
c_int
),
error_elpa
)
assert_elpa_ok
(
error_elpa
)
#endif
if
((
timings
.ne.
1
)
.or.
(
debug
.ne.
1
)
.or.
(
gpu
.ne.
0
))
then
print
*
,
"Parameters not stored or loaded correctly. Aborting..."
,
timings
,
debug
,
gpu
...
...
test/Fortran/test_skewsymmetric.F90
View file @
b40c7d47
...
...
@@ -47,6 +47,7 @@
! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE
! Define TEST_NVIDIA_GPU \in [0, 1]
! Define TEST_INTEL_GPU \in [0, 1]
! Define TEST_AMD_GPU \in [0, 1]
! Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel]
#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX))
...
...
@@ -96,6 +97,13 @@ error: define exactly one of TEST_SINGLE or TEST_DOUBLE
#define TEST_INT_MPI_TYPE integer(kind=c_int32_t)
#define INT_MPI_TYPE c_int32_t
#endif
#define TEST_GPU 0
#if (TEST_NVIDIA_GPU == 1) || (TEST_AMD_GPU == 1) || (TEST_INTEL_GPU == 1)
#undef TEST_GPU
#define TEST_GPU 1