Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
7329e8f8
Commit
7329e8f8
authored
Jul 06, 2017
by
Lorenz Huedepohl
Browse files
Make separate test programs for single kernel and loop
parent
06493879
Changes
5
Hide whitespace changes
Inline
Side-by-side
configure.ac
View file @
7329e8f8
...
...
@@ -483,7 +483,8 @@ m4_define(elpa_m4_bgq_kernels, [
])
m4_define(elpa_m4_gpu_kernels, [
gpu
real_gpu
complex_gpu
])
m4_define(elpa_m4_kernel_types, [generic sse sse_assembly avx avx2 avx512 bgp bgq gpu])
...
...
@@ -573,7 +574,8 @@ if test x"$with_gpu_support_only" = x"yes" ; then
m4_foreach_w([elpa_m4_kernel],elpa_m4_all_kernels,[
use_[]elpa_m4_kernel[]=no
])
use_gpu=yes
use_real_gpu=yes
use_complex_gpu=yes
fi
...
...
@@ -851,9 +853,10 @@ m4_foreach_w([elpa_m4_kernel],elpa_m4_all_kernels,[
])
AM_CONDITIONAL([WITH_GPU_VERSION],[test x"$use_gpu" = x"yes"])
if test x"$
{
use_gpu
}
" = x"yes" ; then
AM_CONDITIONAL([WITH_GPU_VERSION],[test x"$use_
real_gpu" = x"yes" -o x"$use_complex_
gpu" = x"yes"])
if test x"$use
_real_gpu" = x"yes" -o x"$use_complex
_gpu" = x"yes" ; then
AC_DEFINE([WITH_GPU_VERSION],[1],[enable GPU support])
AC_DEFINE([WITH_GPU_KERNEL],[1],[GPU kernel should be build])
ELPA_2STAGE_COMPLEX_GPU_COMPILED=1
ELPA_2STAGE_REAL_GPU_COMPILED=1
else
...
...
elpa/elpa_constants.h.in
View file @
7329e8f8
...
...
@@ -22,25 +22,25 @@ enum ELPA_SOLVERS {
#define ELPA_NUMBER_OF_SOLVERS (0 ELPA_FOR_ALL_SOLVERS(ELPA_ENUM_SUM))
/* Kernel constants */
/*
 * X-macro list of the real 2-stage kernels.
 *
 * Expands to one X(name, number, compiled) invocation per kernel, where
 *   name     - the kernel's identifier (ELPA_2STAGE_REAL_...),
 *   number   - its numeric id (1..18 in this list),
 *   compiled - an @..._COMPILED@ placeholder; presumably replaced at
 *              configure time, since this file is a .h.in template -
 *              TODO confirm against configure.ac.
 */
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
X(ELPA_2STAGE_REAL_GENERIC, 1, @ELPA_2STAGE_REAL_GENERIC_COMPILED@) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE, 2, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_COMPILED@) \
X(ELPA_2STAGE_REAL_BGP, 3, @ELPA_2STAGE_REAL_BGP_COMPILED@) \
X(ELPA_2STAGE_REAL_BGQ, 4, @ELPA_2STAGE_REAL_BGQ_COMPILED@) \
X(ELPA_2STAGE_REAL_SSE_ASSEMBLY, 5, @ELPA_2STAGE_REAL_SSE_ASSEMBLY_COMPILED@) \
X(ELPA_2STAGE_REAL_SSE_BLOCK2, 6, @ELPA_2STAGE_REAL_SSE_BLOCK2_COMPILED@) \
X(ELPA_2STAGE_REAL_SSE_BLOCK4, 7, @ELPA_2STAGE_REAL_SSE_BLOCK4_COMPILED@) \
X(ELPA_2STAGE_REAL_SSE_BLOCK6, 8, @ELPA_2STAGE_REAL_SSE_BLOCK6_COMPILED@) \
X(ELPA_2STAGE_REAL_AVX_BLOCK2, 9, @ELPA_2STAGE_REAL_AVX_BLOCK2_COMPILED@) \
X(ELPA_2STAGE_REAL_AVX_BLOCK4, 10, @ELPA_2STAGE_REAL_AVX_BLOCK4_COMPILED@) \
X(ELPA_2STAGE_REAL_AVX_BLOCK6, 11, @ELPA_2STAGE_REAL_AVX_BLOCK6_COMPILED@) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK2, 12, @ELPA_2STAGE_REAL_AVX2_BLOCK2_COMPILED@) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK4, 13, @ELPA_2STAGE_REAL_AVX2_BLOCK4_COMPILED@) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK6, 14, @ELPA_2STAGE_REAL_AVX2_BLOCK6_COMPILED@) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK2, 15, @ELPA_2STAGE_REAL_AVX512_BLOCK2_COMPILED@) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK4, 16, @ELPA_2STAGE_REAL_AVX512_BLOCK4_COMPILED@) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK6, 17, @ELPA_2STAGE_REAL_AVX512_BLOCK6_COMPILED@) \
X(ELPA_2STAGE_REAL_GPU, 18, @ELPA_2STAGE_REAL_GPU_COMPILED@)
/*
 * Variadic X-macro list of the real 2-stage kernels.
 *
 * Expands to one X(name, number, compiled, __VA_ARGS__) invocation per
 * kernel: the identifier, its numeric id, an @..._COMPILED@ placeholder, and
 * whatever extra arguments the caller supplied after X, forwarded unchanged.
 *
 * NOTE(review): a caller that passes only X leaves __VA_ARGS__ empty, so X
 * then receives an empty trailing argument - confirm every X macro used
 * with this list tolerates that.
 * NOTE(review): each entry below is spread over three lines and only the
 * last one ends in a continuation backslash; this looks like an artifact of
 * how the text was captured - confirm against the real header.
 */
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X
, ...
) \
X(ELPA_2STAGE_REAL_GENERIC, 1, @ELPA_2STAGE_REAL_GENERIC_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE, 2, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_BGP, 3, @ELPA_2STAGE_REAL_BGP_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_BGQ, 4, @ELPA_2STAGE_REAL_BGQ_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_SSE_ASSEMBLY, 5, @ELPA_2STAGE_REAL_SSE_ASSEMBLY_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_SSE_BLOCK2, 6, @ELPA_2STAGE_REAL_SSE_BLOCK2_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_SSE_BLOCK4, 7, @ELPA_2STAGE_REAL_SSE_BLOCK4_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_SSE_BLOCK6, 8, @ELPA_2STAGE_REAL_SSE_BLOCK6_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_AVX_BLOCK2, 9, @ELPA_2STAGE_REAL_AVX_BLOCK2_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_AVX_BLOCK4, 10, @ELPA_2STAGE_REAL_AVX_BLOCK4_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_AVX_BLOCK6, 11, @ELPA_2STAGE_REAL_AVX_BLOCK6_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK2, 12, @ELPA_2STAGE_REAL_AVX2_BLOCK2_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK4, 13, @ELPA_2STAGE_REAL_AVX2_BLOCK4_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK6, 14, @ELPA_2STAGE_REAL_AVX2_BLOCK6_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK2, 15, @ELPA_2STAGE_REAL_AVX512_BLOCK2_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK4, 16, @ELPA_2STAGE_REAL_AVX512_BLOCK4_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK6, 17, @ELPA_2STAGE_REAL_AVX512_BLOCK6_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_REAL_GPU, 18, @ELPA_2STAGE_REAL_GPU_COMPILED@
, __VA_ARGS__
)
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
...
...
@@ -52,21 +52,21 @@ enum ELPA_REAL_KERNELS {
};
/*
 * X-macro list of the complex 2-stage kernels.
 *
 * Expands to one X(name, number, compiled) invocation per kernel, where
 *   name     - the kernel's identifier (ELPA_2STAGE_COMPLEX_...),
 *   number   - its numeric id (1..14 in this list),
 *   compiled - an @..._COMPILED@ placeholder; presumably replaced at
 *              configure time, since this file is a .h.in template -
 *              TODO confirm against configure.ac.
 */
#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(X) \
X(ELPA_2STAGE_COMPLEX_GENERIC, 1, @ELPA_2STAGE_COMPLEX_GENERIC_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE, 2, @ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_BGP, 3, @ELPA_2STAGE_COMPLEX_BGP_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_BGQ, 4, @ELPA_2STAGE_COMPLEX_BGQ_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_SSE_ASSEMBLY, 5, @ELPA_2STAGE_COMPLEX_SSE_ASSEMBLY_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_SSE_BLOCK1, 6, @ELPA_2STAGE_COMPLEX_SSE_BLOCK1_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_SSE_BLOCK2, 7, @ELPA_2STAGE_COMPLEX_SSE_BLOCK2_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_AVX_BLOCK1, 8, @ELPA_2STAGE_COMPLEX_AVX_BLOCK1_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_AVX_BLOCK2, 9, @ELPA_2STAGE_COMPLEX_AVX_BLOCK2_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK1, 10, @ELPA_2STAGE_COMPLEX_AVX2_BLOCK1_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK2, 11, @ELPA_2STAGE_COMPLEX_AVX2_BLOCK2_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK1, 12, @ELPA_2STAGE_COMPLEX_AVX512_BLOCK1_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK2, 13, @ELPA_2STAGE_COMPLEX_AVX512_BLOCK2_COMPILED@) \
X(ELPA_2STAGE_COMPLEX_GPU, 14, @ELPA_2STAGE_COMPLEX_GPU_COMPILED@)
/*
 * Variadic X-macro list of the complex 2-stage kernels.
 *
 * Expands to one X(name, number, compiled, __VA_ARGS__) invocation per
 * kernel: the identifier, its numeric id, an @..._COMPILED@ placeholder, and
 * whatever extra arguments the caller supplied after X, forwarded unchanged.
 *
 * NOTE(review): a caller that passes only X leaves __VA_ARGS__ empty, so X
 * then receives an empty trailing argument - confirm every X macro used
 * with this list tolerates that.
 * NOTE(review): each entry below is spread over three lines and only the
 * last one ends in a continuation backslash; this looks like an artifact of
 * how the text was captured - confirm against the real header.
 */
#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(X
, ...
) \
X(ELPA_2STAGE_COMPLEX_GENERIC, 1, @ELPA_2STAGE_COMPLEX_GENERIC_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE, 2, @ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_BGP, 3, @ELPA_2STAGE_COMPLEX_BGP_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_BGQ, 4, @ELPA_2STAGE_COMPLEX_BGQ_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_SSE_ASSEMBLY, 5, @ELPA_2STAGE_COMPLEX_SSE_ASSEMBLY_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_SSE_BLOCK1, 6, @ELPA_2STAGE_COMPLEX_SSE_BLOCK1_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_SSE_BLOCK2, 7, @ELPA_2STAGE_COMPLEX_SSE_BLOCK2_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_AVX_BLOCK1, 8, @ELPA_2STAGE_COMPLEX_AVX_BLOCK1_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_AVX_BLOCK2, 9, @ELPA_2STAGE_COMPLEX_AVX_BLOCK2_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK1, 10, @ELPA_2STAGE_COMPLEX_AVX2_BLOCK1_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK2, 11, @ELPA_2STAGE_COMPLEX_AVX2_BLOCK2_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK1, 12, @ELPA_2STAGE_COMPLEX_AVX512_BLOCK1_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK2, 13, @ELPA_2STAGE_COMPLEX_AVX512_BLOCK2_COMPILED@
, __VA_ARGS__
) \
X(ELPA_2STAGE_COMPLEX_GPU, 14, @ELPA_2STAGE_COMPLEX_GPU_COMPILED@
, __VA_ARGS__
)
#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(X) \
...
...
generate_automake_test_programs.py
View file @
7329e8f8
...
...
@@ -23,35 +23,38 @@ for g, p, d, s in product(sorted(gpu_flag.keys()),
sorted
(
prec_flag
.
keys
()),
sorted
(
domain_flag
.
keys
()),
sorted
(
solver_flag
.
keys
())):
endifs
=
0
extra_flags
=
[]
if
(
g
==
1
):
print
(
"if WITH_GPU_VERSION"
)
endifs
+=
1
if
s
==
"2stage"
:
extra_flags
.
append
(
"-DTEST_KERNEL=ELPA_2STAGE_{0}_GPU"
.
format
(
d
.
upper
()))
elif
s
==
"2stage"
:
extra_flags
.
append
(
"-DTEST_ALL_KERNELS"
)
if
(
p
==
"single"
):
if
(
d
==
"real"
):
print
(
"if WANT_SINGLE_PRECISION_REAL"
)
elif
(
d
==
"complex"
):
print
(
"if WANT_SINGLE_PRECISION_COMPLEX"
)
else
:
raise
Exception
(
"Oh no!"
)
endifs
+=
1
for
kernel
in
[
"all_kernels"
,
"default_kernel"
]
if
s
==
"2stage"
else
[
"nokernel"
]:
endifs
=
0
extra_flags
=
[]
if
(
g
==
1
):
print
(
"if WITH_GPU_VERSION"
)
endifs
+=
1
name
=
"test_{0}_{1}_{2}{3}"
.
format
(
d
,
p
,
s
,
"_gpu"
if
g
else
""
)
print
(
"noinst_PROGRAMS += "
+
name
)
print
(
"check_SCRIPTS += "
+
name
+
".sh"
)
print
(
name
+
"_SOURCES = test/Fortran/test.F90"
)
print
(
name
+
"_LDADD = $(build_lib)"
)
print
(
name
+
"_FCFLAGS = $(AM_FCFLAGS) $(FC_MODINC)test_modules $(FC_MODINC)modules
\\
"
)
print
(
" "
+
"
\\\n
"
.
join
([
domain_flag
[
d
],
prec_flag
[
p
],
solver_flag
[
s
],
gpu_flag
[
g
]]
+
extra_flags
))
if
kernel
==
"default_kernel"
:
extra_flags
.
append
(
"-DTEST_KERNEL=ELPA_2STAGE_{0}_DEFAULT"
.
format
(
d
.
upper
()))
elif
kernel
==
"all_kernels"
:
extra_flags
.
append
(
"-DTEST_ALL_KERNELS"
)
print
(
"endif
\n
"
*
endifs
)
if
(
p
==
"single"
):
if
(
d
==
"real"
):
print
(
"if WANT_SINGLE_PRECISION_REAL"
)
elif
(
d
==
"complex"
):
print
(
"if WANT_SINGLE_PRECISION_COMPLEX"
)
else
:
raise
Exception
(
"Oh no!"
)
endifs
+=
1
name
=
"test_{0}_{1}_{2}{3}{4}"
.
format
(
d
,
p
,
s
,
""
if
kernel
==
"nokernel"
else
"_"
+
kernel
,
"_gpu"
if
g
else
""
)
print
(
"noinst_PROGRAMS += "
+
name
)
print
(
"check_SCRIPTS += "
+
name
+
".sh"
)
print
(
name
+
"_SOURCES = test/Fortran/test.F90"
)
print
(
name
+
"_LDADD = $(build_lib)"
)
print
(
name
+
"_FCFLAGS = $(AM_FCFLAGS) $(FC_MODINC)test_modules $(FC_MODINC)modules
\\
"
)
print
(
" "
+
"
\\\n
"
.
join
([
domain_flag
[
d
],
prec_flag
[
p
],
solver_flag
[
s
],
gpu_flag
[
g
]]
+
extra_flags
))
print
(
"endif
\n
"
*
endifs
)
src/elpa_index.c
View file @
7329e8f8
...
...
@@ -493,9 +493,9 @@ static int enumerate_identity(int i) {
case value: \
return 1;
#define VALID_CASE_3(name, value, available) \
#define VALID_CASE_3(name, value, available
, other_checks
) \
case value: \
return available;
return available
&& (other_checks(value))
;
static
const
char
*
elpa_solver_name
(
int
solver
)
{
switch
(
solver
)
{
...
...
@@ -562,9 +562,13 @@ static const char *real_kernel_name(int kernel) {
}
}
/*
 * Extra validity check for real kernels, intended to be handed to
 * VALID_CASE_3 as its `other_checks` argument.
 *
 * Evaluates to `gpu_is_active` when kernel_number is ELPA_2STAGE_REAL_GPU and
 * to 1 for every other kernel number. `gpu_is_active` is not a parameter: it
 * must be a variable in scope wherever the macro is expanded.
 *
 * The argument and the whole expansion are parenthesized so the macro acts as
 * one expression next to any neighbouring operator (e.g. `!`, `*`, `==`),
 * instead of letting `?:` bind more loosely than the surrounding code expects.
 */
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        (((kernel_number) == ELPA_2STAGE_REAL_GPU) ? gpu_is_active : 1)
static
int
real_kernel_is_valid
(
elpa_index_t
index
,
int
n
,
int
new_value
)
{
int
gpu_is_active
=
elpa_index_get_int_value
(
index
,
"gpu"
,
NULL
);
switch
(
new_value
)
{
ELPA_FOR_ALL_2STAGE_REAL_KERNELS
(
VALID_CASE_3
)
ELPA_FOR_ALL_2STAGE_REAL_KERNELS
(
VALID_CASE_3
,
REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE
)
default:
return
0
;
}
...
...
@@ -593,9 +597,13 @@ static const char *complex_kernel_name(int kernel) {
}
}
/*
 * Extra validity check for complex kernels, intended to be handed to
 * VALID_CASE_3 as its `other_checks` argument.
 *
 * Evaluates to `gpu_is_active` when kernel_number is ELPA_2STAGE_COMPLEX_GPU
 * and to 1 for every other kernel number. `gpu_is_active` is not a parameter:
 * it must be a variable in scope wherever the macro is expanded.
 *
 * The argument and the whole expansion are parenthesized so the macro acts as
 * one expression next to any neighbouring operator (e.g. `!`, `*`, `==`),
 * instead of letting `?:` bind more loosely than the surrounding code expects.
 */
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        (((kernel_number) == ELPA_2STAGE_COMPLEX_GPU) ? gpu_is_active : 1)
static
int
complex_kernel_is_valid
(
elpa_index_t
index
,
int
n
,
int
new_value
)
{
int
gpu_is_active
=
elpa_index_get_int_value
(
index
,
"gpu"
,
NULL
);
switch
(
new_value
)
{
ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
(
VALID_CASE_3
)
ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
(
VALID_CASE_3
,
COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE
)
default:
return
0
;
}
...
...
test/Fortran/test.F90
View file @
7329e8f8
...
...
@@ -210,13 +210,6 @@ program test
#ifdef TEST_ALL_KERNELS
do
i
=
0
,
elpa_option_cardinality
(
KERNEL_KEY
)
kernel
=
elpa_option_enumerate
(
KERNEL_KEY
,
i
)
#ifdef TEST_REAL
if
(
kernel
.eq.
ELPA_2STAGE_REAL_GPU
)
then
#else
if
(
kernel
.eq.
ELPA_2STAGE_COMPLEX_GPU
)
then
#endif
call
e
%
set
(
"gpu"
,
1
)
endif
#endif /* TEST_ALL_KERNELS */
#ifdef TEST_KERNEL
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment