Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
elpa
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
14
Issues
14
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Environments
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
elpa
elpa
Commits
603ffc20
Commit
603ffc20
authored
Apr 17, 2018
by
Andreas Marek
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Some fixes for K-computer
parent
8666cdd0
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
378 additions
and
33 deletions
+378
-33
Makefile.am
Makefile.am
+8
-2
autogen.sh
autogen.sh
+2
-1
configure.ac
configure.ac
+102
-14
generate_automake_C_test_programs.py
generate_automake_C_test_programs.py
+3
-1
generate_automake_Fortran_test_programs.py
generate_automake_Fortran_test_programs.py
+233
-0
src/elpa.F90
src/elpa.F90
+4
-1
src/elpa_api.F90
src/elpa_api.F90
+8
-6
src/elpa_autotune_impl.F90
src/elpa_autotune_impl.F90
+2
-2
src/elpa_impl.F90
src/elpa_impl.F90
+8
-5
src/elpa_impl_math_template.F90
src/elpa_impl_math_template.F90
+8
-1
No files found.
Makefile.am
View file @
603ffc20
...
...
@@ -61,9 +61,11 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa1/elpa1.F90
\
src/elpa2/elpa2.F90
\
src/helpers/matrix_plot.F90
\
src/elpa_c_interface.c
\
src/elpa_index.c
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa_c_interface.c
# elpa_utilities.F90 is private in new API, public in legacy
if
ENABLE_LEGACY
libelpa@SUFFIX@
_public_la_SOURCES
+=
\
...
...
@@ -554,7 +556,11 @@ elpa2_print_kernels@SUFFIX@_SOURCES = src/elpa2/elpa2_print_kernels.F90
elpa2_print_kernels@SUFFIX@
_LDADD
=
libelpa@SUFFIX@.la
elpa2_print_kernels@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
$(FC_MODINC)
modules
include
test_programs.am
include
Fortran_test_programs.am
if
ENABLE_C_TESTS
include
C_test_programs.am
endif
if
ENABLE_LEGACY
include
legacy_test_programs.am
...
...
autogen.sh
View file @
603ffc20
...
...
@@ -5,5 +5,6 @@ mkdir -p m4/
test
-n
"
$srcdir
"
||
srcdir
=
`
dirname
"
$0
"
`
test
-n
"
$srcdir
"
||
srcdir
=
.
$srcdir
/generate_automake_test_programs.py
>
$srcdir
/test_programs.am
$srcdir
/generate_automake_Fortran_test_programs.py
>
$srcdir
/Fortran_test_programs.am
$srcdir
/generate_automake_C_test_programs.py
>
$srcdir
/C_test_programs.am
autoreconf
--force
--install
--verbose
"
$srcdir
"
configure.ac
View file @
603ffc20
...
...
@@ -40,12 +40,24 @@ if test x$_cv_gnu_make_command = x ; then
AC_MSG_ERROR([Need GNU Make])
fi
AC_ARG_ENABLE([legacy],
AS_HELP_STRING([--disable-legacy],
AC_MSG_CHECKING(whether legacy interface should be provided)
AC_ARG_ENABLE([legacy-interface],
AS_HELP_STRING([--disable-legacy-interface],
[do not build legacy API, default yes]),
[],
[
if test x"$enableval" = x"yes"; then
enable_legacy=yes
else
enable_legacy=no
fi
],
[enable_legacy=yes])
AC_MSG_RESULT([${enable_legacy}])
AM_CONDITIONAL([ENABLE_LEGACY],[test x"$enable_legacy" = x"yes"])
if test x"${enable_legacy}" = x"yes"; then
AC_DEFINE([ENABLE_LEGACY], [1], [enable legacy interface])
fi
# gnu-make fortran module dependencies
m4_include([fdep/fortran_dependencies.m4])
...
...
@@ -58,7 +70,13 @@ AC_MSG_CHECKING(whether --enable-openmp is specified)
AC_ARG_ENABLE([openmp],
AS_HELP_STRING([--enable-openmp],
[use OpenMP threading, default no.]),
[],
[
if test x"$enableval" = x"yes"; then
enable_openmp=yes
else
enable_openmp=no
fi
],
[enable_openmp=no])
AC_MSG_RESULT([${enable_openmp}])
AM_CONDITIONAL([WITH_OPENMP],[test x"$enable_openmp" = x"yes"])
...
...
@@ -80,7 +98,13 @@ AC_MSG_CHECKING(whether --enable-scalapack-tests is specified)
AC_ARG_ENABLE([scalapack-tests],
AS_HELP_STRING([--enable-scalapack-tests],
[build SCALAPACK test cases for performance comparison, needs MPI, default no.]),
[],
[
if test x"$enableval" = x"yes"; then
enable_scalapack_tests=yes
else
enable_scalapack_tests=no
fi
],
[enable_scalapack_tests="no"])
AC_MSG_RESULT([$enable_scalapack_tests])
if test x"${enable_scalapack_tests}" = x"yes"; then
...
...
@@ -163,7 +187,13 @@ AC_MSG_CHECKING(whether stdout/stderr file redirect should be enabled)
AC_ARG_ENABLE([redirect],
[AS_HELP_STRING([--enable-redirect],
[for test programs, allow redirection of stdout/stderr per MPI task in a file (useful for timing), default no.])],
[],
[
if test x"$enableval" = x"yes"; then
enable_redirect=yes
else
enable_redirect=no
fi
],
[enable_redirect=no])
AC_MSG_RESULT([${enable_redirect}])
...
...
@@ -188,7 +218,13 @@ dnl build with ftimings support
AC_ARG_ENABLE([timings],
[AS_HELP_STRING([--disable-timings],
[more detailed timing, default yes])],
[],
[
if test x"$enableval" = x"yes"; then
enable_timings=yes
else
enable_timings=no
fi
],
[enable_timings=yes])
if test x"${enable_timings}" = x"yes"; then
...
...
@@ -201,7 +237,13 @@ AC_LANG_PUSH([C])
AC_ARG_WITH([papi],
[AS_HELP_STRING([--with-papi],
[Use PAPI to also measure flop count in the detailed timing (--enable-timing), disabled by default])],
[],
[
if test x"$withval" = x"yes"; then
with_papi=yes
else
with_papi=no
fi
],
[with_papi="no"])
if test x"${enable_timings}" = x"yes"; then
if test x"$with_papi" = x"yes" ; then
...
...
@@ -303,6 +345,7 @@ else
LIBS="-l${lib} ${old_LIBS}"
AC_MSG_CHECKING([whether -l${lib} already contains a BLACS implementation])
AC_LINK_IFELSE([AC_LANG_FUNC_LINK_TRY([blacs_gridinit])],[blacs_in_scalapack=yes],[blacs_in_scalapack=no])
AC_MSG_RESULT([${blacs_in_scalapack}])
if test x"${blacs_in_scalapack}" = x"yes"; then
break
...
...
@@ -386,7 +429,13 @@ dnl check whether BAND_TO_FULL_BLOCKING is set
AC_MSG_CHECKING(whether BAND_TO_FULL_BLOCKING is requested)
AC_ARG_ENABLE(band-to-full-blocking,[AS_HELP_STRING([--disable-band-to-full-blocking],
[build ELPA2 with blocking in band_to_full (default: enabled)])],
[],
[
if test x"$enableval" = x"yes"; then
enable_band_to_full_blocking=yes
else
enable_band_to_full_blocking=no
fi
],
[enable_band_to_full_blocking="yes"])
AC_MSG_RESULT([${enable_band_to_full_blocking}])
...
...
@@ -430,7 +479,13 @@ if test x"${with_mpi}" = x"yes" ; then
AC_ARG_ENABLE([mpi-module],
AS_HELP_STRING([--disable-mpi-module],
[do not use the Fortran MPI module, get interfaces by 'include "mpif.h')]),
[],
[
if test x"$enableval" = x"yes"; then
enable_mpi_module=yes
else
enable_mpi_module=no
fi
],
[enable_mpi_module=yes])
if test x"${enable_mpi_module}" = x"yes" ; then
AC_MSG_CHECKING(whether Fortran mpi module can be used)
...
...
@@ -1023,7 +1078,6 @@ DX_MAN_FEATURE(ON)
DX_HTML_FEATURE(ON)
DX_INIT_DOXYGEN([ELPA], [Doxyfile], [docs])
USE_ASSUMED_SIZE=yes
AC_MSG_CHECKING(whether assumed size Fortran arrays should be used)
AC_ARG_ENABLE([assumed-size],
AS_HELP_STRING([--disable-assumed-size],
...
...
@@ -1040,7 +1094,6 @@ if test x"${USE_ASSUMED_SIZE}" = x"yes" ; then
AC_DEFINE([USE_ASSUMED_SIZE],[1],[for performance reasons use assumed size Fortran arrays, even if not debuggable])
fi
enable_fortran2008_features=yes
AC_MSG_CHECKING(whether Fortran2008 features should be enabled)
AC_ARG_ENABLE([Fortran2008-features],
AS_HELP_STRING([--enable-Fortran2008-features],
...
...
@@ -1052,14 +1105,49 @@ AC_ARG_ENABLE([Fortran2008-features],
enable_fortran2008_features=no
fi
],
[])
[
enable_fortran2008_features=yes
])
AC_MSG_RESULT([${enable_fortran2008_features}])
AM_CONDITIONAL([USE_FORTRAN2008],[test x"$enable_fortran2008_features" = x"yes"])
if test x"${enable_fortran2008_features}" = x"yes"; then
AC_DEFINE([USE_FORTRAN2008], [1], [use some Fortran 2008 features])
fi
enable_kcomputer=no
AC_MSG_CHECKING(whether autotuning functionality should be enabled)
AC_ARG_ENABLE([autotuning],
AS_HELP_STRING([--enable-autotuning],
[enables autotuning functionality, default yes.]),
[
if test x"$enableval" = x"yes"; then
enable_autotuning=yes
else
enable_autotuning=no
fi
],
[enable_autotuning=yes])
AC_MSG_RESULT([${enable_autotuning}])
AM_CONDITIONAL([ENABLE_AUTOTUNING],[test x"$enable_autotuning" = x"yes"])
if test x"${enable_autotuning}" = x"yes"; then
AC_DEFINE([ENABLE_AUTOTUNING], [1], [enable autotuning functionality])
fi
AC_MSG_CHECKING(whether C tests should be provided)
AC_ARG_ENABLE([c-tests],
AS_HELP_STRING([--enable-c-tests],
[enables the C tests for elpa, default yes.]),
[
if test x"$enableval" = x"yes"; then
enable_c_tests=yes
else
enable_c_tests=no
fi
],
[enable_c_tests=yes])
AC_MSG_RESULT([${enable_c_tests}])
AM_CONDITIONAL([ENABLE_C_TESTS],[test x"$enable_c_tests" = x"yes"])
if test x"${enable_c_tests}" = x"yes"; then
AC_DEFINE([ENABLE_C_TESTS], [1], [enable C tests])
fi
AC_MSG_CHECKING(whether we build for K-computer)
AC_ARG_ENABLE([K-computer],
AS_HELP_STRING([--enable-K-computer],
...
...
generate_automake_test_programs.py
→
generate_automake_
C_
test_programs.py
View file @
603ffc20
...
...
@@ -3,7 +3,6 @@ from __future__ import print_function
from
itertools
import
product
language_flag
=
{
"Fortran"
:
""
,
"C"
:
"_c_version"
,
}
...
...
@@ -213,6 +212,7 @@ for lang, p, d in product(sorted(language_flag.keys()), sorted(prec_flag.keys())
name
=
"test_autotune{langsuffix}_{d}_{p}"
.
format
(
langsuffix
=
language_flag
[
lang
],
d
=
d
,
p
=
p
)
print
(
"if ENABLE_AUTOTUNING"
)
print
(
"check_SCRIPTS += "
+
name
+
"_extended.sh"
)
print
(
"noinst_PROGRAMS += "
+
name
)
if
lang
==
"Fortran"
:
...
...
@@ -229,3 +229,5 @@ for lang, p, d in product(sorted(language_flag.keys()), sorted(prec_flag.keys())
domain_flag
[
d
],
prec_flag
[
p
]]))
print
(
"endif
\n
"
*
endifs
)
print
(
"endif"
)
generate_automake_Fortran_test_programs.py
0 → 100755
View file @
603ffc20
#!/usr/bin/env python
from
__future__
import
print_function
from
itertools
import
product
language_flag
=
{
"Fortran"
:
""
,
}
domain_flag
=
{
"real"
:
"-DTEST_REAL"
,
"complex"
:
"-DTEST_COMPLEX"
,
}
prec_flag
=
{
"double"
:
"-DTEST_DOUBLE"
,
"single"
:
"-DTEST_SINGLE"
,
}
solver_flag
=
{
"1stage"
:
"-DTEST_SOLVER_1STAGE"
,
"2stage"
:
"-DTEST_SOLVER_2STAGE"
,
"scalapack_all"
:
"-DTEST_SCALAPACK_ALL"
,
"scalapack_part"
:
"-DTEST_SCALAPACK_PART"
,
}
gpu_flag
=
{
0
:
"-DTEST_GPU=0"
,
1
:
"-DTEST_GPU=1"
,
}
matrix_flag
=
{
"random"
:
"-DTEST_MATRIX_RANDOM"
,
"analytic"
:
"-DTEST_MATRIX_ANALYTIC"
,
"toeplitz"
:
"-DTEST_MATRIX_TOEPLITZ"
,
"frank"
:
"-DTEST_MATRIX_FRANK"
,
}
qr_flag
=
{
0
:
"-DTEST_QR_DECOMPOSITION=0"
,
1
:
"-DTEST_QR_DECOMPOSITION=1"
,
}
test_type_flag
=
{
"eigenvectors"
:
"-DTEST_EIGENVECTORS"
,
"eigenvalues"
:
"-DTEST_EIGENVALUES"
,
"solve_tridiagonal"
:
"-DTEST_SOLVE_TRIDIAGONAL"
,
"cholesky"
:
"-DTEST_CHOLESKY"
,
"hermitian_multiply"
:
"-DTEST_HERMITIAN_MULTIPLY"
,
"generalized"
:
"-DTEST_GENERALIZED_EIGENPROBLEM"
,
"generalized_decomp"
:
"-DTEST_GENERALIZED_DECOMP_EIGENPROBLEM"
,
}
layout_flag
=
{
"all_layouts"
:
"-DTEST_ALL_LAYOUTS"
,
"square"
:
""
}
for
lang
,
m
,
g
,
q
,
t
,
p
,
d
,
s
,
lay
in
product
(
sorted
(
language_flag
.
keys
()),
sorted
(
matrix_flag
.
keys
()),
sorted
(
gpu_flag
.
keys
()),
sorted
(
qr_flag
.
keys
()),
sorted
(
test_type_flag
.
keys
()),
sorted
(
prec_flag
.
keys
()),
sorted
(
domain_flag
.
keys
()),
sorted
(
solver_flag
.
keys
()),
sorted
(
layout_flag
.
keys
())):
if
lang
==
"C"
and
(
m
==
"analytic"
or
m
==
"toeplitz"
or
m
==
"frank"
or
lay
==
"all_layouts"
):
continue
# not implemented in the test.c file yet
if
lang
==
"C"
and
(
t
==
"cholesky"
or
t
==
"hermitian_multiply"
or
q
==
1
):
continue
# exclude some test combinations
# analytic tests only for "eigenvectors" and not on GPU
if
(
m
==
"analytic"
and
(
g
==
1
or
t
!=
"eigenvectors"
)):
continue
# Frank tests only for "eigenvectors" and eigenvalues and real double precision case
if
(
m
==
"frank"
and
((
t
!=
"eigenvectors"
or
t
!=
"eigenvalues"
)
and
(
d
!=
"real"
or
p
!=
"double"
))):
continue
if
(
s
in
[
"scalapack_all"
,
"scalapack_part"
]
and
(
g
==
1
or
t
!=
"eigenvectors"
or
m
!=
"analytic"
)):
continue
# do not test single-precision scalapack
if
(
s
in
[
"scalapack_all"
,
"scalapack_part"
]
and
(
p
==
"single"
)):
continue
# solve tridiagonal only for real toeplitz matrix in 1stage
if
(
t
==
"solve_tridiagonal"
and
(
s
!=
"1stage"
or
d
!=
"real"
or
m
!=
"toeplitz"
)):
continue
# solve generalized only for random matrix in 1stage
if
(
t
==
"generalized"
and
(
m
!=
"random"
or
s
==
"2stage"
)):
continue
# solve generalized already decomposed only for random matrix in 1stage
# maybe this test should be further restricted, maybe not so important...
if
(
t
==
"generalized_decomp"
and
(
m
!=
"random"
or
s
==
"2stage"
)):
continue
# cholesky tests only 1stage and toeplitz or random matrix
if
(
t
==
"cholesky"
and
((
not
(
m
==
"toeplitz"
or
m
==
"random"
))
or
s
==
"2stage"
)):
continue
if
(
t
==
"eigenvalues"
and
(
m
==
"random"
)):
continue
if
(
t
==
"hermitian_multiply"
and
(
s
==
"2stage"
)):
continue
if
(
t
==
"hermitian_multiply"
and
(
m
==
"toeplitz"
)):
continue
# qr only for 2stage real
if
(
q
==
1
and
(
s
!=
"2stage"
or
d
!=
"real"
or
t
!=
"eigenvectors"
or
g
==
1
or
m
!=
"random"
)):
continue
for
kernel
in
[
"all_kernels"
,
"default_kernel"
]
if
s
==
"2stage"
else
[
"nokernel"
]:
endifs
=
0
extra_flags
=
[]
if
(
t
==
"eigenvalues"
and
kernel
==
"all_kernels"
):
continue
if
(
lang
==
"C"
and
kernel
==
"all_kernels"
):
continue
if
(
g
==
1
):
print
(
"if WITH_GPU_VERSION"
)
endifs
+=
1
if
(
lay
==
"all_layouts"
):
print
(
"if WITH_MPI"
)
endifs
+=
1
if
(
s
in
[
"scalapack_all"
,
"scalapack_part"
]):
print
(
"if WITH_SCALAPACK_TESTS"
)
endifs
+=
1
if
kernel
==
"default_kernel"
:
extra_flags
.
append
(
"-DTEST_KERNEL=ELPA_2STAGE_{0}_DEFAULT"
.
format
(
d
.
upper
()))
elif
kernel
==
"all_kernels"
:
extra_flags
.
append
(
"-DTEST_ALL_KERNELS"
)
if
layout_flag
[
lay
]:
extra_flags
.
append
(
layout_flag
[
lay
])
if
(
p
==
"single"
):
if
(
d
==
"real"
):
print
(
"if WANT_SINGLE_PRECISION_REAL"
)
elif
(
d
==
"complex"
):
print
(
"if WANT_SINGLE_PRECISION_COMPLEX"
)
else
:
raise
Exception
(
"Oh no!"
)
endifs
+=
1
name
=
"test{langsuffix}_{d}_{p}_{t}_{s}{kernelsuffix}_{gpusuffix}{qrsuffix}{m}{layoutsuffix}"
.
format
(
langsuffix
=
language_flag
[
lang
],
d
=
d
,
p
=
p
,
t
=
t
,
s
=
s
,
kernelsuffix
=
""
if
kernel
==
"nokernel"
else
"_"
+
kernel
,
gpusuffix
=
"gpu_"
if
g
else
""
,
qrsuffix
=
"qr_"
if
q
else
""
,
m
=
m
,
layoutsuffix
=
"_all_layouts"
if
lay
==
"all_layouts"
else
""
)
print
(
"if BUILD_KCOMPUTER"
)
print
(
"bin_PROGRAMS += "
+
name
)
print
(
"else"
)
print
(
"noinst_PROGRAMS += "
+
name
)
print
(
"endif"
)
if
lay
==
"square"
:
print
(
"check_SCRIPTS += "
+
name
+
"_default.sh"
)
elif
lay
==
"all_layouts"
:
print
(
"check_SCRIPTS += "
+
name
+
"_extended.sh"
)
else
:
raise
Exception
(
"Unknown layout {0}"
.
format
(
lay
))
if
lang
==
"Fortran"
:
print
(
name
+
"_SOURCES = test/Fortran/test.F90"
)
print
(
name
+
"_LDADD = $(test_program_ldadd)"
)
print
(
name
+
"_FCFLAGS = $(test_program_fcflags)
\\
"
)
elif
lang
==
"C"
:
print
(
name
+
"_SOURCES = test/C/test.c"
)
print
(
name
+
"_LDADD = $(test_program_ldadd) $(FCLIBS)"
)
print
(
name
+
"_CFLAGS = $(test_program_cflags)
\\
"
)
print
(
" -DTEST_CASE=
\\\"
{0}
\\\"
\\
"
.
format
(
name
))
print
(
" "
+
"
\\\n
"
.
join
([
domain_flag
[
d
],
prec_flag
[
p
],
test_type_flag
[
t
],
solver_flag
[
s
],
gpu_flag
[
g
],
qr_flag
[
q
],
matrix_flag
[
m
]]
+
extra_flags
))
print
(
"endif
\n
"
*
endifs
)
for
lang
,
p
,
d
in
product
(
sorted
(
language_flag
.
keys
()),
sorted
(
prec_flag
.
keys
()),
sorted
(
domain_flag
.
keys
())):
endifs
=
0
if
(
p
==
"single"
):
if
(
d
==
"real"
):
print
(
"if WANT_SINGLE_PRECISION_REAL"
)
elif
(
d
==
"complex"
):
print
(
"if WANT_SINGLE_PRECISION_COMPLEX"
)
else
:
raise
Exception
(
"Oh no!"
)
endifs
+=
1
name
=
"test_autotune{langsuffix}_{d}_{p}"
.
format
(
langsuffix
=
language_flag
[
lang
],
d
=
d
,
p
=
p
)
print
(
"if ENABLE_AUTOTUNING"
)
print
(
"check_SCRIPTS += "
+
name
+
"_extended.sh"
)
print
(
"noinst_PROGRAMS += "
+
name
)
if
lang
==
"Fortran"
:
print
(
name
+
"_SOURCES = test/Fortran/test_autotune.F90"
)
print
(
name
+
"_LDADD = $(test_program_ldadd)"
)
print
(
name
+
"_FCFLAGS = $(test_program_fcflags)
\\
"
)
elif
lang
==
"C"
:
print
(
name
+
"_SOURCES = test/C/test_autotune.c"
)
print
(
name
+
"_LDADD = $(test_program_ldadd) $(FCLIBS)"
)
print
(
name
+
"_CFLAGS = $(test_program_cflags)
\\
"
)
print
(
" "
+
"
\\\n
"
.
join
([
domain_flag
[
d
],
prec_flag
[
p
]]))
print
(
"endif
\n
"
*
endifs
)
print
(
"endif"
)
src/elpa.F90
View file @
603ffc20
...
...
@@ -189,6 +189,8 @@
!> \endcode
!>
!> \brief Fortran module to use the ELPA library. No other module should be used
#include "config-f90.h"
module
elpa
use
elpa_constants
use
elpa_api
...
...
@@ -219,7 +221,7 @@ module elpa
deallocate
(
obj
)
end
subroutine
#ifdef ENABLE_AUTOTUNING
!> \brief function to deallocate an ELPA autotune instance
!> Parameters
!> \details
...
...
@@ -229,5 +231,6 @@ module elpa
call
obj
%
destroy
()
deallocate
(
obj
)
end
subroutine
#endif
end
module
src/elpa_api.F90
View file @
603ffc20
...
...
@@ -153,10 +153,12 @@ module elpa_api
elpa_solve_tridiagonal_d
,
&
!< matrix
elpa_solve_tridiagonal_f
#ifdef ENABLE_AUTOTUNING
! Auto-tune
procedure
(
elpa_autotune_setup_i
),
deferred
,
public
::
autotune_setup
!< method to prepare the ELPA autotuning
procedure
(
elpa_autotune_step_i
),
deferred
,
public
::
autotune_step
!< method to do an autotuning step
procedure
(
elpa_autotune_set_best_i
),
deferred
,
public
::
autotune_set_best
!< method to set the best options
#endif
!> \brief These methods have to be public, in order to be overridable in the extension types
procedure
(
elpa_set_integer_i
),
deferred
,
public
::
elpa_set_integer
...
...
@@ -204,7 +206,7 @@ module elpa_api
procedure
(
elpa_solve_tridiagonal_f_i
),
deferred
,
public
::
elpa_solve_tridiagonal_f
end
type
elpa_t
#ifdef ENABLE_AUTOTUNING
!> \brief Abstract definition of the elpa_autotune type
type
,
abstract
::
elpa_autotune_t
private
...
...
@@ -212,7 +214,7 @@ module elpa_api
procedure
(
elpa_autotune_destroy_i
),
deferred
,
public
::
destroy
procedure
(
elpa_autotune_print_i
),
deferred
,
public
::
print
end
type
#endif
!> \brief definition of helper function to get C strlen
!> Parameters
...
...
@@ -243,7 +245,7 @@ module elpa_api
end
function
end
interface
#ifdef ENABLE_AUTOTUNING
!> \brief abstract definition of the autotune setup method
!> Parameters
!> \details
...
...
@@ -298,7 +300,7 @@ module elpa_api
class
(
elpa_autotune_t
),
intent
(
in
),
target
::
tune_state
end
subroutine
end
interface
#endif
!> \brief abstract definition of set method for integer values
!> Parameters
...
...
@@ -558,7 +560,7 @@ module elpa_api
end
subroutine
end
interface
#ifdef ENABLE_AUTOTUNING
!> \brief abstract definition of interface to print the autotuning state
!> Parameters
!> \param self class(elpa_autotune_t): the ELPA autotune object
...
...
@@ -581,7 +583,7 @@ module elpa_api
class
(
elpa_autotune_t
),
intent
(
inout
)
::
self
end
subroutine
end
interface
#endif
contains
...
...
src/elpa_autotune_impl.F90
View file @
603ffc20
...
...
@@ -4,7 +4,7 @@ module elpa_autotune_impl
use
elpa_abstract_impl
use
,
intrinsic
::
iso_c_binding
implicit
none
#ifdef ENABLE_AUTOTUNING
type
,
extends
(
elpa_autotune_t
)
::
elpa_autotune_impl_t
class
(
elpa_abstract_impl_t
),
pointer
::
parent
=>
NULL
()
integer
::
i
=
0
...
...
@@ -37,5 +37,5 @@ module elpa_autotune_impl
class
(
elpa_autotune_impl_t
),
intent
(
inout
)
::
self
! nothing to do atm
end
subroutine
#endif
end
module
src/elpa_impl.F90
View file @
603ffc20
...
...
@@ -60,7 +60,9 @@ module elpa_impl
use
elpa_utilities
,
only
:
error_unit
use
elpa_abstract_impl
#ifdef ENABLE_AUTOTUNING
use
elpa_autotune_impl
#endif
use
,
intrinsic
::
iso_c_binding
implicit
none
...
...
@@ -154,10 +156,11 @@ module elpa_impl
procedure
,
private
::
elpa_transform_back_generalized_fc
#endif
#ifdef ENABLE_AUTOTUNING
procedure
,
public
::
autotune_setup
=>
elpa_autotune_setup
procedure
,
public
::
autotune_step
=>
elpa_autotune_step
procedure
,
public
::
autotune_set_best
=>
elpa_autotune_set_best
#endif
procedure
,
private
::
construct_scalapack_descriptor
=>
elpa_construct_scalapack_descriptor
end
type
elpa_impl_t
...
...
@@ -228,7 +231,7 @@ module elpa_impl
deallocate
(
self
)
end
subroutine
#ifdef ENABLE_AUTOTUNING
!c> /*! \brief C interface for the implementation of the elpa_autotune_deallocate method
!c> *
!c> * \param elpa_autotune_impl_t handle of ELPA autotune object to be deallocated
...
...
@@ -244,7 +247,7 @@ module elpa_impl
call
self
%
destroy
()
deallocate
(
self
)
end
subroutine
#endif
!> \brief function to setup an ELPA object and to store the MPI communicators internally
!> Parameters
...
...
@@ -734,7 +737,7 @@ module elpa_impl
#undef SINGLE_PRECISION
#endif
#ifdef ENABLE_AUTOTUNING
!> \brief function to setup the ELPA autotuning and create the autotune object
!> Parameters
!> \param self the allocated ELPA object
...
...
@@ -942,7 +945,7 @@ module elpa_impl
call
self
%
autotune_set_best
(
tune_state
)
end
subroutine
#endif
end
module
src/elpa_impl_math_template.F90
View file @
603ffc20
...
...
@@ -466,8 +466,11 @@
#endif
type
(
c_ptr
),
intent
(
in
),
value
::
handle
,
a_p
,
b_p
,
ev_p
,
q_p
integer
(
kind
=
c_int
),
intent
(
in
),
value
::
is_already_decomposed
#ifdef USE_FORTRAN2008
integer
(
kind
=
c_int
),
optional
,
intent
(
in
)
::
error
#else