...
 
Changelog for upcoming release
- not yet decided
Changelog for ELPA 2019.05.001.rc2
- elpa_print_kernels supports GPU usage
- fix an error if PAPI measurements are activated
- new simple real kernels: block4 and block6
- C functions can be built with optional error arguments if the compiler supports it
  (configure option); see the sketch after this list
- allow measurements with the likwid tool
- users can define the default kernel at build time
- ELPA versioning number is provided in the C header files
- as announced a year ago, the following deprecated routines have finally been
  removed; see DEPRECATED_FEATURES for the replacement routines, which were
  introduced a year ago. Removed routines:
-> mult_at_b_real
-> mult_ah_b_complex
-> invert_trm_real
-> invert_trm_complex
-> cholesky_real
-> cholesky_complex
-> solve_tridi
- new kernels for ARM arch64 added
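A minimal, hedged C sketch of the C-related items above (the version macro in the C headers and the error argument of the C functions); the calls shown are from the new ELPA C API, and the exact effect of --enable-optional-argument-in-C-API is summarized only from the configure option's description:

```c
#include <stdio.h>
#include <elpa/elpa.h>   /* pulls in elpa/elpa_version.h and the generated API headers */

int main(void) {
  int error;

  /* the API version is now available as a macro in the C headers */
  printf("built against ELPA API version %d\n", (int) ELPA_API_VERSION);

  if (elpa_init(20170403) != ELPA_OK)   /* earliest still-supported API version */
    return 1;

  /* by default the C functions take an explicit error argument; with
   * --enable-optional-argument-in-C-API (and a supporting compiler) it may be
   * omitted -- an assumption based on the configure option's description */
  elpa_t handle = elpa_allocate(&error);

  elpa_deallocate(handle);   /* cleanup; the exact signatures differ between ELPA versions */
  elpa_uninit();
  return 0;
}
```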
Changelog for ELPA 2018.11.001
- improved autotuning
......
......@@ -26,16 +26,17 @@ have been replaced by new names. The old interfaces will be removed
| get_elpa_communicators | elpa_get_communicators | (removed since 2017.11.001) |
| solve_evp_real | elpa_solve_evp_real_1stage_double | (removed since 2017.11.001) |
| solve_evp_complex | elpa_solve_evp_complex_1stage_double | (removed since 2017.11.001) |
| solve_evp_real_1stage | elpa_solve_evp_real_1stage_double | will be removed 2018.11.001 |
| solve_evp_complex_1stage | elpa_solve_evp_complex_1stage_double | will be removed 2018.11.001 |
| solve_evp_real_2stage | elpa_solve_evp_real_2stage_double | will be removed 2018.11.001 |
| solve_evp_complex_2stage | elpa_solve_evp_complex_2stage_double | will be removed 2018.11.001 |
| mult_at_b_real | elpa_mult_at_b_real_double | will be removed 2018.11.001 |
| mult_ah_b_complex | elpa_mult_ah_b_complex_double | will be removed 2018.11.001 |
| invert_trm_real | elpa_invert_trm_real_double | will be removed 2018.11.001 |
| invert_trm_complex | elpa_invert_trm_complex_double | will be removed 2018.11.001 |
| cholesky_real | elpa_cholesky_real_double | will be removed 2018.11.001 |
| cholesky_complex | elpa_cholesky_complex_double | will be removed 2018.11.001 |
| solve_evp_real_1stage | elpa_solve_evp_real_1stage_double | (removed since 2019.05.001) |
| solve_evp_complex_1stage | elpa_solve_evp_complex_1stage_double | (removed since 2019.05.001) |
| solve_evp_real_2stage | elpa_solve_evp_real_2stage_double | (removed since 2019.05.001) |
| solve_evp_complex_2stage | elpa_solve_evp_complex_2stage_double | (removed since 2019.05.001) |
| mult_at_b_real | elpa_mult_at_b_real_double | (removed since 2019.05.001) |
| mult_ah_b_complex | elpa_mult_ah_b_complex_double | (removed since 2019.05.001) |
| invert_trm_real | elpa_invert_trm_real_double | (removed since 2019.05.001) |
| invert_trm_complex | elpa_invert_trm_complex_double | (removed since 2019.05.001) |
| cholesky_real | elpa_cholesky_real_double | (removed since 2019.05.001) |
| cholesky_complex | elpa_cholesky_complex_double | (removed since 2019.05.001) |
| solve_tridi | elpa_solve_tridi_double | (removed since 2019.05.001) |
For all symbols, the corresponding "_single" routines are also available.
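As a hedged sketch of the forward path: the removed legacy entry points correspond to methods on an elpa_t object in the new API. The generic C names below follow the new-API headers (verify against elpa/elpa_generic.h); the data arrays are placeholders.

```c
#include <stdio.h>
#include <elpa/elpa.h>

/* "handle" is an already configured elpa_t object (na, nev, nblk, the MPI layout,
 * ... set via elpa_set and elpa_setup); a, q, ev are the locally stored parts of
 * the distributed matrix, the eigenvectors and the eigenvalues. */
static void use_new_api(elpa_t handle, double *a, double *q, double *ev) {
  int error;

  elpa_eigenvectors(handle, a, ev, q, &error);   /* replaces solve_evp_real_1stage / _2stage      */
  elpa_cholesky(handle, a, &error);              /* replaces cholesky_real / cholesky_complex     */
  elpa_invert_triangular(handle, a, &error);     /* replaces invert_trm_real / invert_trm_complex */

  if (error != ELPA_OK)
    fprintf(stderr, "ELPA call failed: %s\n", elpa_strerr(error));
}
```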
......
......@@ -893,62 +893,61 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \
@top_srcdir@/src/elpa2/elpa2_trans_ev_band_to_full_template.F90 \
@top_srcdir@/src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90 \
@top_srcdir@/src/elpa2/kernels/simple_template.F90 \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_1hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_template.F90 \
@top_srcdir@/src/elpa2/kernels/complex_sse_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_6hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_128bit_256bit_512bit_BLOCK_template.c \
@top_srcdir@/src/elpa2/kernels/complex_template.F90 \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_4hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/asm_x86_64_double_precision.s \
@top_srcdir@/src/elpa2/kernels/real_avx512_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/asm_x86_64_single_precision.s \
@top_srcdir@/src/elpa2/kernels/real_avx512_6hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_simple.F90 \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real.F90 \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_4hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/mod_single_hh_trafo_real.F90 \
@top_srcdir@/src/elpa2/kernels/complex_sse_1hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_128bit_256bit_512bit_BLOCK_template.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_bgq.f90 \
@top_srcdir@/src/elpa2/kernels/real_sse_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_simple.F90 \
@top_srcdir@/src/elpa2/kernels/complex.F90 \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_1hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_sse_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_4hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_bgp.f90 \
@top_srcdir@/src/elpa2/kernels/real_sse_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_6hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_4hv_single_precision.c \
@top_srcdir@/src/elpa2/elpa2_compute_complex_template.F90 \
@top_srcdir@/src/elpa2/elpa2_bandred_template.F90 \
@top_srcdir@/src/elpa2/pack_unpack_gpu.F90 \
......
......@@ -2,7 +2,7 @@
## Preamble ##
This file provides documentation on how to build the *ELPA* library in **version ELPA-2018.11.001**.
This file provides documentation on how to build the *ELPA* library in **version ELPA-2019.05.001.rc2**.
With release of **version ELPA-2017.05.001** the build process has been significantly simplified,
which makes it easier to install the *ELPA* library.
......@@ -10,13 +10,13 @@ The old, obsolete legacy API will be deprecated in the future !
Already now, all new features of ELPA are only available with the new API. Thus, there
is no reason to keep the legacy API around for too long.
The release ELPA 2018.11.001 will be the last release, where the legacy API is
The release ELPA 2018.11.001 was the last release in which the legacy API was
enabled by default (and can be disabled at build time).
With release ELPA 2019.05.001 the legacy API will be disabled by default, however,
With release ELPA 2019.05.001 the legacy API is disabled by default, however,
it can still be switched on at build time.
Most likely with the release ELPA 2019.11.001 the legacy API will be deprecated and not supported anymore.
The release of ELPA 2018.11.001 changes the ABI and API, since more functions have an error argument. In C this error argument is required, in Fortran it is optional.
The release of ELPA 2019.05.001.rc1 changes the ABI and API, since it also allows building the C functions with optional error arguments.
## How to install *ELPA* ##
......@@ -49,7 +49,8 @@ An excerpt of the most important (*ELPA* specific) options reads as follows:
| configure option | description |
|:------------------------------------ |:----------------------------------------------------- |
| --disable-legacy | do not build the legacy API, which is built by default |
| --enable-legacy-interface | build the legacy API, which is not built by default |
| --enable-optional-argument-in-C-API | treat error arguments in C-API as optional |
| --enable-openmp | use OpenMP threading, default no. |
| --enable-redirect | for ELPA test programs, allow redirection of <br> stdout/stderr per MPI task in a file <br> (useful for timing), default no. |
| --enable-single-precision | build with single precision version |
......@@ -57,6 +58,7 @@ An excerpt of the most important (*ELPA* specific) options reads as follows:
| --disable-band-to-full-blocking | build ELPA2 without blocking in band_to_full <br> (default: blocking enabled) |
| --disable-mpi-module | do not use the Fortran MPI module, <br> get interfaces via 'include "mpif.h"' |
| --disable-generic | do not build GENERIC kernels, default: enabled |
| --enable-sparc64 | build SPARC64 kernels, default: disabled |
| --disable-sse | do not build SSE kernels, default: enabled |
| --disable-sse-assembly | do not build SSE_ASSEMBLY kernels, default: enabled |
| --disable-avx | do not build AVX kernels, default: enabled |
......@@ -72,14 +74,17 @@ An excerpt of the most important (*ELPA* specific) options reads as follows:
| --with-fixed-real-kernel=KERNEL | compile with only a single specific real kernel. |
| --with-fixed-complex-kernel=KERNEL | compile with only a single specific complex kernel. |
| --with-gpu-support-only | Compile and always use the GPU version |
| --with-likwid=[yes|no|PATH] | use the likwid tool to measure performance (has a performance impact!), default: no |
| --with-default-real-kernel=KERNEL | set the real kernel KERNEL as default |
| --with-default-complex-kernel=KERNEL| set the complex kernel KERNEL as default |
| --enable-scalapack-tests | build SCALAPACK test cases for performance <br> comparison, needs MPI, default no. |
| --enable-autotuning | enables autotuning functionality, default yes |
| --enable-c-tests | enables the C tests for elpa, default yes |
| --disable-assumed-size | do NOT use assumed-size Fortran arrays (default: use them) |
| --enable-scalapack-tests | also build ScaLAPACK tests for performance comparison; needs MPI |
| --disable-Fortran2008-features | disable Fortran 2008 if compiler does not support it |
| --enable-python | build and install python wrapper, default no |
| --enable-python-tests | enable python tests, default no. |
| --disable-assumed-size | do NOT use assumed-size Fortran arrays (default: use them) |
| --enable-scalapack-tests | also build ScaLAPACK tests for performance comparison; needs MPI |
| --disable-Fortran2008-features | disable Fortran 2008 if compiler does not support it |
| --enable-python | build and install python wrapper, default no |
| --enable-python-tests | enable python tests, default no. |
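The kernel-related options above only determine which kernels are compiled in and which one becomes the build-time default; among the compiled kernels, one can still be selected per elpa_t object at run time. A hedged C sketch (the parameter name "real_kernel" and the ELPA_2STAGE_REAL_* constants are taken from the new C API; treat the exact spelling as an assumption):

```c
#include <stdio.h>
#include <elpa/elpa.h>

/* select one of the compiled-in kernels for this handle at run time;
 * elpa2_print_kernels lists what the installed library actually provides */
static void select_generic_kernel(elpa_t handle) {
  int error;

  elpa_set(handle, "real_kernel", ELPA_2STAGE_REAL_GENERIC, &error);
  if (error != ELPA_OK)
    fprintf(stderr, "could not select kernel: %s\n", elpa_strerr(error));
}
```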
We recommend that you do not build ELPA in its main directory but that you use it
......
......@@ -3,6 +3,9 @@
For more details and recent updates please visit the online [issue system] (https://gitlab.mpcdf.mpg.de/elpa/elpa/issues)
Issues which are not mentioned in a newer release are considered solved.
### ELPA 2019.05.001.rc1 release ###
- same issues as in ELPA 2017.11.001
### ELPA 2018.11.001 release ###
- same issues as in ELPA 2017.11.001
......
......@@ -40,6 +40,7 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/helpers/mod_precision.F90 \
src/helpers/mod_mpi.F90 \
src/helpers/mod_mpi_stubs.F90 \
src/helpers/mod_omp.F90 \
src/elpa_generated_fortran_interfaces.F90 \
src/elpa2/mod_redist_band.F90 \
src/elpa2/mod_pack_unpack_cpu.F90 \
......@@ -108,6 +109,8 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2/kernels/real_template.F90 \
src/elpa2/kernels/complex_template.F90 \
src/elpa2/kernels/simple_template.F90 \
src/elpa2/kernels/simple_block4_template.F90 \
src/elpa2/kernels/simple_block6_template.F90 \
src/elpa2/pack_unpack_cpu.F90 \
src/elpa2/pack_unpack_gpu.F90 \
src/elpa2/compute_hh_trafo.F90 \
......@@ -188,6 +191,14 @@ if WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_simple.F90
endif
if WITH_REAL_GENERIC_SIMPLE_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_simple_block4.F90
endif
if WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_simple_block6.F90
endif
if WITH_REAL_BGP_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_bgp.f90
endif
......@@ -218,6 +229,13 @@ if WITH_REAL_SPARC64_BLOCK2_KERNEL
#endif
endif
if WITH_REAL_NEON_ARCH64_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_2hv_single_precision.c
endif
endif
if WITH_REAL_VSX_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
......@@ -260,6 +278,13 @@ if WITH_REAL_SPARC64_BLOCK4_KERNEL
#endif
endif
if WITH_REAL_NEON_ARCH64_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_4hv_single_precision.c
endif
endif
if WITH_REAL_VSX_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
......@@ -302,6 +327,13 @@ if WITH_REAL_SPARC64_BLOCK6_KERNEL
#endif
endif
if WITH_REAL_NEON_ARCH64_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_6hv_single_precision.c
endif
endif
if WITH_REAL_VSX_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
......@@ -422,6 +454,12 @@ if WANT_SINGLE_PRECISION_COMPLEX
endif
endif
if STORE_BUILD_CONFIG
libelpa@SUFFIX@_private_la_SOURCES += src/helpers/print_build_config.c
endif
# Cuda files
.cu.lo:
NVCC="$(NVCC)" libtool --mode=compile --tag=CC $(top_srcdir)/nvcc_wrap $(NVCCFLAGS) $(LDFLAGS) -I$(top_builddir)/ -I$(top_srcdir)/ -c $< -o $@
......@@ -443,8 +481,10 @@ nobase_elpa_include_HEADERS = \
elpa/elpa_legacy.h
nobase_nodist_elpa_include_HEADERS = \
elpa/elpa_version.h \
elpa/elpa_constants.h \
elpa/elpa_generated.h \
elpa/elpa_generated_c_api.h \
elpa/elpa_generated_legacy.h
dist_man_MANS = \
......@@ -635,7 +675,6 @@ wrapper_la_CFLAGS = $(PYTHON_INCLUDE) $(NUMPY_INCLUDE) $(AM_CFLAGS)
python/pyelpa/wrapper.c: python/pyelpa/wrapper.pyx
cython $< -o $@
# test scripts
TASKS ?= 2
if WITH_MPI
......@@ -676,7 +715,8 @@ test_python.sh:
include doxygen.am
CLEANFILES = \
elpa-generated.h \
elpa_generated.h \
elpa_generated_c_api.h \
elpa1_test* \
elpa2_test*\
elpa2_real* \
......@@ -708,16 +748,14 @@ clean-local:
-rm -rf $(generated_headers)
distclean-local:
-rm -rf ./m4
-rm -rf ./src
-rm -rf ./test
-rm -rf ./modules
-rm -rf .fortran_dependencies
-rm config-f90.h
-rm -rf ./src/elpa2/kernels/.deps
-rm -rf ./src/.deps
-rm -rf ./test/.deps
-rmdir ./src/elpa2/kernels/
-rmdir ./src
-rmdir ./test
-rmdir ./m4
-rmdir modules/
-rmdir .fortran_dependencies/
-rm -rf elpa/elpa_generated_c_api.h
EXTRA_DIST = \
elpa.spec \
......@@ -758,27 +796,13 @@ EXTRA_DIST = \
src/elpa2/elpa2_trans_ev_band_to_full_template.F90 \
src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90 \
src/elpa2/elpa2_tridiag_band_template.F90 \
src/elpa2/kernels/complex_avx-avx2_1hv_template.c \
src/elpa2/kernels/complex_avx-avx2_2hv_template.c \
src/elpa2/kernels/complex_avx512_1hv_template.c \
src/elpa2/kernels/complex_avx512_2hv_template.c \
src/elpa2/kernels/complex_sse_1hv_template.c \
src/elpa2/kernels/complex_sse_2hv_template.c \
src/elpa2/kernels/complex_128bit_256bit_512bit_BLOCK_template.c \
src/elpa2/kernels/complex_template.F90 \
src/elpa2/kernels/real_avx-avx2_2hv_template.c \
src/elpa2/kernels/real_avx-avx2_4hv_template.c \
src/elpa2/kernels/real_avx-avx2_6hv_template.c \
src/elpa2/kernels/real_avx512_2hv_template.c \
src/elpa2/kernels/real_avx512_4hv_template.c \
src/elpa2/kernels/real_avx512_6hv_template.c \
src/elpa2/kernels/real_vsx_2hv_template.c \
src/elpa2/kernels/real_vsx_4hv_template.c \
src/elpa2/kernels/real_vsx_6hv_template.c \
src/elpa2/kernels/real_sse_2hv_template.c \
src/elpa2/kernels/real_sse_4hv_template.c \
src/elpa2/kernels/real_sse_6hv_template.c \
src/elpa2/kernels/real_128bit_256bit_512bit_BLOCK_template.c \
src/elpa2/kernels/real_template.F90 \
src/elpa2/kernels/simple_template.F90 \
src/elpa2/kernels/simple_block4_template.F90 \
src/elpa2/kernels/simple_block6_template.F90 \
src/elpa2/pack_unpack_cpu.F90 \
src/elpa2/pack_unpack_gpu.F90 \
src/elpa2/qr/elpa_pdgeqrf_template.F90 \
......
......@@ -2,22 +2,20 @@
## Current Release ##
The current release is ELPA 2018.11.001. The current supported API version
is 20181113. This release supports the earliest API version 20170403.
The current release is ELPA 2019.05.001.rc1. The current supported API version
is 20190501. This release supports the earliest API version 20170403.
The old, obsolete legacy API will be deprecated in the future !
Already now, all new features of ELPA are only available with the new API. Thus, there
is no reason to keep the legacy API around for too long.
The release ELPA 2018.11.001 will be the last release, where the legacy API is
The release ELPA 2018.11.001 was the last release in which the legacy API was
enabled by default (and can be disabled at build time).
With release ELPA 2019.05.001 the legacy API will be disabled by default, however,
With release ELPA 2019.05.001 the legacy API is disabled by default, however,
it can still be switched on at build time.
Most likely with the release ELPA 2019.11.001 the legacy API will be deprecated and
not supported anymore.
Note that the release ELPA 2018.11.001 introduces error codes to all functions, and thus an ABI and API change was necessary.
[![Build
status](https://gitlab.mpcdf.mpg.de/elpa/elpa/badges/master/build.svg)](https://gitlab.mpcdf.mpg.de/elpa/elpa/commits/master)
......@@ -91,6 +89,13 @@ Nonetheless, we are grateful if you cite the following publications:
structure theory and computational science",
Journal of Physics Condensed Matter, 26 (2014)
doi:10.1088/0953-8984/26/21/213201
Kus, P.; Marek, A.; Lederer, H.
"GPU Optimization of Large-Scale Eigenvalue Solver",
In: Radu F., Kumar K., Berre I., Nordbotten J., Pop I. (eds)
Numerical Mathematics and Advanced Applications ENUMATH 2017. ENUMATH 2017.
Lecture Notes in Computational Science and Engineering, vol 126. Springer, Cham
## Installation of the *ELPA* library
......@@ -110,7 +115,7 @@ the possible configure options.
## Using *ELPA*
Please have a look at the "**USERS_GUIDE**" file for documentation, or at the [online]
(http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2018.11.001/html/index.html) doxygen
(http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001.rc1/html/index.html) doxygen
documentation, where you can find the definitions of the interfaces.
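As a rough orientation, a hedged C sketch of the new-API call sequence (parameter names as documented for the new interface; na, nev, na_rows, na_cols, nblk and the process-grid coordinates are the usual block-cyclic distribution parameters and are placeholders here):

```c
#include <mpi.h>
#include <elpa/elpa.h>

void solve_with_elpa(int na, int nev, int na_rows, int na_cols, int nblk,
                     int my_prow, int my_pcol,
                     double *a, double *q, double *ev) {
  int error;

  if (elpa_init(20170403) != ELPA_OK)            /* request a supported API version */
    return;

  elpa_t handle = elpa_allocate(&error);

  elpa_set(handle, "na", na, &error);                    /* global matrix size       */
  elpa_set(handle, "nev", nev, &error);                  /* number of eigenvectors   */
  elpa_set(handle, "local_nrows", na_rows, &error);      /* local rows of the layout */
  elpa_set(handle, "local_ncols", na_cols, &error);      /* local columns            */
  elpa_set(handle, "nblk", nblk, &error);                /* block-cyclic block size  */
  elpa_set(handle, "mpi_comm_parent", (int) MPI_Comm_c2f(MPI_COMM_WORLD), &error);
  elpa_set(handle, "process_row", my_prow, &error);
  elpa_set(handle, "process_col", my_pcol, &error);

  error = elpa_setup(handle);

  elpa_set(handle, "solver", ELPA_SOLVER_2STAGE, &error); /* or ELPA_SOLVER_1STAGE   */

  elpa_eigenvectors(handle, a, ev, q, &error);   /* eigenvalues in ev, eigenvectors in q */

  elpa_deallocate(handle);   /* cleanup; the exact signatures differ between ELPA versions */
  elpa_uninit();
}
```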
## Contributing to *ELPA*
......
This file contains the release notes for the ELPA 2018.11.001 version
This file contains the release notes for the ELPA 2019.05.001.rc2 version
What is new?
-------------
For detailed information about changes since release ELPA 2018.05 please have a look at the Changelog file
For detailed information about changes since release ELPA 2018.11 please have a look at the Changelog file
- improved autotuning
- improved performance of generalized problem via Cannon's algorithm
- checkpointing functionality of elpa objects
- store/read/resume of autotuning
- Python interface for ELPA
- more ELPA functions have an optional error argument (Fortran) or required
error argument (C) => ABI and API change
- user can define a default kernel at build time
- ELPA VERSION number is exported to the C-header
- C functions can have an optional error argument, if the compiler supports this
  => ABI and API change
- as announced, removal of deprecated routines
- new kernels for ARM arch64
ABI change
---------------------
Since release 2018.05.001 the ABI has changed.
Since release 2018.11.001 the ABI has changed.
Any incompatibilities to previous version?
---------------------------------------
For Fortran:
Break of ABI compatibility, since all functions obtained an optional integer
argument for the error code.
Break of ABI compatibility, since all routines announced as deprecated have been removed.
For C:
Break of ABI and API compatibility, since all functions obtained a required int* argument for the error code.
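A hedged C sketch of what this looks like at a call site (ELPA_OK and elpa_strerr are from the new API headers; the handle is assumed to be set up already):

```c
#include <stdio.h>
#include <elpa/elpa.h>

static void query_solver(elpa_t handle) {
  int solver, error;                         /* the error argument is required in C,
                                                optional in Fortran                  */
  elpa_get(handle, "solver", &solver, &error);
  if (error != ELPA_OK)
    fprintf(stderr, "elpa_get(\"solver\") failed: %s\n", elpa_strerr(error));
}
```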
......@@ -146,7 +146,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example "man elpa2_print_kernels" should provide the documentation for the *ELPA* program which prints all
the available kernels.
Also an [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2018.11.001/html/index.html)
Also an [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001.rc1/html/index.html)
for each *ELPA* release is available.
......@@ -13,7 +13,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example "man elpa2_print_kernels" should provide the documentation for the *ELPA* program, which prints all
the available kernels.
Also an [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2018.11.001/html/index.html)
Also an [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001.rc1/html/index.html)
for each *ELPA* release is available.
......@@ -36,9 +36,9 @@ The old, obsolete legacy API will be deprecated in the future !
Already now, all new features of ELPA are only available with the new API. Thus, there
is no reason to keep the legacy API around for too long.
The release ELPA 2018.11.001 will be the last release, where the legacy API is
The release ELPA 2018.11.001 was the last release in which the legacy API was
enabled by default (and can be disabled at build time).
With release ELPA 2019.05.001 the legacy API will be disabled by default, however,
With release ELPA 2019.05.001 the legacy API is disabled by default, however,
it can still be switched on at build time.
Most likely with the release ELPA 2019.11.001 the legacy API will be deprecated and
not supported anymore.
......@@ -200,7 +200,7 @@ The following table gives a list of all supported parameters which can be used t
## III) List of computational routines ##
The following compute routines are available in *ELPA*. Please have a look at the man pages or the [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2018.11.001/html/index.html) for details.
The following compute routines are available in *ELPA*. Please have a look at the man pages or the [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001.rc1/html/index.html) for details.
| Name | Purpose | since API version |
......
......@@ -22,7 +22,7 @@ The *ELPA* library consists of two main parts:
Both variants of the *ELPA* solvers are available for real or complex single and double precision valued matrices.
Thus *ELPA* provides the following user functions (see man pages or [online] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2018.11.001/html/index.html) for details):
Thus *ELPA* provides the following user functions (see man pages or [online] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001.rc1/html/index.html) for details):
- elpa_get_communicators : set the row / column communicators for *ELPA*
- elpa_solve_evp_complex_1stage_{single|double} : solve a {single|double} precision complex eigenvalue problem with the *ELPA 1stage* solver
......
......@@ -2,24 +2,24 @@
if [ "$(hostname)" == "freya01" ]; then module purge && source /mpcdf/soft/obs_modules.sh && module load git intel/18.0.3 impi/2018.3 mkl/2018.4 anaconda/3/5.1 mpi4py/3.0.0 gcc/8 autoconf automake libtool pkg-config && unset SLURM_MPI_TYPE I_MPI_SLURM_EXT I_MPI_PMI_LIBRARY I_MPI_PMI2 I_MPI_HYDRA_BOOTSTRAP; fi
if [ "$(hostname)" == "buildtest-rzg" ]; then module load impi/5.1.3 intel/16.0 gcc/6.3 mkl/11.3 autotools pkg-config; fi
#if [ "$(hostname)" == "buildtest-rzg" ]; then module load impi/5.1.3 intel/16.0 gcc/6.3 mkl/11.3 autotools pkg-config; fi
if [ "$(hostname)" == "knl1" -o "$(hostname)" == "knl2" -o "$(hostname)" == "knl3" -o "$(hostname)" == "knl4" ] ; then module load impi/2017.4 intel/17.0 gcc/7.2 mkl/2017 pkg-config; fi
if [ "$(hostname)" == "maik" ]; then module load impi/5.1.3 intel intel/17.0 gcc/6.3 mkl/2017 pkg-config/0.29.1; fi
#if [ "$(hostname)" == "knl1" -o "$(hostname)" == "knl2" -o "$(hostname)" == "knl3" -o "$(hostname)" == "knl4" ] ; then module load impi/2017.4 intel/17.0 gcc/7.2 mkl/2017 pkg-config; fi
#if [ "$(hostname)" == "maik" ]; then module load impi/5.1.3 intel intel/17.0 gcc/6.3 mkl/2017 pkg-config/0.29.1; fi
if [ "$(hostname)" == "gp02" ] ; then module load impi/5.1.3 intel/17.0 gcc/6.3 mkl/2017 pkg-config; fi
#if [ "$(hostname)" == "gp02" ] ; then module load impi/5.1.3 intel/17.0 gcc/6.3 mkl/2017 pkg-config; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-1" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-2" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-3" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-4" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "dvl01" ]; then module load intel/17.0 gcc/5.4 mkl/2017 impi/2017.2 gcc/5.4 cuda/8.0; fi
if [ "$(hostname)" == "dvl02" ]; then module load intel/17.0 gcc/5.4 mkl/2017 impi/2017.2 gcc/5.4 cuda/8.0; fi
#if [ "$(hostname)" == "dvl01" ]; then module load intel/17.0 gcc/6.4 mkl/2017 impi/2017.4 cuda/9.2; fi
#if [ "$(hostname)" == "dvl02" ]; then module load intel/17.0 gcc/6.4 mkl/2017 impi/2017.4 cuda/9.2; fi
if [ "$(hostname)" == "miy01" ]; then module purge && module load gcc/5.4 smpi essl/5.5 cuda pgi/17.9 && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH && export OMPI_CC=gcc && export OMPI_FC=gfortran; fi
if [ "$(hostname)" == "miy02" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
if [ "$(hostname)" == "miy03" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
#if [ "$(hostname)" == "miy01" ]; then module purge && module load gcc/5.4 smpi essl/5.5 cuda pgi/17.9 && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH && export OMPI_CC=gcc && export OMPI_FC=gfortran; fi
#if [ "$(hostname)" == "miy02" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
#if [ "$(hostname)" == "miy03" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
if [ "$(hostname)" != "miy01" -a "$(hostname)" != "miy02" -a "$(hostname)" != "miy03" ]; then
......
#!/bin/bash
source /etc/profile.d/modules.sh
#source /etc/profile.d/modules.sh
if [ -f /etc/profile.d/modules.sh ]; then source /etc/profile.d/modules.sh ; else source /etc/profile.d/mpcdf_modules.sh; fi
set -ex
source ./ci_test_scripts/.ci-env-vars
......
#!/bin/bash
source /etc/profile.d/modules.sh
#source /etc/profile.d/modules.sh
if [ -f /etc/profile.d/modules.sh ]; then source /etc/profile.d/modules.sh ; else source /etc/profile.d/mpcdf_modules.sh; fi
set -ex
source ./ci_test_scripts/.ci-env-vars
......
......@@ -13,6 +13,7 @@ configueArg=""
skipStep=0
batchCommand=""
interactiveRun="yes"
slurmBatch="no"
function usage() {
cat >&2 <<-EOF
......@@ -20,7 +21,7 @@ function usage() {
Call all the necessary steps to perform an ELPA CI test
Usage:
run_ci_tests [-c configure arguments] [-j makeTasks] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-s skipStep] [-q submit command] [-i interactive run]
run_ci_tests [-c configure arguments] [-j makeTasks] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-s skipStep] [-q submit command] [-i interactive run] [-S submit to Slurm]
Options:
-c configure arguments
......@@ -40,7 +41,7 @@ function usage() {
-o OpenMP threads
Number of OpenMP threads used during runs of ELPA tests
-j makeTaks
-j makeTasks
Number of processes make should use during build (default 1)
-s skipStep
......@@ -52,13 +53,16 @@ function usage() {
-i interactive_run
if "yes" no batch command will be triggered
-S submit to slurm
if "yes" a SLURM batch job will be submitted
-h
Print this help text
EOF
}
while getopts "c:t:j:m:n:b:o:s:q:i:h" opt; do
while getopts "c:t:j:m:n:b:o:s:q:i:S:h" opt; do
case $opt in
j)
makeTasks=$OPTARG;;
......@@ -80,6 +84,8 @@ while getopts "c:t:j:m:n:b:o:s:q:i:h" opt; do
batchCommand=$OPTARG;;
i)
interactiveRun=$OPTARG;;
S)
slurmBatch=$OPTARG;;
:)
echo "Option -$OPTARG requires an argument" >&2;;
h)
......@@ -90,17 +96,135 @@ while getopts "c:t:j:m:n:b:o:s:q:i:h" opt; do
esac
done
if [ $skipStep -eq 1 ]
then
echo "Skipping the test since option -s has been specified"
exit 0
else
fi
if [ "$slurmBatch" == "yes" ]
then
# default exit code
exitCode=1
CLUSTER=""
if [[ "$HOST" =~ "cobra" ]]
then
CLUSTER="cobra"
fi
if [[ "$HOST" =~ "talos" ]]
then
CLUSTER="talos"
fi
if [[ "$HOST" =~ "freya" ]]
then
CLUSTER="freya"
fi
if [[ "$HOST" =~ "draco" ]]
then
CLUSTER="draco"
fi
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS on $mpiTasks tasks"
# GPU runners
if [ "$CI_RUNNER_TAGS" == "gpu" ]
then
cp $HOME/runners/job_script_templates/run_${CLUSTER}_1node_2GPU.sh .
echo "./configure " "$configureArgs" >> ./run_${CLUSTER}_1node_2GPU.sh
echo " " >> ./run_${CLUSTER}_1node_2GPU.sh
echo "make -j 16" >> ./run_${CLUSTER}_1node_2GPU.sh
echo " " >> ./run_${CLUSTER}_1node_2GPU.sh
echo "export OMP_NUM_THREADS=$ompThreads" >> ./run_${CLUSTER}_1node_2GPU.sh
echo "export TASKS=$mpiTasks" >> ./run_${CLUSTER}_1node_2GPU.sh
echo "make check TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" " >> ./run_${CLUSTER}_1node_2GPU.sh
echo " "
echo "Job script for the run"
cat ./run_${CLUSTER}_1node_2GPU.sh
echo " "
echo "Submitting to SLURM"
if sbatch -W ./run_${CLUSTER}_1node_2GPU.sh; then
exitCode=$?
else
exitCode=$?
echo "Submission exited with exitCode $exitCode"
fi
#if (( $exitCode > 0 ))
#then
cat ./ELPA_CI_2gpu.err.*
#fi
fi
#SSE, AVX, AVX2, and AVX-512 runners
if [[ "$CI_RUNNER_TAGS" =~ "sse" ]] || [[ "$CI_RUNNER_TAGS" =~ "avx" ]] || [[ "$CI_RUNNER_TAGS" =~ "avx2" ]] || [[ "$CI_RUNNER_TAGS" =~ "avx512" ]]
then
cp $HOME/runners/job_script_templates/run_${CLUSTER}_1node.sh .
echo "./configure " "$configureArgs" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "make -j 16" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "export OMP_NUM_THREADS=$ompThreads" >> ./run_${CLUSTER}_1node.sh
echo "export TASKS=$mpiTasks" >> ./run_${CLUSTER}_1node.sh
echo "make check TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" " >> ./run_${CLUSTER}_1node.sh
echo " "
echo "Job script for the run"
cat ./run_${CLUSTER}_1node.sh
echo " "
echo "Submitting to SLURM"
if sbatch -W ./run_${CLUSTER}_1node.sh; then
exitCode=$?
else
exitCode=$?
echo "Submission exited with exitCode $exitCode"
fi
echo " "
echo "Exit Code of sbatch: $exitCode"
echo " "
cat ./ELPA_CI.out.*
#if [ $exitCode -ne 0 ]
#then
cat ./ELPA_CI.err.*
#fi
fi
#if [ $exitCode -ne 0 ]
#then
if [ -f ./test-suite.log ]
then
cat ./test-suite.log
fi
#fi
exit $exitCode
fi
# not skipped then proceed
if [ "$slurmBatch" == "no" ]
then
# this is the old, messy implementation for
# - appdev
# - freya-interactive
# - draco
# - buildtest
# - virtual machine runners
# hopefully this can be removed soon
echo "Using old CI logic for appdev"
if [ "$batchCommand" == "srun" ]
then
# use srun to start mpi jobs
if [ "$interactiveRun" == "no" ]
then
# interactive runs are not possible
echo "Running with $batchCommand with $SRUN_COMMANDLINE_CONFIGURE"
# $batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE bash -c ' {source /etc/profile.d/modules.sh && source ./ci_test_scripts/ci-env-vars && eval ./configure $configureArgs; }'
# $batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE bash -c ' {source /etc/profile.d/modules.sh && source ./ci_test_scripts/ci-env-vars && eval ./configure $configureArgs; }'
$batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE ./ci_test_scripts/configure_step.sh "$configureArgs"
if [ $? -ne 0 ]; then cat config.log && exit 1; fi
......@@ -112,22 +236,26 @@ else
grep -i "Expected %stop" test-suite.log && exit 1 || true ;
if [ $? -ne 0 ]; then exit 1; fi
else
#eval ./configure $configureArgs
./ci_test_scripts/configure_step.sh "$configureArgs"
# interactive runs are possible
#eval ./configure $configureArgs
./ci_test_scripts/configure_step.sh "$configureArgs"
if [ $? -ne 0 ]; then cat config.log && exit 1; fi
if [ $? -ne 0 ]; then cat config.log && exit 1; fi
make -j $makeTasks
if [ $? -ne 0 ]; then exit 1; fi
make -j $makeTasks
if [ $? -ne 0 ]; then exit 1; fi
OMP_NUM_THREADS=$ompThreads make check TASKS=$mpiTasks TEST_FLAGS="$matrixSize $nrEV $blockSize" || { cat test-suite.log; exit 1; }
if [ $? -ne 0 ]; then exit 1; fi
OMP_NUM_THREADS=$ompThreads make check TASKS=$mpiTasks TEST_FLAGS="$matrixSize $nrEV $blockSize" || { cat test-suite.log; exit 1; }
if [ $? -ne 0 ]; then exit 1; fi
grep -i "Expected %stop" test-suite.log && exit 1 || true ;
if [ $? -ne 0 ]; then exit 1; fi
grep -i "Expected %stop" test-suite.log && exit 1 || true ;
if [ $? -ne 0 ]; then exit 1; fi
fi
else
# do not use srun to start mpi applications
#eval ./configure $configureArgs
./ci_test_scripts/configure_step.sh "$configureArgs"
......@@ -142,5 +270,8 @@ else
grep -i "Expected %stop" test-suite.log && exit 1 || true ;
if [ $? -ne 0 ]; then exit 1; fi
fi
else
# a submission to SLURM via a batch script will be done
echo "Submitting to a SLURM batch system"
fi
#!/bin/bash
set -e
set -x
#some defaults
mpiTasks=2
matrixSize=150
nrEV=$matrixSize
blockSize=16
ompThreads=1
configureArgs=""
batchCommand=""
slurmBatch="no"
function usage() {
cat >&2 <<-EOF
Call all the necessary steps to perform an ELPA CI test
Usage:
run_distcheck_tests [-c configure arguments] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-q submit command] [-S submit to Slurm]
Options:
-c configure arguments
Line of arguments passed to configure call
-t MPI Tasks
Number of MPI processes used during test runs of ELPA tests
-m Matrix size
Size of the mxm matrix used during runs of ELPA tests
-n Number of eigenvectors
Number of eigenvectors to be computed during runs of ELPA tests
-b block size
Block size of block-cyclic distribution during runs of ELPA tests
-o OpenMP threads
Number of OpenMP threads used during runs of ELPA tests
-q submit command
Job steps will be submitted via command to a batch system (default no submission)
-S submit to slurm
if "yes" a SLURM batch job will be submitted
-h
Print this help text
EOF
}
while getopts "c:t:j:m:n:b:o:s:q:i:S:h" opt; do
case $opt in
t)
mpiTasks=$OPTARG;;
m)
matrixSize=$OPTARG;;
n)
nrEV=$OPTARG;;
b)
blockSize=$OPTARG;;
o)
ompThreads=$OPTARG;;
c)
configureArgs=$OPTARG;;
q)
batchCommand=$OPTARG;;
S)
slurmBatch=$OPTARG;;
:)
echo "Option -$OPTARG requires an argument" >&2;;
h)
usage
exit 1;;
*)
exit 1;;
esac
done
if [ "$slurmBatch" == "yes" ]
then
# default exit code
exitCode=1
CLUSTER=""
if [[ "$HOST" =~ "cobra" ]]
then
CLUSTER="cobra"
fi
if [[ "$HOST" =~ "talos" ]]
then
CLUSTER="talos"
fi
if [[ "$HOST" =~ "freya" ]]
then
CLUSTER="freya"
fi
if [[ "$HOST" =~ "draco" ]]
then
CLUSTER="draco"
fi
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS on $mpiTasks tasks"
#distcheck
if [[ "$CI_RUNNER_TAGS" =~ "distcheck" ]]
then
cp $HOME/runners/job_script_templates/run_${CLUSTER}_1node.sh .
echo "./configure " "$configureArgs" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "export TASKS=$mpiTasks" >> ./run_${CLUSTER}_1node.sh
echo "export DISTCHECK_CONFIGURE_FLAGS=\" $configureArgs \" " >> ./run_${CLUSTER}_1node.sh
echo "make distcheck TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" || { chmod u+rwX -R . ; exit 1 ; } " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " "
echo "Job script for the run"
cat ./run_${CLUSTER}_1node.sh
echo " "
echo "Submitting to SLURM"
sbatch -W ./run_${CLUSTER}_1node.sh
exitCode=$?
echo " "
echo "Exit Code of sbatch: $exitCode"
echo " "
cat ./ELPA_CI.out.*
#if [ $exitCode -ne 0 ]
#then
cat ./ELPA_CI.err.*
#fi
if [ -f ./test-suite.log ]
then
cat ./test-suite.log
fi
fi
#if [ $exitCode -ne 0 ]
#then
cat ./test-suite.log
#fi
exit $exitCode
fi
#!/bin/bash
set -e
set -x
#some defaults
mpiTasks=2
matrixSize=150
nrEV=$matrixSize
blockSize=16
ompThreads=1
configureArgs=""
batchCommand=""
slurmBatch="no"
projectName="unknown"
projectExecutable=""
projectConfigureArg=""
function usage() {
cat >&2 <<-EOF
Call all the necessary steps to perform an ELPA CI test
Usage:
run_project_tests [-c configure arguments] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-q submit command] [-S submit to Slurm] [-p projectName] [-e projectExecutable] [-C project configure arguments]
Options:
-c configure arguments
Line of arguments passed to configure call
-t MPI Tasks
Number of MPI processes used during test runs of ELPA tests
-m Matrix size
Size of the mxm matrix used during runs of ELPA tests
-n Number of eigenvectors
Number of eigenvectors to be computed during runs of ELPA tests
-b block size
Block size of block-cyclic distribution during runs of ELPA tests
-o OpenMP threads
Number of OpenMP threads used during runs of ELPA tests
-q submit command
Job steps will be submitted via command to a batch system (default no submission)
-S submit to slurm
if "yes" a SLURM batch job will be submitted
-p project name
specifies which project to build and test
-e project executable
specifies which executable to run
-C project configure arguments
arguments for the configure of the project
-h
Print this help text
EOF
}
while getopts "c:t:j:m:n:b:o:s:q:i:S:p:e:C:h" opt; do
case $opt in
t)
mpiTasks=$OPTARG;;
m)
matrixSize=$OPTARG;;
n)
nrEV=$OPTARG;;
b)
blockSize=$OPTARG;;
o)
ompThreads=$OPTARG;;
c)
configureArgs=$OPTARG;;
q)
batchCommand=$OPTARG;;
S)
slurmBatch=$OPTARG;;
p)
projectName=$OPTARG;;
e)
projectExecutable=$OPTARG;;
C)
projectConfigureArgs=$OPTARG;;
:)
echo "Option -$OPTARG requires an argument" >&2;;
h)
usage
exit 1;;
*)
exit 1;;
esac
done
if [ "$slurmBatch" == "yes" ]
then
# default exit code
exitCode=1
CLUSTER=""
if [[ "$HOST" =~ "cobra" ]]
then
CLUSTER="cobra"
fi
if [[ "$HOST" =~ "talos" ]]
then
CLUSTER="talos"
fi
if [[ "$HOST" =~ "freya" ]]
then
CLUSTER="freya"
fi
if [[ "$HOST" =~ "draco" ]]
then
CLUSTER="draco"
fi
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS on $mpiTasks tasks"
#project_test
if [[ "$CI_RUNNER_TAGS" =~ "project_test" ]]
then
cp $HOME/runners/job_script_templates/run_${CLUSTER}_1node.sh .
echo "mkdir -p build" >> ./run_${CLUSTER}_1node.sh
echo "pushd build" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "#Running autogen " >> ./run_${CLUSTER}_1node.sh
echo "../autogen.sh" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "#Running configure " >> ./run_${CLUSTER}_1node.sh
echo "../configure " "$configureArgs" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "export TASKS=$mpiTasks" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "#Running make " >> ./run_${CLUSTER}_1node.sh
echo "make -j 8" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "#Running make install" >> ./run_${CLUSTER}_1node.sh
echo "make install" >> ./run_${CLUSTER}_1node.sh
echo "popd" >> ./run_${CLUSTER}_1node.sh
echo "mkdir -p $projectName/build" >> ./run_${CLUSTER}_1node.sh
echo "pushd $projectName/build" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " #Testing project " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "#Running autogen " >> ./run_${CLUSTER}_1node.sh
echo "../autogen.sh" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "#Running configure " >> ./run_${CLUSTER}_1node.sh
echo "../configure " "$projectConfigureArgs " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "#Running make " >> ./run_${CLUSTER}_1node.sh
echo "make -j 8" >> ./run_${CLUSTER}_1node.sh
echo "export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:\$LD_LIBRARY_PATH" >> ./run_${CLUSTER}_1node.sh
echo "./$projectExecutable" >> ./run_${CLUSTER}_1node.sh
echo "make distclean" >> ./run_${CLUSTER}_1node.sh
echo "popd" >> ./run_${CLUSTER}_1node.sh
echo "pushd build" >> ./run_${CLUSTER}_1node.sh
echo "make distclean" >> ./run_${CLUSTER}_1node.sh
echo "rm -rf installdest" >> ./run_${CLUSTER}_1node.sh
echo "popd" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " "
echo "Job script for the run"
cat ./run_${CLUSTER}_1node.sh
echo " "
echo "Submitting to SLURM"
sbatch -W ./run_${CLUSTER}_1node.sh
exitCode=$?
echo " "
echo "Exit Code of sbatch: $exitCode"
echo " "
cat ./ELPA_CI.out.*
#if [ $exitCode -ne 0 ]
#then
cat ./ELPA_CI.err.*
#fi
if [ -f ./test-suite.log ]
then
cat ./test-suite.log
fi
fi
exit $exitCode
fi
#!/bin/bash
source /etc/profile.d/modules.sh
#source /etc/profile.d/modules.sh
if [ -f /etc/profile.d/modules.sh ]; then source /etc/profile.d/modules.sh ; else source /etc/profile.d/mpcdf_modules.sh; fi
set -ex
source ./ci_test_scripts/.ci-env-vars
......
......@@ -19,7 +19,7 @@
%define with_openmp 0
Name: elpa
Version: 2018.11.001
Version: 2019.05.001.rc2
Release: 1
Summary: A massively parallel eigenvector solver
License: LGPL-3.0
......
......@@ -4,6 +4,8 @@
#include <limits.h>
#include <complex.h>
#include <elpa/elpa_version.h>
struct elpa_struct;
typedef struct elpa_struct *elpa_t;
......@@ -12,6 +14,7 @@ typedef struct elpa_autotune_struct *elpa_autotune_t;
#include <elpa/elpa_constants.h>
#include <elpa/elpa_generated_c_api.h>
#include <elpa/elpa_generated.h>
#include <elpa/elpa_generic.h>
......
// The stored build config
......@@ -44,9 +44,14 @@ enum ELPA_SOLVERS {
X(ELPA_2STAGE_REAL_SPARC64_BLOCK2, 19, @ELPA_2STAGE_REAL_SPARC64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK4, 20, @ELPA_2STAGE_REAL_SPARC64_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK6, 21, @ELPA_2STAGE_REAL_SPARC64_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK2, 22, @ELPA_2STAGE_REAL_VSX_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK4, 23, @ELPA_2STAGE_REAL_VSX_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK6, 24, @ELPA_2STAGE_REAL_VSX_BLOCK6_COMPILED@, __VA_ARGS__)
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2, 22, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK4, 23, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK6, 24, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK2, 25, @ELPA_2STAGE_REAL_VSX_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK4, 26, @ELPA_2STAGE_REAL_VSX_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK6, 27, @ELPA_2STAGE_REAL_VSX_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 28, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6, 29, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6_COMPILED@, __VA_ARGS__)
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
......
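The kernel list above is an X-macro: each X(name, value, compiled, ...) entry is expanded once per use site, so the same list can generate the enum values, the "compiled" flags and the kernel-name strings. A simplified, hypothetical illustration of the pattern (these are not ELPA's actual macros):

```c
#include <stdio.h>

/* hypothetical kernel list in the same X-macro style (names are made up) */
#define MY_KERNELS(X)        \
  X(MY_KERNEL_GENERIC, 1)    \
  X(MY_KERNEL_AVX2,    2)    \
  X(MY_KERNEL_NEON,    3)

/* one expansion of the list produces the enum ... */
#define AS_ENUM_ENTRY(name, value) name = value,
enum my_kernels { MY_KERNELS(AS_ENUM_ENTRY) };

/* ... another expansion of the same list produces the value -> name mapping */
#define AS_NAME_CASE(name, value) case value: return #name;
static const char *my_kernel_name(int value) {
  switch (value) {
    MY_KERNELS(AS_NAME_CASE)
    default: return "unknown";
  }
}

int main(void) {
  printf("%d -> %s\n", MY_KERNEL_AVX2, my_kernel_name(MY_KERNEL_AVX2));
  return 0;
}
```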
#define ELPA_API_VERSION @CURRENT_API_VERSION@
#define ELPA_AUTOTUNE_API_VERSION @CURRENT_AUTOTUNE_VERSION@
......@@ -227,6 +227,7 @@ for lang, m, g, q, t, p, d, s, lay, spl in product(sorted(language_flag.keys()),
print("endif\n" * endifs)
for lang, p, d in product(sorted(language_flag.keys()), sorted(prec_flag.keys()), sorted(domain_flag.keys())):
endifs = 0
if (p == "single"):
......@@ -286,3 +287,12 @@ print(" " + " \\\n ".join([
prec_flag['double']]))
print("endif")
name = "test_split_comm_real_double"
print("check_SCRIPTS += " + name + "_extended.sh")
print("noinst_PROGRAMS += " + name)
print(name + "_SOURCES = test/Fortran/test_split_comm.F90")
print(name + "_LDADD = $(test_program_ldadd)")
print(name + "_FCFLAGS = $(test_program_fcflags) \\")
print(" " + " \\\n ".join([
domain_flag['real'],
prec_flag['double']]))
......@@ -22,6 +22,22 @@ elpa/elpa_generated.h: $(top_srcdir)/src/elpa_impl.F90 \
$(top_srcdir)/src/elpa_api.F90 | elpa
@rm -f $@
$(call extract_interface,!c>)
$(call extract_interface,!c_o>)
$(call extract_interface,!c_no>)
#if OPTIONAL_C_ERROR_ARGUMENT
#
#elpa/elpa_generated.h: $(top_srcdir)/src/elpa_impl.F90 \
# $(top_srcdir)/src/elpa_impl_math_template.F90 \
# $(top_srcdir)/src/elpa_api.F90 | elpa
# $(call extract_interface,!c_o>)
#
#else
#elpa/elpa_generated.h: $(top_srcdir)/src/elpa_impl.F90 \
# $(top_srcdir)/src/elpa_impl_math_template.F90 \
# $(top_srcdir)/src/elpa_api.F90 | elpa
# $(call extract_interface,!c_no>)
#endif
generated_headers += elpa/elpa_generated_legacy.h
elpa/elpa_generated_legacy.h: $(top_srcdir)/src/elpa_driver/legacy_interface/elpa_driver_c_interface.F90 \
......@@ -37,7 +53,7 @@ test/shared/generated.h: $(wildcard $(top_srcdir)/test/shared/*.*90) | test/shar
$(call extract_interface,!c>)
generated_headers += src/elpa_generated_fortran_interfaces.h
src/elpa_generated_fortran_interfaces.h: $(filter-out $(wildcard $(top_srcdir)/src/*generated*), $(wildcard $(top_srcdir)/src/elpa2/kernels/*.c $(top_srcdir)/src/elpa2/kernels/*.s $(top_srcdir)/src/*.[ch] $(top_srcdir)/src/elpa_generalized/*.[ch])) | src
src/elpa_generated_fortran_interfaces.h: $(filter-out $(wildcard $(top_srcdir)/src/*generated*), $(wildcard $(top_srcdir)/src/helpers/*.c $(top_srcdir)/src/elpa2/kernels/*.c $(top_srcdir)/src/elpa2/kernels/*.s $(top_srcdir)/src/*.[ch] $(top_srcdir)/src/elpa_generalized/*.[ch])) | src
@rm -f $@
$(call extract_interface,!f>)
$(call extract_interface,#!f>)
......
......@@ -2,5 +2,5 @@
import sys
import os
args = filter(lambda q : q != "-Xcompiler", sys.argv[1:])
args = [q for q in sys.argv[1:] if q != "-Xcompiler"]
os.execvp(args[0], args[0:])
......@@ -62,17 +62,17 @@ module mod_check_for_gpu
gpuAvailable = .false.
if(cublasHandle .ne. -1) then
gpuAvailable = .true.
numberOfDevices = -1
if(myid == 0) then
if (cublasHandle .ne. -1) then
gpuAvailable = .true.
numberOfDevices = -1
if (myid == 0) then
print *, "Skipping GPU init, should have already been initialized "
endif
return
endif
return
else
if(myid == 0) then
print *, "Initializing the GPU devices"
endif
if (myid == 0) then
print *, "Initializing the GPU devices"
endif
endif
if (.not.(present(wantDebug))) then
......
......@@ -85,6 +85,9 @@ module elpa1_impl
use, intrinsic :: iso_c_binding
use elpa_utilities
use elpa1_auxiliary_impl
#ifdef HAVE_LIKWID
use likwid
#endif
implicit none
......
......@@ -66,9 +66,8 @@ function elpa_solve_evp_&
use elpa_abstract_impl
use elpa_mpi
use elpa1_compute
#ifdef WITH_OPENMP
use omp_lib
#endif
use elpa_omp
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
......@@ -122,7 +121,11 @@ function elpa_solve_evp_&
&")
#ifdef WITH_OPENMP
!nrThreads = omp_get_max_threads()
! store the number of OpenMP threads used in the calling function
! restore this at the end of the routine
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#else
......@@ -156,6 +159,13 @@ function elpa_solve_evp_&
if (.not.(obj%eigenvalues_only)) then
q(1,1) = ONE
endif
! restore original OpenMP settings
#ifdef WITH_OPENMP
! set the number of OpenMP threads back to the value used by the calling function
call omp_set_num_threads(omp_threads_caller)
#endif
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
......@@ -314,16 +324,28 @@ function elpa_solve_evp_&
if (do_tridiag) then
call obj%timer%start("forward")
#ifdef HAVE_LIKWID
call likwid_markerStartRegion("tridi")
#endif
call tridiag_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU_tridiag, wantDebug, nrThreads)
#ifdef HAVE_LIKWID
call likwid_markerStopRegion("tridi")
#endif
call obj%timer%stop("forward")
endif !do_tridiag
if (do_solve) then
call obj%timer%start("solve")
#ifdef HAVE_LIKWID
call likwid_markerStartRegion("solve")
#endif
call solve_tridi_&