Commit b5c6f576 authored by Pavel Kus's avatar Pavel Kus

Merge remote-tracking branch 'origin/master_pre_stage' into pkus/generalized

generalized tests disabled (files taken from master_pre_stage due to
many conflicts), have to be re-introduced
parents 973ff6d1 8b5fa5e3
if [ "$HOST" = "buildtest-rzg" ]; then module load impi/5.1.3 intel/16.0 gcc/6.3 mkl/11.3 autotools pkg-config; fi
if [ "$HOST" = "knl1" -o "$HOST" = "knl2" -o "$HOST" = "knl3" -o "$HOST" = "knl4" ] ; then module load impi/5.1.3 intel/17.0 gcc/6.3 mkl/2017 pkg-config; fi
if [ "$HOST" = "gp02" ] ; then module load impi/2017.2 intel/17.0 gcc/6.3 mkl/2017 pkg-config; fi
if [ "$HOST" != "hydra03" -a "$HOST" != "hydra04" -a "$HOST" != "hydra05" -a "$HOST" != "hydra06" -a "$HOST" != "hydra07" -a "$HOST" != "knl1" -a "$HOST" != "knl2" -a "$HOST" != "knl3" -a "$HOST" != "knl4" -a "$HOST" != "gp02" -a "$HOST" != "draco01" -a "$HOST" != "draco02" -a "$HOST" != "draco03" -a "$HOST" != "draco04" ] ; then module load impi/5.1.3 intel/16.0 gcc/6.4 mkl/11.3 autotools pkg-config; fi
if [ "$HOST" = "draco01" -o "$HOST" = "draco02" -o "$HOST" = "draco03" -o "$HOST" = "draco04" ] ; then module load impi/2017.3 intel/17.0 gcc/6.3 mkl/2017 pkg-config; fi
module list
if [ "$HOST" != "buildtest-rzg" ]; then pushd $HOME/bin/reserve_timeslot && git pull && popd ; fi
if [ "$(hostname)" == "buildtest-rzg" ]; then module load impi/5.1.3 intel/16.0 gcc/6.3 mkl/11.3 autotools pkg-config; fi
if [ "$(hostname)" == "knl1" -o "$(hostname)" == "knl2" -o "$(hostname)" == "knl3" -o "$(hostname)" == "knl4" ] ; then module load impi/2017.4 intel/17.0 gcc/7.2 mkl/2017 pkg-config; fi
if [ "$(hostname)" == "maik" ]; then module load impi/5.1.3 intel intel/17.0 gcc/6.3 mkl/2017 pkg-config/0.29.1; fi
if [ "$(hostname)" == "gp02" ] ; then module load impi/5.1.3 intel/17.0 gcc/6.3 mkl/2017 pkg-config; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-1" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-2" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-3" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-4" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "dvl01" ]; then module load intel/17.0 gcc/5.4 mkl/2017 impi/2017.2 gcc/5.4 cuda/8.0; fi
if [ "$(hostname)" == "dvl02" ]; then module load intel/17.0 gcc/5.4 mkl/2017 impi/2017.2 gcc/5.4 cuda/8.0; fi
if [ "$(hostname)" == "miy01" ]; then module purge && module load gcc/5.4 smpi essl/5.5 cuda pgi/17.9 && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH && export OMPI_CC=gcc && export OMPI_FC=gfortran; fi
if [ "$(hostname)" == "miy02" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
if [ "$(hostname)" == "miy03" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
if [ "$(hostname)" != "miy01" -a "$(hostname)" != "miy02" -a "$(hostname)" != "miy03" ]; then
export MKL_INTEL_SCALAPACK_MPI_NO_OMP_BASELINE="-L$MKL_HOME/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread"
export MKL_INTEL_SCALAPACK_FCFLAGS_MPI_NO_OMP="-I$MKL_HOME/include/intel64/lp64"
export MKL_INTEL_SCALAPACK_LDFLAGS_MPI_NO_OMP="$MKL_INTEL_SCALAPACK_MPI_NO_OMP_BASELINE -Wl,-rpath,$MKL_HOME/lib/intel64"
......@@ -40,3 +53,9 @@ export MKL_GFORTRAN_SCALAPACK_LDFLAGS_NO_MPI_OMP="$MKL_GFORTRAN_SCALAPACK_NO_MPI
export ASAN_OPTIONS=suppressions=no_asan_for_mpi.supp,fast_unwind_on_malloc=0
export LSAN_OPTIONS=suppressions=no_lsan_for_mpi.supp
fi
if [ "$(hostname)" != "knl1" -a "$(hostname)" != "knl2" -a "$(hostname)" != "knl3" -a "$(hostname)" != "knl4" -a "$(hostname)" != "maik" ] ; then export I_MPI_DEBUG=5; fi
module list
This diff is collapsed.
Changelog for ELPA 2017.11.001
- added new compute kernels for IBM Power8 and Fujitsu Sparc64
processors
- a first implementation of autotuning capability
- correct some type statements in Fortran
- correct detection of PAPI in configure step
Changelog for ELPA 2017.05.003
- remove bug in invert_triangular, which had been introduced
in ELPA 2017.05.002
Changelog for ELPA 2017.05.002
Mainly bugfixes for ELPA 2017.05.001:
......
......@@ -17,12 +17,12 @@ following listed interfaces will be removed at some time.
In order to unify the namespace of the *ELPA* public interfaces, several interfaces
have been replaced by new names. The old interfaces will be removed
Deprecated interface Replacement
===================================================
get_elpa_row_col_coms elpa_get_communicators
get_elpa_communicators elpa_get_communicators
solve_evp_real elpa_solve_evp_real_1stage_double
solve_evp_complex elpa_solve_evp_complex_1stage_double
Deprecated interface Replacement Comment
==================================================================================================
get_elpa_row_col_coms elpa_get_communicators (removed since 2017.11.001)
get_elpa_communicators elpa_get_communicators (removed since 2017.11.001)
solve_evp_real elpa_solve_evp_real_1stage_double (removed since 2017.11.001)
solve_evp_complex elpa_solve_evp_complex_1stage_double (removed since 2017.11.001)
solve_evp_real_1stage elpa_solve_evp_real_1stage_double
solve_evp_complex_1stage elpa_solve_evp_complex_1stage_double
solve_evp_real_2stage elpa_solve_evp_real_2stage_double
......
# Installation guide for the *ELPA* library#
## Preamle ##
## Preamble ##
This file provides documentation on how to build the *ELPA* library in **version ELPA-2017.05.002**.
This file provides documentation on how to build the *ELPA* library in **version ELPA-2017.11.001**.
With release of **version ELPA-2017.05.001** the build process has been significantly simplified,
which makes it easier to install the *ELPA* library
......@@ -28,7 +28,7 @@ for the documentation how to proceed.
*ELPA* can be installed with the build steps
- configure
- make
- make check
- make check | or make check CHECK_LEVEL=extended
- make install
Please look at configure --help for all available options.
......
......@@ -3,6 +3,12 @@
For more details and recent updates please visit the online [issue system](https://gitlab.mpcdf.mpg.de/elpa/elpa/issues)
Issues which are not mentioned in a newer release are (considered as) solved
### ELPA 2017.11.001 release ###
- the elpa autotune print functions cannot print at the moment
### ELPA 2017.05.003 release ###
- at the moment no issues are known
### ELPA 2017.05.002 release ###
- at the moment no issues are known
......
......@@ -73,3 +73,9 @@ https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
NO incompatible API changes w.r.t. to the previous version. Only some functions have
been added. The state of release 2017.05.001 defines this interface
- 11
Incompatible API changes w.r.t. the previous version (only in the so-called
"legacy interface", since as announced some deprecated function aliases have been
removed). For the current interface all changes since 2017.05.001 are
compatible, since only some functions have been added.
The state of release 2017.11.001.(rc1) defines this interface
......@@ -35,6 +35,7 @@ noinst_LTLIBRARIES += libelpa@SUFFIX@_private.la
libelpa@SUFFIX@_private_la_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa_impl.F90 \
src/elpa_autotune_impl.F90 \
src/elpa_abstract_impl.F90 \
src/helpers/mod_precision.F90 \
src/helpers/mod_mpi.F90 \
......@@ -203,6 +204,20 @@ endif
endif
endif
if WITH_REAL_SPARC64_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_2hv_double_precision.c
#if WANT_SINGLE_PRECISION_REAL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_2hv_single_precision.c
#endif
endif
if WITH_REAL_VSX_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_2hv_single_precision.c
endif
endif
if WITH_REAL_SSE_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
......@@ -231,6 +246,19 @@ if WANT_SINGLE_PRECISION_REAL
endif
endif
if WITH_REAL_SPARC64_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_4hv_double_precision.c
#if WANT_SINGLE_PRECISION_REAL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_4hv_single_precision.c
#endif
endif
if WITH_REAL_VSX_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_4hv_single_precision.c
endif
endif
if WITH_REAL_SSE_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_4hv_double_precision.c
......@@ -260,7 +288,19 @@ if WANT_SINGLE_PRECISION_REAL
endif
endif
if WITH_REAL_SPARC64_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_6hv_double_precision.c
#if WANT_SINGLE_PRECISION_REAL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_6hv_single_precision.c
#endif
endif
if WITH_REAL_VSX_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_6hv_single_precision.c
endif
endif
if WITH_REAL_SSE_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_6hv_double_precision.c
......@@ -290,6 +330,19 @@ if WANT_SINGLE_PRECISION_REAL
endif
endif
#if WITH_COMPLEX_SPARC64_BLOCK1_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sparc64_1hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sparc64_1hv_single_precision.c
#endif
#endif
#
#if WITH_COMPLEX_VSX_BLOCK1_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_vsx_1hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_vsx_1hv_single_precision.c
#endif
#endif
if WITH_COMPLEX_SSE_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sse_1hv_double_precision.c
......@@ -320,6 +373,20 @@ if WANT_SINGLE_PRECISION_COMPLEX
endif
endif
#if WITH_COMPLEX_SPARC64_BLOCK2_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sparc64_2hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sparc64_2hv_single_precision.c
#endif
#endif
#
#if WITH_COMPLEX_VSX_BLOCK2_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_vsx_2hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_vsx_2hv_single_precision.c
#endif
#endif
if WITH_COMPLEX_SSE_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sse_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
......@@ -390,9 +457,7 @@ dist_man_MANS = \
if ENABLE_LEGACY
dist_man_MANS += \
man/solve_evp_real.3 \
man/solve_evp_real_1stage_double.3 \
man/solve_evp_complex.3 \
man/solve_evp_complex_1stage_double.3 \
man/solve_evp_real_2stage_double.3 \
man/solve_evp_complex_2stage_double.3 \
......@@ -400,8 +465,7 @@ dist_man_MANS += \
man/elpa_solve_evp_complex_1stage_double.3 \
man/elpa_solve_evp_real_2stage_double.3 \
man/elpa_solve_evp_complex_2stage_double.3 \
man/get_elpa_row_col_comms.3 \
man/get_elpa_communicators.3 \
man/elpa_get_communicators.3 \
man/elpa_mult_at_b_real_double.3 \
man/elpa_mult_at_b_real_single.3 \
man/elpa_mult_ah_b_complex_double.3 \
......@@ -413,7 +477,11 @@ dist_man_MANS += \
man/elpa_solve_evp_real_double.3 \
man/elpa_solve_evp_real_single.3 \
man/elpa_solve_evp_complex_double.3 \
man/elpa_solve_evp_complex_single.3
man/elpa_solve_evp_complex_single.3 \
man/elpa_autotune_setup.3 \
man/elpa_autotune_step.3 \
man/elpa_autotune_set_best.3 \
man/elpa_autotune_deallocate.3
endif
......@@ -489,26 +557,26 @@ include legacy_test_programs.am
endif
noinst_PROGRAMS += double_instance@SUFFIX@
check_SCRIPTS += double_instance@SUFFIX@.sh
check_SCRIPTS += double_instance@SUFFIX@_default.sh
double_instance@SUFFIX@_SOURCES = test/Fortran/elpa2/double_instance.F90
double_instance@SUFFIX@_LDADD = $(test_program_ldadd)
double_instance@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODINC)test_modules $(FC_MODINC)modules
noinst_PROGRAMS += real_2stage_banded@SUFFIX@
check_SCRIPTS += real_2stage_banded@SUFFIX@.sh
check_SCRIPTS += real_2stage_banded@SUFFIX@_default.sh
real_2stage_banded@SUFFIX@_SOURCES = test/Fortran/elpa2/real_2stage_banded.F90
real_2stage_banded@SUFFIX@_LDADD = $(test_program_ldadd)
real_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODINC)test_modules $(FC_MODINC)modules
noinst_PROGRAMS += complex_2stage_banded@SUFFIX@
check_SCRIPTS += complex_2stage_banded@SUFFIX@.sh
check_SCRIPTS += complex_2stage_banded@SUFFIX@_default.sh
complex_2stage_banded@SUFFIX@_SOURCES = test/Fortran/elpa2/complex_2stage_banded.F90
complex_2stage_banded@SUFFIX@_LDADD = $(test_program_ldadd)
complex_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODINC)test_modules $(FC_MODINC)modules
if WANT_SINGLE_PRECISION_REAL
noinst_PROGRAMS += single_real_2stage_banded@SUFFIX@
check_SCRIPTS += single_real_2stage_banded@SUFFIX@.sh
check_SCRIPTS += single_real_2stage_banded@SUFFIX@_default.sh
single_real_2stage_banded@SUFFIX@_SOURCES = test/Fortran/elpa2/single_real_2stage_banded.F90
single_real_2stage_banded@SUFFIX@_LDADD = $(test_program_ldadd)
single_real_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODINC)test_modules $(FC_MODINC)modules
......@@ -516,14 +584,14 @@ endif
if WANT_SINGLE_PRECISION_COMPLEX
noinst_PROGRAMS += single_complex_2stage_banded@SUFFIX@
check_SCRIPTS += single_complex_2stage_banded@SUFFIX@.sh
check_SCRIPTS += single_complex_2stage_banded@SUFFIX@_default.sh
single_complex_2stage_banded@SUFFIX@_SOURCES = test/Fortran/elpa2/single_complex_2stage_banded.F90
single_complex_2stage_banded@SUFFIX@_LDADD = $(test_program_ldadd)
single_complex_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODINC)test_modules $(FC_MODINC)modules
endif
# test scriptsT
# test scripts
TASKS ?= 2
if WITH_MPI
wrapper=$(MPI_BINARY) -n $${TASKS:-$(TASKS)}
......@@ -531,7 +599,12 @@ else
wrapper=
endif
TESTS = $(check_SCRIPTS)
%.sh: %
# Generate the wrapper script for an "extended" test of program `%`.
# The generated script runs the program through $(wrapper) with $TEST_FLAGS
# only when CHECK_LEVEL=extended is set in the environment of the test run;
# otherwise it exits with status 77.
# NOTE(review): 77 is presumably relied on as the automake "skipped test"
# exit status -- confirm against the test harness in use.
%_extended.sh: %
@echo "#!/bin/bash" > $@
@echo 'if [ "$$CHECK_LEVEL" = "extended" ] ; then $(wrapper) ./$^ $$TEST_FLAGS ; else exit 77; fi' >> $@
@chmod +x $@
# Generate the wrapper script for a "default" test of program `%`.
# The generated script unconditionally runs the program through $(wrapper)
# and passes $TEST_FLAGS, which is expanded when the script runs (the `$$`
# keeps make from expanding it while the script is being written).
%_default.sh: %
@echo "#!/bin/bash" > $@
@echo '$(wrapper)' ./$^ '$$TEST_FLAGS' >> $@
@chmod +x $@
......@@ -622,6 +695,9 @@ EXTRA_DIST = \
src/elpa2/kernels/real_avx512_2hv_template.c \
src/elpa2/kernels/real_avx512_4hv_template.c \
src/elpa2/kernels/real_avx512_6hv_template.c \
src/elpa2/kernels/real_vsx_2hv_template.c \
src/elpa2/kernels/real_vsx_4hv_template.c \
src/elpa2/kernels/real_vsx_6hv_template.c \
src/elpa2/kernels/real_sse_2hv_template.c \
src/elpa2/kernels/real_sse_4hv_template.c \
src/elpa2/kernels/real_sse_6hv_template.c \
......@@ -646,12 +722,12 @@ EXTRA_DIST = \
test/shared/test_check_correctness_template.F90 \
test/shared/test_prepare_matrix_template.F90 \
test/shared/test_analytic_template.F90 \
test_project/Makefile.am \
test_project/autogen.sh \
test_project/configure.ac \
test_project/fdep \
test_project/m4 \
test_project/src/test_real.F90 \
test_project_1stage/Makefile.am \
test_project_1stage/autogen.sh \
test_project_1stage/configure.ac \
test_project_1stage/fdep \
test_project_1stage/m4 \
test_project_1stage/src/test_real.F90 \
test_project_2stage/Makefile.am \
test_project_2stage/autogen.sh \
test_project_2stage/configure.ac \
......@@ -664,7 +740,19 @@ EXTRA_DIST = \
test_project_C/fdep \
test_project_C/m4 \
test_project_C/src/test_real.c \
test_project_C/src/test_blacs_infrastructure.F90
test_project_C/src/test_blacs_infrastructure.F90\
test_project_1stage_legacy_api/Makefile.am \
test_project_1stage_legacy_api/autogen.sh \
test_project_1stage_legacy_api/configure.ac \
test_project_1stage_legacy_api/fdep \
test_project_1stage_legacy_api/m4 \
test_project_1stage_legacy_api/src/test_real.F90 \
test_project_2stage_legacy_api/Makefile.am \
test_project_2stage_legacy_api/autogen.sh \
test_project_2stage_legacy_api/configure.ac \
test_project_2stage_legacy_api/fdep \
test_project_2stage_legacy_api/m4 \
test_project_2stage_legacy_api/src/test_real2.F90
if ENABLE_LEGACY
EXTRA_DIST += \
......
......@@ -2,10 +2,19 @@
## Current Release ##
The current release is ELPA 2017.05.002. The current supported API version
is 20170403. This release supports the earliest API version 20170403.
The current release is ELPA 2017.11.001. The current supported API version
is 20171201. This release supports the earliest API version 20170403.
## About *ELPA*
[![Build
status](https://gitlab.mpcdf.mpg.de/elpa/elpa/badges/master/build.svg)](https://gitlab.mpcdf.mpg.de/elpa/elpa/commits/master)
[![Code
coverage](https://gitlab.mpcdf.mpg.de/elpa/elpa/badges/master/coverage.svg)](http://elpa.pages.mpcdf.de/elpa/coverage_summary)
[![License: LGPL v3][license-badge]](LICENSE)
[license-badge]: https://img.shields.io/badge/License-LGPL%20v3-blue.svg
## About *ELPA* ##
The computation of selected or all eigenvalues and eigenvectors of a symmetric
(Hermitian) matrix has high relevance for various scientific disciplines.
......
This file contains the release notes for the ELPA 2017.05.002 version
This file contains the release notes for the ELPA 2017.11.001 version
What is new?
-------------
For detailed information about changes since release ELPA 2016.11.001 please have a look at the Changelog file
Highlights are:
For detailed information about changes since release ELPA 2017.05.003 please have a look at the Changelog file
- a more generic and more flexible API, which allows easy implementation of upcoming features
- faster GPU implementation, especially for ELPA 1stage
- the restriction of the block-cyclic distribution blocksize = 128 in the GPU
case is relaxed
- Faster CPU implementation due to better blocking
- support of already banded matrices (new API only!)
- improved KNL support
- This release contains new compute kernels for IBM Power 8 and Fujitsu Sparc64 processors
- A first version of autotuning has been implemented
ABI change
---------------------
Since release 2016.05.001 the ABI has not changed. Only additional interfaces habe been added, please have also a look
at the DEPRECTATED_FEATURES file
Since release 2017.05.003 the ABI has changed.
Any incompatibilities to previous version?
---------------------------------------
As mentioned before, the ABI of ELPA was not changed; There is no
incompatibility with the previous version ELPA 2016.11.001.
As mentioned before, the ABI of ELPA was changed;
This change only affects the old "legacy interface", since as
announced some deprecated function aliases have been removed.
For the current interface there is no incompatibility with the
previous version ELPA 2017.05.003.
......@@ -124,8 +124,6 @@ the user application):
"debug" can be one of { 0 | 1 }, will give more information in case of an error if set to 1
"eigenvalues_only" can be one of { 0 | 1 }, if set to 1 solve will only compute the eigenvalues
6. do the desired task with the *ELPA* library, which could be
a) e%eigenvectors ! solve EV problem with solver as set by "set" method; computes eigenvalues AND eigenvectors
......@@ -151,7 +149,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example "man elpa2_print_kernels" should provide the documentation for the *ELPA* program which prints all
the available kernels.
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2017.05.002/html/index.html)
Also an [online doxygen documentation](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2017.11.001/html/index.html)
for each *ELPA* release is available.
......@@ -9,7 +9,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example "man elpa2_print_kernels" should provide the documentation for the *ELPA* program which prints all
the available kernels.
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2017.05.002/html/index.html)
Also an [online doxygen documentation](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2017.11.001/html/index.html)
for each *ELPA* release is available.
......@@ -125,7 +125,7 @@ of a simple example program can be found in ./test_project/src.
! All ELPA routines need MPI communicators for communicating within
! rows or columns of processes, these are set in get_elpa_communicators
! rows or columns of processes, these are set in elpa_get_communicators
success = elpa_get_communicators(mpi_comm_world, my_prow, my_pcol, &
mpi_comm_rows, mpi_comm_cols)
......@@ -216,8 +216,8 @@ SYNOPSIS
integer, intent(in) ldq: leading dimension of matrix q which stores the eigenvectors
integer, intent(in) nblk: blocksize of block cyclic distribution, must be the same in both directions
integer, intent(in) matrixCols: number of columns of locally distributed matrices a and q
integer, intent(in) mpi_comm_rows: communicator for communication in rows. Constructed with get_elpa_communicators(3)
integer, intent(in) mpi_comm_cols: communicator for communication in colums. Constructed with get_elpa_communicators(3)
integer, intent(in) mpi_comm_rows: communicator for communication in rows. Constructed with elpa_get_communicators(3)
integer, intent(in) mpi_comm_cols: communicator for communication in columns. Constructed with elpa_get_communicators(3)
logical success: return value indicating success or failure
......@@ -238,14 +238,14 @@ SYNOPSIS
int ldq: leading dimension of matrix q which stores the eigenvectors
int nblk: blocksize of block cyclic distribution, must be the same in both directions
int matrixCols: number of columns of locally distributed matrices a and q
int mpi_comm_rows: communicator for communication in rows. Constructed with get_elpa_communicators(3)
int mpi_comm_cols: communicator for communication in colums. Constructed with get_elpa_communicators(3)
int mpi_comm_rows: communicator for communication in rows. Constructed with elpa_get_communicators(3)
int mpi_comm_cols: communicator for communication in columns. Constructed with elpa_get_communicators(3)
int success: return value indicating success (1) or failure (0)
DESCRIPTION
Solve the real eigenvalue problem with the 1-stage solver. The ELPA communicators mpi_comm_rows and mpi_comm_cols are obtained with the
get_elpa_communicators(3) function. The distributed quadratic marix a has global dimensions na x na, and a local size lda x matrixCols.
elpa_get_communicators(3) function. The distributed quadratic matrix a has global dimensions na x na, and a local size lda x matrixCols.
The solver will compute the first nev eigenvalues, which will be stored on exit in ev. The eigenvectors corresponding to the eigenvalues
will be stored in q. All memory of the arguments must be allocated outside the call to the solver.
......@@ -265,8 +265,8 @@ DESCRIPTION
integer, intent(in) ldq: leading dimension of matrix q which stores the eigenvectors
integer, intent(in) nblk: blocksize of block cyclic distribution, must be the same in both directions
integer, intent(in) matrixCols: number of columns of locally distributed matrices a and q
integer, intent(in) mpi_comm_rows: communicator for communication in rows. Constructed with get_elpa_communicators(3)
integer, intent(in) mpi_comm_cols: communicator for communication in colums. Constructed with get_elpa_communicators(3)
integer, intent(in) mpi_comm_rows: communicator for communication in rows. Constructed with elpa_get_communicators(3)
integer, intent(in) mpi_comm_cols: communicator for communication in columns. Constructed with elpa_get_communicators(3)
logical success: return value indicating success or failure
......@@ -288,14 +288,14 @@ DESCRIPTION
int ldq: leading dimension of matrix q which stores the eigenvectors
int nblk: blocksize of block cyclic distribution, must be the same in both directions
int matrixCols: number of columns of locally distributed matrices a and q
int mpi_comm_rows: communicator for communication in rows. Constructed with get_elpa_communicators(3)
int mpi_comm_cols: communicator for communication in colums. Constructed with get_elpa_communicators(3)
int mpi_comm_rows: communicator for communication in rows. Constructed with elpa_get_communicators(3)
int mpi_comm_cols: communicator for communication in columns. Constructed with elpa_get_communicators(3)
int success: return value indicating success (1) or failure (0)
DESCRIPTION
Solve the complex eigenvalue problem with the 1-stage solver. The ELPA communicators mpi_comm_rows and mpi_comm_cols are obtained with the
get_elpa_communicators(3) function. The distributed quadratic marix a has global dimensions na x na, and a local size lda x matrixCols.
elpa_get_communicators(3) function. The distributed quadratic matrix a has global dimensions na x na, and a local size lda x matrixCols.
The solver will compute the first nev eigenvalues, which will be stored on exit in ev. The eigenvectors corresponding to the eigenvalues
will be stored in q. All memory of the arguments must be allocated outside the call to the solver.
......@@ -357,8 +357,8 @@ SYNOPSIS
integer, intent(in) ldq: leading dimension of matrix q which stores the eigenvectors
integer, intent(in) nblk: blocksize of block cyclic distribution, must be the same in both directions
integer, intent(in) matrixCols: number of columns of locally distributed matrices a and q
integer, intent(in) mpi_comm_rows: communicator for communication in rows. Constructed with get_elpa_communicators(3)
integer, intent(in) mpi_comm_cols: communicator for communication in colums. Constructed with get_elpa_communicators(3)
integer, intent(in) mpi_comm_rows: communicator for communication in rows. Constructed with elpa_get_communicators(3)
integer, intent(in) mpi_comm_cols: communicator for communication in columns. Constructed with elpa_get_communicators(3)
integer, intent(in) mpi_comm_all: communicator for all processes in the processor set involved in ELPA
logical, intent(in), optional: useQR: optional argument; switches to QR-decomposition if set to .true.
logical, intent(in), optional: useGPU: decide whether GPUs should be used or not
......@@ -382,8 +382,8 @@ SYNOPSIS
int ldq: leading dimension of matrix q which stores the eigenvectors
int nblk: blocksize of block cyclic distribution, must be the same in both directions
int matrixCols: number of columns of locally distributed matrices a and q
int mpi_comm_rows: communicator for communication in rows. Constructed with get_elpa_communicators(3)
int mpi_comm_cols: communicator for communication in colums. Constructed with get_elpa_communicators(3)
int mpi_comm_rows: communicator for communication in rows. Constructed with elpa_get_communicators(3)
int mpi_comm_cols: communicator for communication in columns. Constructed with elpa_get_communicators(3)
int mpi_comm_all: communicator for all processes in the processor set involved in ELPA
int useQR: if set to 1 switch to QR-decomposition
int useGPU: decide whether the GPU version should be used or not
......@@ -393,7 +393,7 @@ SYNOPSIS
DESCRIPTION
Solve the real eigenvalue problem with the 2-stage solver. The ELPA communicators mpi_comm_rows and mpi_comm_cols are obtained with the
get_elpa_communicators(3) function. The distributed quadratic marix a has global dimensions na x na, and a local size lda x matrixCols.
elpa_get_communicators(3) function. The distributed quadratic matrix a has global dimensions na x na, and a local size lda x matrixCols.
The solver will compute the first nev eigenvalues, which will be stored on exit in ev. The eigenvectors corresponding to the eigenvalues
will be stored in q. All memory of the arguments must be allocated outside the call to the solver.
......
......@@ -27,12 +27,13 @@ AM_SILENT_RULES([yes])
# by the current interface, as they are ABI compatible (e.g. only new symbols
# were added by the new interface)
#
AC_SUBST([ELPA_SO_VERSION], [10:0:2])
AC_SUBST([ELPA_SO_VERSION], [11:0:0])
# API Version
AC_DEFINE([EARLIEST_API_VERSION], [20170403], [Earliest supported ELPA API version])
AC_DEFINE([CURRENT_API_VERSION], [20170403], [Current ELPA API version])
AC_DEFINE([CURRENT_API_VERSION], [20171201], [Current ELPA API version])
# Autotune Version
AC_DEFINE([EARLIEST_AUTOTUNE_VERSION], [20171201], [Earliest ELPA API version, which supports autotuning])
AX_CHECK_GNU_MAKE()
if test x$_cv_gnu_make_command = x ; then
......@@ -142,7 +143,7 @@ fi
dnl check which MPI binary launches an MPI job
if test x"$with_mpi" = x"yes"; then
AC_CHECK_PROGS([MPI_BINARY], [srun mpiexec.hydra mpiexec mpirun poe runjob], [no])
AC_CHECK_PROGS([MPI_BINARY], [mpiexec.hydra mpiexec mpirun poe runjob srun], [no])
if test x"$MPI_BINARY" = x"no"; then
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun, poe, runjob, srun])
fi
......@@ -188,6 +189,7 @@ fi
AM_CONDITIONAL([HAVE_DETAILED_TIMINGS], [test x"$enable_timings" = x"yes"])
dnl PAPI for ftimings
AC_LANG_PUSH([C])
AC_ARG_WITH([papi],
[AS_HELP_STRING([--with-papi],
[Use PAPI to also measure flop count in the detailed timing (--enable-timing), disabled by default])],
......@@ -202,6 +204,7 @@ if test x"${enable_timings}" = x"yes"; then
AC_DEFINE([HAVE_LIBPAPI], [1], [Use the PAPI library])
fi
fi
AC_LANG_POP([C])
save_FCFLAGS=$FCFLAGS
save_LDFLAGS=$LDFLAGS
......@@ -466,6 +469,18 @@ m4_define(elpa_m4_sse_kernels, [
complex_sse_block2
])
m4_define(elpa_m4_sparc64_kernels, [
real_sparc64_block2
real_sparc64_block4
real_sparc64_block6
])
m4_define(elpa_m4_vsx_kernels, [
real_vsx_block2
real_vsx_block4
real_vsx_block6
])
m4_define(elpa_m4_avx_kernels, [
real_avx_block2
real_avx_block4
......@@ -505,7 +520,7 @@ m4_define(elpa_m4_gpu_kernels, [
complex_gpu
])
m4_define(elpa_m4_kernel_types, [generic sse sse_assembly avx avx2 avx512 bgp bgq gpu])
m4_define(elpa_m4_kernel_types, [generic sparc64 vsx sse sse_assembly avx avx2 avx512 bgp bgq gpu])
m4_define(elpa_m4_all_kernels,
m4_foreach_w([elpa_m4_type],
......@@ -538,6 +553,8 @@ AC_DEFUN([ELPA_SELECT_KERNELS], [
dnl Modify list of kernels with configure arguments
ELPA_SELECT_KERNELS([generic],[enable])
ELPA_SELECT_KERNELS([sparc64],[disable])
ELPA_SELECT_KERNELS([vsx],[disable])
ELPA_SELECT_KERNELS([sse],[enable])
ELPA_SELECT_KERNELS([sse_assembly],[enable])
ELPA_SELECT_KERNELS([avx],[enable])
......@@ -552,7 +569,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
])
if test x"${enable_bgp}" = x"yes" -o x"$enable_bgq" = x"yes"; then
m4_foreach_w([elpa_m4_kernel], elpa_m4_sse_kernels elpa_m4_avx_kernels elpa_m4_avx2_kernels elpa_m4_avx512_kernels, [
m4_foreach_w([elpa_m4_kernel], elpa_m4_sparc64_kernels elpa_m4_vsx_kernels elpa_m4_sse_kernels elpa_m4_avx_kernels elpa_m4_avx2_kernels elpa_m4_avx512_kernels, [
dnl Tell the user when a kernel that had been selected is switched off
dnl because of the BGP/BGQ option. The comparison has to go through the
dnl shell builtin test, otherwise the shell tries to execute the expanded
dnl string as a command, the condition is always false and the notice is
dnl never printed.
if test x"$use_[]elpa_m4_kernel[]" = x"yes" ; then
echo "Disabling elpa_m4_kernel due to BGP/BGQ option"
fi
......@@ -612,7 +629,7 @@ AC_DEFUN([ELPA_KERNEL_DEPENDS],[
])
fi
])
m4_foreach_w([elpa_m4_arch],[sse avx avx2 avx512],[
m4_foreach_w([elpa_m4_arch],[sparc64 vsx sse avx avx2 avx512],[
ELPA_KERNEL_DEPENDS([real_]elpa_m4_arch[_block6], [real_]elpa_m4_arch[_block4 real_]elpa_m4_arch[_block2])
ELPA_KERNEL_DEPENDS([real_]elpa_m4_arch[_block4], [real_]elpa_m4_arch[_block2])
ELPA_KERNEL_DEPENDS([complex_]elpa_m4_arch[_block2], [complex_]elpa_m4_arch[_block1])
......@@ -646,7 +663,7 @@ dnl choosing a default kernel
m4_foreach_w([elpa_m4_kind],[real complex],[
m4_foreach_w([elpa_m4_kernel],
m4_foreach_w([elpa_m4_cand_kernel],
elpa_m4_avx512_kernels elpa_m4_avx2_kernels elpa_m4_avx_kernels elpa_m4_sse_kernels elpa_m4_sse_assembly_kernels elpa_m4_generic_kernels,
elpa_m4_avx512_kernels elpa_m4_avx2_kernels elpa_m4_avx_kernels elpa_m4_sse_kernels elpa_m4_sse_assembly_kernels elpa_m4_sparc64_kernels elpa_m4_vsx_kernels elpa_m4_generic_kernels,
[m4_bmatch(elpa_m4_cand_kernel,elpa_m4_kind,elpa_m4_cand_kernel)] ),
[
if test -z "$default_[]elpa_m4_kind[]_kernel"; then
......@@ -664,8 +681,56 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
AC_SUBST([ELPA_2STAGE_]m4_toupper(elpa_m4_kind)[_DEFAULT])
])
dnl #include <fjmfunc.h>
dnl #include <emmintrin.h>
dnl int main(int argc, char **argv) {
dnl __m128d q;
dnl __m128d h1 = _fjsp_neg_v2r8(q);
dnl return 0;
dnl }
AC_LANG_PUSH([C])
dnl Altivec VSX probe: only performed when need_vsx is "yes".
dnl Compiles (does not run) a small C program that includes altivec.h and
dnl calls vec_add on __vector double operands. If the compilation fails,
dnl configure aborts with a hint to use --disable-vsx; otherwise the
dnl preprocessor symbol HAVE_VSX_SSE is defined to 1.
if test x"${need_vsx}" = x"yes"; then
AC_MSG_CHECKING(whether we can compile Altivec VSX with intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <altivec.h>
int main(int argc, char **argv) {
__vector double a, b, c;
c = vec_add(a,b);
return 0;
}
])],
[can_compile_vsx=yes],
[can_compile_vsx=no]
)
AC_MSG_RESULT([${can_compile_vsx}])
if test x"$can_compile_vsx" != x"yes"; then
AC_MSG_ERROR([Could not compile test program, try with --disable-vsx, or adjust the C compiler or CFLAGS])
fi
AC_DEFINE([HAVE_VSX_SSE],[1],[Altivec VSX intrinsics are supported on this CPU])
fi
dnl SPARC64 probe: only performed when need_sparc64 is "yes".
dnl Compiles (does not run) a small C program; if the compilation fails,
dnl configure aborts with a hint to use --disable-sparc64, otherwise the
dnl preprocessor symbol HAVE_SPARC64_SSE is defined to 1.
dnl NOTE(review): the probe program includes x86intrin.h and calls
dnl _mm_loaddup_pd, i.e. it exercises x86 SSE3 intrinsics rather than
dnl anything SPARC64 specific. The dnl-commented sketch earlier in this file
dnl uses fjmfunc.h and _fjsp_neg_v2r8 instead -- TODO confirm which program
dnl the Fujitsu compiler is expected to accept and adjust the probe.
if test x"${need_sparc64}" = x"yes"; then
AC_MSG_CHECKING(whether we can compile SPARC64 with intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv) {
double* q;
__m128d h1 = _mm_loaddup_pd(q);
return 0;
}
])],
[can_compile_sparc64=yes],
[can_compile_sparc64=no]
)
AC_MSG_RESULT([${can_compile_sparc64}])
if test x"$can_compile_sparc64" != x"yes"; then
AC_MSG_ERROR([Could not compile test program, try with --disable-sparc64, or adjust the C compiler or CFLAGS])
fi
AC_DEFINE([HAVE_SPARC64_SSE],[1],[SPARC64 intrinsics are supported on this CPU])
fi
if test x"${need_sse}" = x"yes"; then
AC_MSG_CHECKING(whether we can compile SSE3 with gcc intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
......@@ -909,6 +974,49 @@ if test x"${USE_ASSUMED_SIZE}" = x"yes" ; then
AC_DEFINE([USE_ASSUMED_SIZE],[1],[for performance reasons use assumed size Fortran arrays, even if not debuggable])
fi
enable_fortran2008_features=yes
AC_MSG_CHECKING(whether Fortran2008 features should be enabled)
AC_ARG_ENABLE([Fortran2008-features],
AS_HELP_STRING([--enable-Fortran2008-features],
[enables some Fortran 2008 features, default yes.]),
[
if test x"$enableval" = x"yes"; then