Commit 3e45b5fb authored by Andreas Marek's avatar Andreas Marek

Merge branch 'master_pre_stage'

parents 41342036 cf64301f
This diff is collapsed.
......@@ -2,7 +2,8 @@ Changelog for upcoming release
- not yet decided
Changelog for ELPA 2019.05.001.rc1
Changelog for ELPA 2019.05.001
- elpa_print_kernels supports GPU usage
- fix an error if PAPI measurements are activated
- new simple real kernels: block4 and block6
......@@ -21,6 +22,9 @@ been introduced a year ago. Removed routines:
-> cholesky_real
-> cholesky_complex
-> solve_tridi
- new kernels for ARM arch64 added
- fix an out-of-bound-error in elpa2
Changelog for ELPA 2018.11.001
......
......@@ -893,62 +893,61 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \
@top_srcdir@/src/elpa2/elpa2_trans_ev_band_to_full_template.F90 \
@top_srcdir@/src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90 \
@top_srcdir@/src/elpa2/kernels/simple_template.F90 \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_1hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_template.F90 \
@top_srcdir@/src/elpa2/kernels/complex_sse_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_6hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_128bit_256bit_512bit_BLOCK_template.c \
@top_srcdir@/src/elpa2/kernels/complex_template.F90 \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_4hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/asm_x86_64_double_precision.s \
@top_srcdir@/src/elpa2/kernels/real_avx512_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/asm_x86_64_single_precision.s \
@top_srcdir@/src/elpa2/kernels/real_avx512_6hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_simple.F90 \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real.F90 \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_4hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/mod_single_hh_trafo_real.F90 \
@top_srcdir@/src/elpa2/kernels/complex_sse_1hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_128bit_256bit_512bit_BLOCK_template.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_bgq.f90 \
@top_srcdir@/src/elpa2/kernels/real_sse_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_simple.F90 \
@top_srcdir@/src/elpa2/kernels/complex.F90 \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_2hv_template.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_1hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_sse_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_4hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_bgp.f90 \
@top_srcdir@/src/elpa2/kernels/real_sse_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_6hv_template.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sparc64_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_neon_arch64_4hv_single_precision.c \
@top_srcdir@/src/elpa2/elpa2_compute_complex_template.F90 \
@top_srcdir@/src/elpa2/elpa2_bandred_template.F90 \
@top_srcdir@/src/elpa2/pack_unpack_gpu.F90 \
......
......@@ -2,7 +2,7 @@
## Preamble ##
This file provides documentation on how to build the *ELPA* library in **version ELPA-2019.05.001.rc1**.
This file provides documentation on how to build the *ELPA* library in **version ELPA-2019.05.001**.
With release of **version ELPA-2017.05.001** the build process has been significantly simplified,
which makes it easier to install the *ELPA* library.
......@@ -16,7 +16,7 @@ With release ELPA 2019.05.001 the legacy API is disabled by default, however,
can be still switched on at build time.
Most likely with the release ELPA 2019.11.001 the legacy API will be deprecated and not supported anymore.
The release of ELPA 2019.05.001.rc1 changes the ABI and API, since it allows to also build the C-functions with optional error arguments
The release of ELPA 2019.05.001 changes the ABI and API, since it allows to also build the C-functions with optional error arguments
## How to install *ELPA* ##
......
......@@ -3,7 +3,7 @@
For more details and recent updates please visit the online [issue system] (https://gitlab.mpcdf.mpg.de/elpa/elpa/issues)
Issues which are not mentioned in a newer release are (considered as) solved.
### ELPA 2019.11.001.rc1 release ###
### ELPA 2019.11.001 release ###
- same issues as in ELPA 2017.11.001
### ELPA 2018.11.001 release ###
......
......@@ -110,6 +110,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2/kernels/complex_template.F90 \
src/elpa2/kernels/simple_template.F90 \
src/elpa2/kernels/simple_block4_template.F90 \
src/elpa2/kernels/simple_block6_template.F90 \
src/elpa2/pack_unpack_cpu.F90 \
src/elpa2/pack_unpack_gpu.F90 \
src/elpa2/compute_hh_trafo.F90 \
......@@ -194,9 +195,10 @@ if WITH_REAL_GENERIC_SIMPLE_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_simple_block4.F90
endif
#if WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_simple_block6.F90
#endif
if WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_simple_block6.F90
endif
if WITH_REAL_BGP_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_bgp.f90
endif
......@@ -227,6 +229,13 @@ if WITH_REAL_SPARC64_BLOCK2_KERNEL
#endif
endif
if WITH_REAL_NEON_ARCH64_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_2hv_single_precision.c
endif
endif
if WITH_REAL_VSX_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
......@@ -269,6 +278,13 @@ if WITH_REAL_SPARC64_BLOCK4_KERNEL
#endif
endif
if WITH_REAL_NEON_ARCH64_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_4hv_single_precision.c
endif
endif
if WITH_REAL_VSX_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
......@@ -311,6 +327,13 @@ if WITH_REAL_SPARC64_BLOCK6_KERNEL
#endif
endif
if WITH_REAL_NEON_ARCH64_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_neon_arch64_6hv_single_precision.c
endif
endif
if WITH_REAL_VSX_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
......@@ -768,28 +791,15 @@ EXTRA_DIST = \
src/elpa2/elpa2_trans_ev_band_to_full_template.F90 \
src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90 \
src/elpa2/elpa2_tridiag_band_template.F90 \
src/elpa2/kernels/complex_avx-avx2_1hv_template.c \
src/elpa2/kernels/complex_avx-avx2_2hv_template.c \
src/elpa2/kernels/complex_avx512_1hv_template.c \
src/elpa2/kernels/complex_avx512_2hv_template.c \
src/elpa2/kernels/complex_sse_1hv_template.c \
src/elpa2/kernels/complex_sse_2hv_template.c \
src/elpa2/kernels/complex_128bit_256bit_512bit_BLOCK_template.c \
src/elpa2/kernels/complex_template.F90 \
src/elpa2/kernels/real_avx-avx2_2hv_template.c \
src/elpa2/kernels/real_avx-avx2_4hv_template.c \
src/elpa2/kernels/real_avx-avx2_6hv_template.c \
src/elpa2/kernels/real_avx512_2hv_template.c \
src/elpa2/kernels/real_avx512_4hv_template.c \
src/elpa2/kernels/real_avx512_6hv_template.c \
src/elpa2/kernels/real_vsx_2hv_template.c \
src/elpa2/kernels/real_vsx_4hv_template.c \
src/elpa2/kernels/real_vsx_6hv_template.c \
src/elpa2/kernels/real_sse_2hv_template.c \
src/elpa2/kernels/real_sse_4hv_template.c \
src/elpa2/kernels/real_sse_6hv_template.c \
src/elpa2/kernels/real_128bit_256bit_512bit_BLOCK_template.c \
src/elpa2/kernels/real_template.F90 \
src/elpa2/kernels/simple_template.F90 \
src/elpa2/kernels/simple_block4_template.F90 \
src/elpa2/kernels/simple_block6_template.F90 \
src/elpa2/pack_unpack_cpu.F90 \
src/elpa2/pack_unpack_gpu.F90 \
src/elpa2/qr/elpa_pdgeqrf_template.F90 \
......
......@@ -2,7 +2,7 @@
## Current Release ##
The current release is ELPA 2019.05.001.rc1 The current supported API version
The current release is ELPA 2019.05.001 The current supported API version
is 20190501. This release supports the earliest API version 20170403.
The old, obsolete legacy API will be deprecated in the future !
......@@ -76,6 +76,8 @@ No other conditions have to be met.
Nonetheless, we are grateful if you cite the following publications:
If you use ELPA in general:
T. Auckenthaler, V. Blum, H.-J. Bungartz, T. Huckle, R. Johanni,
L. Kr\"amer, B. Lang, H. Lederer, and P. R. Willems,
"Parallel solution of partial symmetric eigenvalue problems from
......@@ -90,12 +92,20 @@ Nonetheless, we are grateful if you cite the following publications:
Journal of Physics Condensed Matter, 26 (2014)
doi:10.1088/0953-8984/26/21/213201
If you use the GPU version of ELPA:
Kus, P; Marek, A.; Lederer, H.
"GPU Optimization of Large-Scale Eigenvalue Solver",
In: Radu F., Kumar K., Berre I., Nordbotten J., Pop I. (eds)
Numerical Mathematics and Advanced Applications ENUMATH 2017. ENUMATH 2017.
Lecture Notes in Computational Science and Engineering, vol 126. Springer, Cham
If you use the new API and/or autotuning:
Kus; P.; Marek, A.; Koecher, S. S.; Kowalski H.-H.; Carbogno, Ch.; Scheurer, Ch.; Reuter, K.; Scheffler, M.; Lederer, H.
"Optimizations of the Eigenvaluesolvers in the ELPA Library",
Parllel Computing 85, 167-177 (2019)
## Installation of the *ELPA* library
......@@ -115,7 +125,7 @@ the possible configure options.
## Using *ELPA*
Please have a look at the "**USERS_GUIDE**" file, to get a documentation or at the [online]
(http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001.rc1/html/index.html) doxygen
(http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001/html/index.html) doxygen
documentation, where you find the definition of the interfaces.
## Contributing to *ELPA*
......
This file contains the release notes for the ELPA 2019.05.001.rc1 version
This file contains the release notes for the ELPA 2019.05.001 version
What is new?
-------------
......@@ -9,6 +9,7 @@ For detailed information about changes since release ELPA 2018.11 please have a
- C functions can have an optional error argument, if compiler supports this
=> ABI and API change
- as anounced, removal of deprecated routines
- new kernels for Arm arch64
ABI change
---------------------
......
......@@ -146,7 +146,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example "man elpa2_print_kernels" should provide the documentation for the *ELPA* program which prints all
the available kernels.
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001.rc1/html/index.html)
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001/html/index.html)
for each *ELPA* release is available.
......@@ -13,7 +13,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example "man elpa2_print_kernels" should provide the documentation for the *ELPA* program, which prints all
the available kernels.
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001.rc1/html/index.html)
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001/html/index.html)
for each *ELPA* release is available.
......@@ -200,7 +200,7 @@ The following table gives a list of all supported parameters which can be used t
## III) List of computational routines ##
The following compute routines are available in *ELPA*: Please have a look at the man pages or [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001.rc1/html/index.html) for details.
The following compute routines are available in *ELPA*: Please have a look at the man pages or [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001/html/index.html) for details.
| Name | Purpose | since API version |
......
......@@ -22,7 +22,7 @@ The *ELPA* library consists of two main parts:
Both variants of the *ELPA* solvers are available for real or complex singe and double precision valued matrices.
Thus *ELPA* provides the following user functions (see man pages or [online] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001.rc1/html/index.html) for details):
Thus *ELPA* provides the following user functions (see man pages or [online] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2019.05.001/html/index.html) for details):
- elpa_get_communicators : set the row / column communicators for *ELPA*
- elpa_solve_evp_complex_1stage_{single|double} : solve a {single|double} precision complex eigenvalue proplem with the *ELPA 1stage* solver
......
This diff is collapsed.
......@@ -125,7 +125,7 @@ then
CLUSTER="draco"
fi
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS"
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS on $mpiTasks tasks"
# GPU runners
if [ "$CI_RUNNER_TAGS" == "gpu" ]
......@@ -144,16 +144,17 @@ then
cat ./run_${CLUSTER}_1node_2GPU.sh
echo " "
echo "Submitting to SLURM"
sbatch -W ./run_${CLUSTER}_1node_2GPU.sh
exitCode=$?
if sbatch -W ./run_${CLUSTER}_1node_2GPU.sh; then
exitCode=$?
else
exitCode=$?
echo "Submission exited with exitCode $exitCode"
fi
echo " "
echo "Exit Code of sbatch: $exitCode"
echo " "
if (( $exitCode > 0 ))
then
#if (( $exitCode > 0 ))
#then
cat ./ELPA_CI_2gpu.err.*
fi
#fi
fi
......@@ -174,24 +175,31 @@ then
cat ./run_${CLUSTER}_1node.sh
echo " "
echo "Submitting to SLURM"
sbatch -W ./run_${CLUSTER}_1node.sh
exitCode=$?
if sbatch -W ./run_${CLUSTER}_1node.sh; then
exitCode=$?
else
exitCode=$?
echo "Submission excited with exitCode $exitCode"
fi
echo " "
echo "Exit Code of sbatch: $exitCode"
echo " "
cat ./ELPA_CI.out.*
if [ $exitCode -ne 0 ]
then
#if [ $exitCode -ne 0 ]
#then
cat ./ELPA_CI.err.*
fi
#fi
fi
if [ $exitCode -ne 0 ]
#if [ $exitCode -ne 0 ]
#then
if [ -f ./test-suite.log ]
then
cat ./test-suite.log
fi
#fi
exit $exitCode
......
......@@ -102,7 +102,7 @@ then
CLUSTER="draco"
fi
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS"
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS on $mpiTasks tasks"
#distcheck
if [[ "$CI_RUNNER_TAGS" =~ "distcheck" ]]
......@@ -126,17 +126,21 @@ then
echo "Exit Code of sbatch: $exitCode"
echo " "
cat ./ELPA_CI.out.*
if [ $exitCode -ne 0 ]
then
#if [ $exitCode -ne 0 ]
#then
cat ./ELPA_CI.err.*
#fi
if [ -f ./test-suite.log ]
then
cat ./test-suite.log
fi
fi
if [ $exitCode -ne 0 ]
then
#if [ $exitCode -ne 0 ]
#then
cat ./test-suite.log
fi
#fi
exit $exitCode
......
......@@ -120,7 +120,7 @@ then
CLUSTER="draco"
fi
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS"
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS on $mpiTasks tasks"
#project_test
if [[ "$CI_RUNNER_TAGS" =~ "project_test" ]]
......@@ -129,18 +129,18 @@ then
echo "mkdir -p build" >> ./run_${CLUSTER}_1node.sh
echo "pushd build" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running autogen " >> ./run_${CLUSTER}_1node.sh
echo "#Running autogen " >> ./run_${CLUSTER}_1node.sh
echo "../autogen.sh" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running configure " >> ./run_${CLUSTER}_1node.sh
echo "#Running configure " >> ./run_${CLUSTER}_1node.sh
echo "../configure " "$configureArgs" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "export TASKS=$mpiTasks" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running make " >> ./run_${CLUSTER}_1node.sh
echo "#Running make " >> ./run_${CLUSTER}_1node.sh
echo "make -j 8" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running make install" >> ./run_${CLUSTER}_1node.sh
echo "#Running make install" >> ./run_${CLUSTER}_1node.sh
echo "make install" >> ./run_${CLUSTER}_1node.sh
echo "popd" >> ./run_${CLUSTER}_1node.sh
echo "mkdir -p $projectName/build" >> ./run_${CLUSTER}_1node.sh
......@@ -149,19 +149,19 @@ then
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " Testting project " >> ./run_${CLUSTER}_1node.sh
echo " #Testting project " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running autogen " >> ./run_${CLUSTER}_1node.sh
echo "#Running autogen " >> ./run_${CLUSTER}_1node.sh
echo "../autogen.sh" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running configure " >> ./run_${CLUSTER}_1node.sh
echo "#Running configure " >> ./run_${CLUSTER}_1node.sh
echo "../configure " "$projectConfigureArgs " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running make " >> ./run_${CLUSTER}_1node.sh
echo "#Running make " >> ./run_${CLUSTER}_1node.sh
echo "make -j 8" >> ./run_${CLUSTER}_1node.sh
echo "export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:\$LD_LIBRARY_PATH" >> ./run_${CLUSTER}_1node.sh
echo "./$projectExecutable" >> ./run_${CLUSTER}_1node.sh
......@@ -184,18 +184,17 @@ then
echo "Exit Code of sbatch: $exitCode"
echo " "
cat ./ELPA_CI.out.*
if [ $exitCode -ne 0 ]
then
#if [ $exitCode -ne 0 ]
#then
cat ./ELPA_CI.err.*
#fi
if [ -f ./test-suite.log ]
then
cat ./test-suite.log
fi
fi
if [ $exitCode -ne 0 ]
then
cat ./test-suite.log
fi
exit $exitCode
fi
......@@ -50,13 +50,15 @@ if test x$_cv_gnu_make_command = x ; then
AC_MSG_ERROR([Need GNU Make])
fi
enable_legacy=no
AC_MSG_CHECKING(whether legacy interface should be provided)
AC_ARG_ENABLE([legacy-interface],
AS_HELP_STRING([--enable-legacy-interface],
[build legacy API, default no]),
[
if test x"$enableval" = x"yes"; then
enable_legayc=yes
enable_legacy=yes
else
enable_legacy=no
fi
......@@ -227,9 +229,9 @@ fi
dnl check which MPI binray invokes a MPI job
if test x"$with_mpi" = x"yes"; then
AC_CHECK_PROGS([MPI_BINARY], [mpiexec.hydra mpiexec mpirun poe runjob srun], [no])
AC_CHECK_PROGS([MPI_BINARY], [mpiexec.hydra mpiexec mpirun poe runjob srun aprun], [no])
if test x"$MPI_BINARY" = x"no"; then
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun, poe, runjob, srun])
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun, poe, runjob, srun, aprun])
fi
fi
......@@ -613,6 +615,7 @@ m4_define(elpa_m4_generic_kernels, [
real_generic
real_generic_simple
real_generic_simple_block4
real_generic_simple_block6
complex_generic
complex_generic_simple
])
......@@ -636,6 +639,12 @@ m4_define(elpa_m4_sparc64_kernels, [
real_sparc64_block6
])
m4_define(elpa_m4_neon_arch64_kernels, [
real_neon_arch64_block2
real_neon_arch64_block4
real_neon_arch64_block6
])
m4_define(elpa_m4_vsx_kernels, [
real_vsx_block2
real_vsx_block4
......@@ -681,7 +690,7 @@ m4_define(elpa_m4_gpu_kernels, [
complex_gpu
])
m4_define(elpa_m4_kernel_types, [generic sparc64 vsx sse sse_assembly avx avx2 avx512 bgp bgq gpu])
m4_define(elpa_m4_kernel_types, [generic sparc64 neon_arch64 vsx sse sse_assembly avx avx2 avx512 bgp bgq gpu])
m4_define(elpa_m4_all_kernels,
m4_foreach_w([elpa_m4_type],
......@@ -715,6 +724,7 @@ AC_DEFUN([ELPA_SELECT_KERNELS], [
dnl Modify list of kernels with configure arguments
ELPA_SELECT_KERNELS([generic],[enable])
ELPA_SELECT_KERNELS([sparc64],[disable])
ELPA_SELECT_KERNELS([neon_arch64],[disable])
ELPA_SELECT_KERNELS([vsx],[disable])
ELPA_SELECT_KERNELS([sse],[enable])
ELPA_SELECT_KERNELS([sse_assembly],[enable])
......@@ -730,7 +740,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
])
if test x"${enable_bgp}" = x"yes" -o x"$enable_bgq" = x"yes"; then
m4_foreach_w([elpa_m4_kernel], elpa_m4_sparc64_kernels elpa_m4_vsx_kernels elpa_m4_sse_kernels elpa_m4_avx_kernels elpa_m4_avx2_kernels elpa_m4_avx512_kernels, [
m4_foreach_w([elpa_m4_kernel], elpa_m4_sparc64_kernels elpa_m4_neon_arch64_kernels elpa_m4_vsx_kernels elpa_m4_sse_kernels elpa_m4_avx_kernels elpa_m4_avx2_kernels elpa_m4_avx512_kernels, [
if x"$use_[]elpa_m4_kernel[]" = x"yes" ; then
echo "Disabling elpa_m4_kernel due to BGP/BGQ option"
fi
......@@ -790,7 +800,7 @@ AC_DEFUN([ELPA_KERNEL_DEPENDS],[
])
fi
])
m4_foreach_w([elpa_m4_arch],[sparc64 vsx sse avx avx2 avx512],[
m4_foreach_w([elpa_m4_arch],[sparc64 neon_arch64 vsx sse avx avx2 avx512],[
ELPA_KERNEL_DEPENDS([real_]elpa_m4_arch[_block6], [real_]elpa_m4_arch[_block4 real_]elpa_m4_arch[_block2])
ELPA_KERNEL_DEPENDS([real_]elpa_m4_arch[_block4], [real_]elpa_m4_arch[_block2])
ELPA_KERNEL_DEPENDS([complex_]elpa_m4_arch[_block2], [complex_]elpa_m4_arch[_block1])
......@@ -848,7 +858,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
m4_foreach_w([elpa_m4_kind],[real complex],[
m4_foreach_w([elpa_m4_kernel],
m4_foreach_w([elpa_m4_cand_kernel],
elpa_m4_avx512_kernels elpa_m4_avx2_kernels elpa_m4_avx_kernels elpa_m4_sse_kernels elpa_m4_sse_assembly_kernels elpa_m4_sparc64_kernels elpa_m4_vsx_kernels elpa_m4_generic_kernels,
elpa_m4_avx512_kernels elpa_m4_avx2_kernels elpa_m4_avx_kernels elpa_m4_sse_kernels elpa_m4_sse_assembly_kernels elpa_m4_sparc64_kernels elpa_m4_neon_arch64_kernels elpa_m4_vsx_kernels elpa_m4_generic_kernels,
[m4_bmatch(elpa_m4_cand_kernel,elpa_m4_kind,elpa_m4_cand_kernel)] ),
[
if test -z "$default_[]elpa_m4_kind[]_kernel"; then
......@@ -895,7 +905,6 @@ int main(int argc, char **argv) {
AC_DEFINE([HAVE_VSX_SSE],[1],[Altivec VSX intrinsics are supported on this CPU])
fi
if test x"${need_sparc64}" = x"yes"; then
AC_MSG_CHECKING(whether we can compile SPARC64 with intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
......@@ -917,6 +926,27 @@ int main(int argc, char **argv) {
AC_DEFINE([HAVE_SPARC64_SSE],[1],[SPARC64 intrinsics are supported on this CPU])
fi
if test x"${need_neon_arch64}" = x"yes"; then
AC_MSG_CHECKING(whether we can compile NEON ARCH64 with intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <arm_neon.h>
int main(int argc, char **argv) {
__Float64x2_t x1, x2, x3, x4;
x4 = vfmaq_f64(x1, x2, x3);
return 0;
}
])],
[can_compile_neon_arch64=yes],
[can_compile_neon_arch64=no]
)
AC_MSG_RESULT([${can_compile_neon_arch64}])
if test x"$can_compile_neon_arch64" != x"yes"; then
AC_MSG_ERROR([Could not compile test program, try with --disable-neon_arch64, or adjust the C compiler or CFLAGS])
fi
AC_DEFINE([HAVE_NEON_ARCH64_SSE],[1],[NEON_ARCH64 intrinsics are supported on this CPU])
fi
if test x"${need_sse}" = x"yes"; then
AC_MSG_CHECKING(whether we can compile SSE3 with gcc intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
......@@ -1502,12 +1532,12 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
#echo "***********************************************************************"
#echo " "
#echo " "
echo "***********************************************************************"
echo "* This is a the first release candidate of ELPA 2019.05.001.rc1 *"
echo "* There might be still some changes until the final release of *"
echo "* ELPA 2019.05.001 *"
echo "***********************************************************************"
echo " "
#echo "***********************************************************************"
#echo "* This is a the first release candidate of ELPA 2019.05.001.rc2 *"
#echo "* There might be still some changes until the final release of *"
#echo "* ELPA 2019.05.001 *"
#echo "***********************************************************************"
#echo " "
if test x"$enable_kcomputer" = x"yes" ; then
echo " "
......
......@@ -19,7 +19,7 @@
%define with_openmp 0
Name: elpa
Version: 2019.05.001.rc1
Version: 2019.05.001
Release: 1
Summary: A massively parallel eigenvector solver
License: LGPL-3.0
......
......@@ -44,10 +44,14 @@ enum ELPA_SOLVERS {
X(ELPA_2STAGE_REAL_SPARC64_BLOCK2, 19, @ELPA_2STAGE_REAL_SPARC64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK4, 20, @ELPA_2STAGE_REAL_SPARC64_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK6, 21, @ELPA_2STAGE_REAL_SPARC64_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK2, 22, @ELPA_2STAGE_REAL_VSX_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK4, 23, @ELPA_2STAGE_REAL_VSX_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK6, 24, @ELPA_2STAGE_REAL_VSX_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 25, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__)
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2, 22, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK4, 23, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK6, 24, @ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK2, 25, @ELPA_2STAGE_REAL_VSX_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK4, 26, @ELPA_2STAGE_REAL_VSX_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK6, 27, @ELPA_2STAGE_REAL_VSX_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 28, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6, 29, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6_COMPILED@, __VA_ARGS__)
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
......
......@@ -339,6 +339,7 @@
kernel .eq. ELPA_2STAGE_REAL_AVX512_BLOCK2 .or. &
kernel .eq. ELPA_2STAGE_REAL_SSE_BLOCK2 .or. &
kernel .eq. ELPA_2STAGE_REAL_SPARC64_BLOCK2 .or. &
kernel .eq. ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2 .or. &
kernel .eq. ELPA_2STAGE_REAL_VSX_BLOCK2 .or. &
kernel .eq. ELPA_2STAGE_REAL_GENERIC .or. &
kernel .eq. ELPA_2STAGE_REAL_GENERIC_SIMPLE .or. &
......@@ -850,6 +851,43 @@
#endif /* REALCASE == 1 */
#if REALCASE == 1
! implementation of neon_arch64 block 2 real case
#if defined(WITH_REAL_NEON_ARCH64_BLOCK2_KERNEL)
#ifndef WITH_FIXED_REAL_KERNEL
if (kernel .eq. ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2) then
#endif /* not WITH_FIXED_REAL_KERNEL */
#if (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_NEON_ARCH64_BLOCK6_KERNEL) && !defined(WITH_REAL_NEON_ARCH64_BLOCK4_KERNEL))
do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo_&
&MATH_DATATYPE&
&_neon_arch64_2hv_&
&PRECISION &
& (c_loc(a(1,j+off+a_off-1,istripe,my_thread)), w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_&
&MATH_DATATYPE&
&_neon_arch64_2hv_&
&PRECISION &
& (c_loc(a(1,j+off+a_off-1,istripe)), w, nbw, nl, stripe_width, nbw)
#endif
enddo
#endif /* (!defined(WITH_FIXED_REAL_KERNEL)) || (defined(WITH_FIXED_REAL_KERNEL) && !defined(WITH_REAL_NEON_ARCH64_BLOCK6_KERNEL) && !defined(WITH_REAL_NEON_ARCH64_BLOCK4_KERNEL)) */
#ifndef WITH_FIXED_REAL_KERNEL
endif
#endif /* not WITH_FIXED_REAL_KERNEL */