Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
elpa
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
10
Issues
10
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Environments
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
elpa
elpa
Commits
f20ce195
Commit
f20ce195
authored
Nov 23, 2017
by
Andreas Marek
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master_pre_stage' into loh/autotuning
parents
c9effde3
f5faf502
Changes
44
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
44 changed files
with
31595 additions
and
1291 deletions
+31595
-1291
.ci-env-vars
.ci-env-vars
+15
-11
.gitlab-ci.yml
.gitlab-ci.yml
+20212
-1002
Changelog
Changelog
+5
-0
INSTALL.md
INSTALL.md
+1
-1
Makefile.am
Makefile.am
+69
-0
README.md
README.md
+1
-1
RELEASE_NOTES
RELEASE_NOTES
+5
-14
SWITCHING_TO_NEW_INTERFACE.md
SWITCHING_TO_NEW_INTERFACE.md
+1
-1
USERS_GUIDE.md
USERS_GUIDE.md
+1
-1
configure.ac
configure.ac
+69
-7
elpa.spec
elpa.spec
+1
-1
elpa/elpa_constants.h.in
elpa/elpa_constants.h.in
+8
-2
generate_gitlab_ci_tests.py
generate_gitlab_ci_tests.py
+674
-0
src/elpa1/elpa_invert_trm.F90
src/elpa1/elpa_invert_trm.F90
+1
-1
src/elpa1/legacy_interface/elpa_cholesky_template.F90
src/elpa1/legacy_interface/elpa_cholesky_template.F90
+7
-2
src/elpa2/compute_hh_trafo.F90
src/elpa2/compute_hh_trafo.F90
+564
-8
src/elpa2/elpa2_template.F90
src/elpa2/elpa2_template.F90
+22
-2
src/elpa2/kernels/complex_sparc64_1hv_double_precision.c
src/elpa2/kernels/complex_sparc64_1hv_double_precision.c
+55
-0
src/elpa2/kernels/complex_sparc64_1hv_single_precision.c
src/elpa2/kernels/complex_sparc64_1hv_single_precision.c
+55
-0
src/elpa2/kernels/complex_sparc64_2hv_double_precision.c
src/elpa2/kernels/complex_sparc64_2hv_double_precision.c
+55
-0
src/elpa2/kernels/complex_sparc64_2hv_single_precision.c
src/elpa2/kernels/complex_sparc64_2hv_single_precision.c
+55
-0
src/elpa2/kernels/real_sparc64_2hv_double_precision.c
src/elpa2/kernels/real_sparc64_2hv_double_precision.c
+55
-0
src/elpa2/kernels/real_sparc64_2hv_single_precision.c
src/elpa2/kernels/real_sparc64_2hv_single_precision.c
+55
-0
src/elpa2/kernels/real_sparc64_4hv_double_precision.c
src/elpa2/kernels/real_sparc64_4hv_double_precision.c
+55
-0
src/elpa2/kernels/real_sparc64_4hv_single_precision.c
src/elpa2/kernels/real_sparc64_4hv_single_precision.c
+55
-0
src/elpa2/kernels/real_sparc64_6hv_double_precision.c
src/elpa2/kernels/real_sparc64_6hv_double_precision.c
+55
-0
src/elpa2/kernels/real_sparc64_6hv_single_precision.c
src/elpa2/kernels/real_sparc64_6hv_single_precision.c
+55
-0
src/elpa2/kernels/real_sse_2hv_template.c
src/elpa2/kernels/real_sse_2hv_template.c
+779
-14
src/elpa2/kernels/real_sse_4hv_template.c
src/elpa2/kernels/real_sse_4hv_template.c
+961
-7
src/elpa2/kernels/real_sse_6hv_template.c
src/elpa2/kernels/real_sse_6hv_template.c
+1822
-207
src/elpa2/kernels/real_vsx_2hv_double_precision.c
src/elpa2/kernels/real_vsx_2hv_double_precision.c
+55
-0
src/elpa2/kernels/real_vsx_2hv_single_precision.c
src/elpa2/kernels/real_vsx_2hv_single_precision.c
+55
-0
src/elpa2/kernels/real_vsx_2hv_template.c
src/elpa2/kernels/real_vsx_2hv_template.c
+1998
-0
src/elpa2/kernels/real_vsx_4hv_double_precision.c
src/elpa2/kernels/real_vsx_4hv_double_precision.c
+55
-0
src/elpa2/kernels/real_vsx_4hv_single_precision.c
src/elpa2/kernels/real_vsx_4hv_single_precision.c
+55
-0
src/elpa2/kernels/real_vsx_4hv_template.c
src/elpa2/kernels/real_vsx_4hv_template.c
+1435
-0
src/elpa2/kernels/real_vsx_6hv_double_precision.c
src/elpa2/kernels/real_vsx_6hv_double_precision.c
+55
-0
src/elpa2/kernels/real_vsx_6hv_single_precision.c
src/elpa2/kernels/real_vsx_6hv_single_precision.c
+55
-0
src/elpa2/kernels/real_vsx_6hv_template.c
src/elpa2/kernels/real_vsx_6hv_template.c
+2055
-0
src/elpa2/legacy_interface/elpa2_utilities.F90
src/elpa2/legacy_interface/elpa2_utilities.F90
+1
-1
test_project/configure.ac
test_project/configure.ac
+2
-2
test_project_2stage/configure.ac
test_project_2stage/configure.ac
+2
-2
test_project_2stage_legacy_api/configure.ac
test_project_2stage_legacy_api/configure.ac
+2
-2
test_project_legacy_api/configure.ac
test_project_legacy_api/configure.ac
+2
-2
No files found.
.ci-env-vars
View file @
f20ce195
if [ "$(hostname)" = "buildtest-rzg" ]; then module load impi/5.1.3 intel/16.0 gcc/6.3 mkl/11.3 autotools pkg-config; fi
if [ "$(hostname)" =
=
"buildtest-rzg" ]; then module load impi/5.1.3 intel/16.0 gcc/6.3 mkl/11.3 autotools pkg-config; fi
if [ "$(hostname)" =
"knl1" -o "$(hostname)" = "knl2" -o "$(hostname)" = "knl3" -o "$(hostname)" = "knl4" ] ; then module load impi/5.1.3 intel/16.0 gcc/7.2 mkl/11.3
pkg-config; fi
if [ "$(hostname)" =
= "knl1" -o "$(hostname)" == "knl2" -o "$(hostname)" == "knl3" -o "$(hostname)" == "knl4" ] ; then module load impi/2017.4 intel/17.0 gcc/7.2 mkl/2017
pkg-config; fi
if [ "$(hostname)" == "maik" ]; then module load impi/5.1.3 intel intel/17.0 gcc/
7.2
mkl/2017 pkg-config/0.29.1; fi
if [ "$(hostname)" == "maik" ]; then module load impi/5.1.3 intel intel/17.0 gcc/
6.3
mkl/2017 pkg-config/0.29.1; fi
if [ "$(hostname)" = "gp02" ] ; then module load impi/5.1.3 intel/17.0 gcc/6.3 mkl/2017 pkg-config; fi
if [ "$(hostname)" =
=
"gp02" ] ; then module load impi/5.1.3 intel/17.0 gcc/6.3 mkl/2017 pkg-config; fi
if [ "$(hostname)" = "amarek-elpa-gitlab-runner-1" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" =
=
"amarek-elpa-gitlab-runner-1" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" = "amarek-elpa-gitlab-runner-2" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" =
=
"amarek-elpa-gitlab-runner-2" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" = "amarek-elpa-gitlab-runner-3" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" =
=
"amarek-elpa-gitlab-runner-3" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" = "amarek-elpa-gitlab-runner-4" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" =
=
"amarek-elpa-gitlab-runner-4" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" = "dvl01" ]; then module load intel/17.0 gcc/5.4 mkl/2017 impi/2017.2 gcc/5.4 cuda/8.0; fi
if [ "$(hostname)" =
=
"dvl01" ]; then module load intel/17.0 gcc/5.4 mkl/2017 impi/2017.2 gcc/5.4 cuda/8.0; fi
if [ "$(hostname)" = "dvl02" ]; then module load intel/17.0 gcc/5.4 mkl/2017 impi/2017.2 gcc/5.4 cuda/8.0; fi
if [ "$(hostname)" =
=
"dvl02" ]; then module load intel/17.0 gcc/5.4 mkl/2017 impi/2017.2 gcc/5.4 cuda/8.0; fi
if [ "$(hostname)" == "miy01" ]; then module
load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH
; fi
if [ "$(hostname)" == "miy01" ]; then module
purge && module load gcc/5.4 smpi essl/5.5 cuda pgi/17.9 && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH && export OMPI_CC=gcc && export OMPI_FC=gfortran
; fi
if [ "$(hostname)" == "miy02" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
if [ "$(hostname)" == "miy02" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
if [ "$(hostname)" == "miy03" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
if [ "$(hostname)" == "miy03" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
...
@@ -53,5 +53,9 @@ export MKL_GFORTRAN_SCALAPACK_LDFLAGS_NO_MPI_OMP="$MKL_GFORTRAN_SCALAPACK_NO_MPI
...
@@ -53,5 +53,9 @@ export MKL_GFORTRAN_SCALAPACK_LDFLAGS_NO_MPI_OMP="$MKL_GFORTRAN_SCALAPACK_NO_MPI
export ASAN_OPTIONS=suppressions=no_asan_for_mpi.supp,fast_unwind_on_malloc=0
export ASAN_OPTIONS=suppressions=no_asan_for_mpi.supp,fast_unwind_on_malloc=0
export LSAN_OPTIONS=suppressions=no_lsan_for_mpi.supp
export LSAN_OPTIONS=suppressions=no_lsan_for_mpi.supp
fi
fi
if [ "$(hostname)" != "knl1" -a "$(hostname)" != "knl2" -a "$(hostname)" != "knl3" -a "$(hostname)" != "knl4" -a "$(hostname)" != "maik" ] ; then export I_MPI_DEBUG=5; fi
module list
module list
.gitlab-ci.yml
View file @
f20ce195
This diff is collapsed.
Click to expand it.
Changelog
View file @
f20ce195
Changelog for ELPA 2017.05.003
- remove bug in invert_triangular, which had been introduced
in ELPA 2017.05.002
Changelog for ELPA 2017.05.002
Changelog for ELPA 2017.05.002
Mainly bugfixes for ELPA 2017.05.001:
Mainly bugfixes for ELPA 2017.05.001:
...
...
INSTALL.md
View file @
f20ce195
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
## Preamble ##
## Preamble ##
This file provides documentation on how to build the
*ELPA*
library in
**version ELPA-2017.05.00
2
**
.
This file provides documentation on how to build the
*ELPA*
library in
**version ELPA-2017.05.00
3
**
.
With release of
**version ELPA-2017.05.001**
the build process has been significantly simplified,
With release of
**version ELPA-2017.05.001**
the build process has been significantly simplified,
which makes it easier to install the
*ELPA*
library
which makes it easier to install the
*ELPA*
library
...
...
Makefile.am
View file @
f20ce195
...
@@ -203,6 +203,20 @@ endif
...
@@ -203,6 +203,20 @@ endif
endif
endif
endif
endif
if
WITH_REAL_SPARC64_BLOCK2_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sparc64_2hv_double_precision.c
#if WANT_SINGLE_PRECISION_REAL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_2hv_single_precision.c
#endif
endif
if
WITH_REAL_VSX_BLOCK2_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_vsx_2hv_double_precision.c
if
WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_vsx_2hv_single_precision.c
endif
endif
if
WITH_REAL_SSE_BLOCK2_KERNEL
if
WITH_REAL_SSE_BLOCK2_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sse_2hv_double_precision.c
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sse_2hv_double_precision.c
if
WANT_SINGLE_PRECISION_REAL
if
WANT_SINGLE_PRECISION_REAL
...
@@ -231,6 +245,19 @@ if WANT_SINGLE_PRECISION_REAL
...
@@ -231,6 +245,19 @@ if WANT_SINGLE_PRECISION_REAL
endif
endif
endif
endif
if
WITH_REAL_SPARC64_BLOCK4_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sparc64_4hv_double_precision.c
#if WANT_SINGLE_PRECISION_REAL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_4hv_single_precision.c
#endif
endif
if
WITH_REAL_VSX_BLOCK4_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_vsx_4hv_double_precision.c
if
WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_vsx_4hv_single_precision.c
endif
endif
if
WITH_REAL_SSE_BLOCK4_KERNEL
if
WITH_REAL_SSE_BLOCK4_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sse_4hv_double_precision.c
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sse_4hv_double_precision.c
...
@@ -260,7 +287,19 @@ if WANT_SINGLE_PRECISION_REAL
...
@@ -260,7 +287,19 @@ if WANT_SINGLE_PRECISION_REAL
endif
endif
endif
endif
if
WITH_REAL_SPARC64_BLOCK6_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sparc64_6hv_double_precision.c
#if WANT_SINGLE_PRECISION_REAL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_6hv_single_precision.c
#endif
endif
if
WITH_REAL_VSX_BLOCK6_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_vsx_6hv_double_precision.c
if
WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_vsx_6hv_single_precision.c
endif
endif
if
WITH_REAL_SSE_BLOCK6_KERNEL
if
WITH_REAL_SSE_BLOCK6_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sse_6hv_double_precision.c
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sse_6hv_double_precision.c
...
@@ -290,6 +329,19 @@ if WANT_SINGLE_PRECISION_REAL
...
@@ -290,6 +329,19 @@ if WANT_SINGLE_PRECISION_REAL
endif
endif
endif
endif
#if WITH_COMPLEX_SPARC64_BLOCK1_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sparc64_1hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sparc64_1hv_single_precision.c
#endif
#endif
#
#if WITH_COMPLEX_VSX_BLOCK1_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_vsx_1hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_vsx_1hv_single_precision.c
#endif
#endif
if
WITH_COMPLEX_SSE_BLOCK1_KERNEL
if
WITH_COMPLEX_SSE_BLOCK1_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/complex_sse_1hv_double_precision.c
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/complex_sse_1hv_double_precision.c
...
@@ -320,6 +372,20 @@ if WANT_SINGLE_PRECISION_COMPLEX
...
@@ -320,6 +372,20 @@ if WANT_SINGLE_PRECISION_COMPLEX
endif
endif
endif
endif
#if WITH_COMPLEX_SPARC64_BLOCK2_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sparc64_2hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sparc64_2hv_single_precision.c
#endif
#endif
#
#if WITH_COMPLEX_VSX_BLOCK2_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_vsx_2hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_vsx_2hv_single_precision.c
#endif
#endif
if
WITH_COMPLEX_SSE_BLOCK2_KERNEL
if
WITH_COMPLEX_SSE_BLOCK2_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/complex_sse_2hv_double_precision.c
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/complex_sse_2hv_double_precision.c
if
WANT_SINGLE_PRECISION_COMPLEX
if
WANT_SINGLE_PRECISION_COMPLEX
...
@@ -621,6 +687,9 @@ EXTRA_DIST = \
...
@@ -621,6 +687,9 @@ EXTRA_DIST = \
src/elpa2/kernels/real_avx512_2hv_template.c
\
src/elpa2/kernels/real_avx512_2hv_template.c
\
src/elpa2/kernels/real_avx512_4hv_template.c
\
src/elpa2/kernels/real_avx512_4hv_template.c
\
src/elpa2/kernels/real_avx512_6hv_template.c
\
src/elpa2/kernels/real_avx512_6hv_template.c
\
src/elpa2/kernels/real_vsx_2hv_template.c
\
src/elpa2/kernels/real_vsx_4hv_template.c
\
src/elpa2/kernels/real_vsx_6hv_template.c
\
src/elpa2/kernels/real_sse_2hv_template.c
\
src/elpa2/kernels/real_sse_2hv_template.c
\
src/elpa2/kernels/real_sse_4hv_template.c
\
src/elpa2/kernels/real_sse_4hv_template.c
\
src/elpa2/kernels/real_sse_6hv_template.c
\
src/elpa2/kernels/real_sse_6hv_template.c
\
...
...
README.md
View file @
f20ce195
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
## Current Release ##
## Current Release ##
The current release is ELPA 2017.05.00
2
. The current supported API version
The current release is ELPA 2017.05.00
3
. The current supported API version
is 20170403. This release supports the earliest API version 20170403.
is 20170403. This release supports the earliest API version 20170403.
## About *ELPA*
## About *ELPA*
...
...
RELEASE_NOTES
View file @
f20ce195
This file contains the release notes for the ELPA 2017.05.00
2
version
This file contains the release notes for the ELPA 2017.05.00
3
version
What is new?
What is new?
-------------
-------------
For detailed information about changes since release ELPA 2016.11.001 please have a look at the Changelog file
For detailed information about changes since release ELPA 2017.05.002 please have a look at the Changelog file
Highlights are:
- a more generic and more flexible API, which allows easy implementation of upcoming features
- This release contains a fix for a bug introduced in ELPA 2017.05.002
- faster GPU implementation, especially for ELPA 1stage
- the restriction of the block-cyclic distribution blocksize = 128 in the GPU
case is relaxed
- Faster CPU implementation due to better blocking
- support of already banded matrices (new API only!)
- improved KNL support
ABI change
ABI change
---------------------
---------------------
Since release 2016.05.001 the ABI has not changed. Only additional interfaces habe been added, please have also a look
Since release 2017.05.001 the ABI has not changed.
at the DEPRECTATED_FEATURES file
Any incompatibilities to previous version?
Any incompatibilities to previous version?
---------------------------------------
---------------------------------------
As mentioned before, the ABI of ELPA was not changed; There is no
As mentioned before, the ABI of ELPA was not changed; There is no
incompatibility with the previous version ELPA 201
6.11.001
.
incompatibility with the previous version ELPA 201
7.05.002
.
SWITCHING_TO_NEW_INTERFACE.md
View file @
f20ce195
...
@@ -149,7 +149,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
...
@@ -149,7 +149,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example "man elpa2_print_kernels" should provide the documentation for the
*ELPA*
program which prints all
For example "man elpa2_print_kernels" should provide the documentation for the
*ELPA*
program which prints all
the available kernels.
the available kernels.
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2017.05.00
2
/html/index.html)
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2017.05.00
3
/html/index.html)
for each
*ELPA*
release is available.
for each
*ELPA*
release is available.
USERS_GUIDE.md
View file @
f20ce195
...
@@ -9,7 +9,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
...
@@ -9,7 +9,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example "man elpa2_print_kernels" should provide the documentation for the
*ELPA*
program which prints all
For example "man elpa2_print_kernels" should provide the documentation for the
*ELPA*
program which prints all
the available kernels.
the available kernels.
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2017.05.00
2
/html/index.html)
Also a [online doxygen documentation] (http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2017.05.00
3
/html/index.html)
for each
*ELPA*
release is available.
for each
*ELPA*
release is available.
...
...
configure.ac
View file @
f20ce195
...
@@ -27,7 +27,7 @@ AM_SILENT_RULES([yes])
...
@@ -27,7 +27,7 @@ AM_SILENT_RULES([yes])
# by the current interface, as they are ABI compatible (e.g. only new symbols
# by the current interface, as they are ABI compatible (e.g. only new symbols
# were added by the new interface)
# were added by the new interface)
#
#
AC_SUBST([ELPA_SO_VERSION], [10:
0
:2])
AC_SUBST([ELPA_SO_VERSION], [10:
1
:2])
# API Version
# API Version
AC_DEFINE([EARLIEST_API_VERSION], [20170403], [Earliest supported ELPA API version])
AC_DEFINE([EARLIEST_API_VERSION], [20170403], [Earliest supported ELPA API version])
...
@@ -142,7 +142,7 @@ fi
...
@@ -142,7 +142,7 @@ fi
dnl check which MPI binary invokes an MPI job
dnl check which MPI binary invokes an MPI job
if test x"$with_mpi" = x"yes"; then
if test x"$with_mpi" = x"yes"; then
AC_CHECK_PROGS([MPI_BINARY], [
srun mpiexec.hydra mpiexec mpirun poe runjob
], [no])
AC_CHECK_PROGS([MPI_BINARY], [
mpiexec.hydra mpiexec mpirun poe runjob srun
], [no])
if test x"$MPI_BINARY" = x"no"; then
if test x"$MPI_BINARY" = x"no"; then
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun, poe, runjob, srun])
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun, poe, runjob, srun])
fi
fi
...
@@ -466,6 +466,18 @@ m4_define(elpa_m4_sse_kernels, [
...
@@ -466,6 +466,18 @@ m4_define(elpa_m4_sse_kernels, [
complex_sse_block2
complex_sse_block2
])
])
m4_define(elpa_m4_sparc64_kernels, [
real_sparc64_block2
real_sparc64_block4
real_sparc64_block6
])
m4_define(elpa_m4_vsx_kernels, [
real_vsx_block2
real_vsx_block4
real_vsx_block6
])
m4_define(elpa_m4_avx_kernels, [
m4_define(elpa_m4_avx_kernels, [
real_avx_block2
real_avx_block2
real_avx_block4
real_avx_block4
...
@@ -505,7 +517,7 @@ m4_define(elpa_m4_gpu_kernels, [
...
@@ -505,7 +517,7 @@ m4_define(elpa_m4_gpu_kernels, [
complex_gpu
complex_gpu
])
])
m4_define(elpa_m4_kernel_types, [generic sse sse_assembly avx avx2 avx512 bgp bgq gpu])
m4_define(elpa_m4_kernel_types, [generic s
parc64 vsx s
se sse_assembly avx avx2 avx512 bgp bgq gpu])
m4_define(elpa_m4_all_kernels,
m4_define(elpa_m4_all_kernels,
m4_foreach_w([elpa_m4_type],
m4_foreach_w([elpa_m4_type],
...
@@ -538,6 +550,8 @@ AC_DEFUN([ELPA_SELECT_KERNELS], [
...
@@ -538,6 +550,8 @@ AC_DEFUN([ELPA_SELECT_KERNELS], [
dnl Modify list of kernels with configure arguments
dnl Modify list of kernels with configure arguments
ELPA_SELECT_KERNELS([generic],[enable])
ELPA_SELECT_KERNELS([generic],[enable])
ELPA_SELECT_KERNELS([sparc64],[disable])
ELPA_SELECT_KERNELS([vsx],[disable])
ELPA_SELECT_KERNELS([sse],[enable])
ELPA_SELECT_KERNELS([sse],[enable])
ELPA_SELECT_KERNELS([sse_assembly],[enable])
ELPA_SELECT_KERNELS([sse_assembly],[enable])
ELPA_SELECT_KERNELS([avx],[enable])
ELPA_SELECT_KERNELS([avx],[enable])
...
@@ -552,7 +566,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
...
@@ -552,7 +566,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
])
])
if test x"${enable_bgp}" = x"yes" -o x"$enable_bgq" = x"yes"; then
if test x"${enable_bgp}" = x"yes" -o x"$enable_bgq" = x"yes"; then
m4_foreach_w([elpa_m4_kernel], elpa_m4_sse_kernels elpa_m4_avx_kernels elpa_m4_avx2_kernels elpa_m4_avx512_kernels, [
m4_foreach_w([elpa_m4_kernel], elpa_m4_s
parc64_kernels elpa_m4_vsx_kernels elpa_m4_s
se_kernels elpa_m4_avx_kernels elpa_m4_avx2_kernels elpa_m4_avx512_kernels, [
if x"$use_[]elpa_m4_kernel[]" = x"yes" ; then
if x"$use_[]elpa_m4_kernel[]" = x"yes" ; then
echo "Disabling elpa_m4_kernel due to BGP/BGQ option"
echo "Disabling elpa_m4_kernel due to BGP/BGQ option"
fi
fi
...
@@ -612,7 +626,7 @@ AC_DEFUN([ELPA_KERNEL_DEPENDS],[
...
@@ -612,7 +626,7 @@ AC_DEFUN([ELPA_KERNEL_DEPENDS],[
])
])
fi
fi
])
])
m4_foreach_w([elpa_m4_arch],[sse avx avx2 avx512],[
m4_foreach_w([elpa_m4_arch],[s
parc64 vsx s
se avx avx2 avx512],[
ELPA_KERNEL_DEPENDS([real_]elpa_m4_arch[_block6], [real_]elpa_m4_arch[_block4 real_]elpa_m4_arch[_block2])
ELPA_KERNEL_DEPENDS([real_]elpa_m4_arch[_block6], [real_]elpa_m4_arch[_block4 real_]elpa_m4_arch[_block2])
ELPA_KERNEL_DEPENDS([real_]elpa_m4_arch[_block4], [real_]elpa_m4_arch[_block2])
ELPA_KERNEL_DEPENDS([real_]elpa_m4_arch[_block4], [real_]elpa_m4_arch[_block2])
ELPA_KERNEL_DEPENDS([complex_]elpa_m4_arch[_block2], [complex_]elpa_m4_arch[_block1])
ELPA_KERNEL_DEPENDS([complex_]elpa_m4_arch[_block2], [complex_]elpa_m4_arch[_block1])
...
@@ -646,7 +660,7 @@ dnl choosing a default kernel
...
@@ -646,7 +660,7 @@ dnl choosing a default kernel
m4_foreach_w([elpa_m4_kind],[real complex],[
m4_foreach_w([elpa_m4_kind],[real complex],[
m4_foreach_w([elpa_m4_kernel],
m4_foreach_w([elpa_m4_kernel],
m4_foreach_w([elpa_m4_cand_kernel],
m4_foreach_w([elpa_m4_cand_kernel],
elpa_m4_avx512_kernels elpa_m4_avx2_kernels elpa_m4_avx_kernels elpa_m4_sse_kernels elpa_m4_sse_assembly_kernels elpa_m4_generic_kernels,
elpa_m4_avx512_kernels elpa_m4_avx2_kernels elpa_m4_avx_kernels elpa_m4_sse_kernels elpa_m4_sse_assembly_kernels elpa_m4_
sparc64_kernels elpa_m4_vsx_kernels elpa_m4_
generic_kernels,
[m4_bmatch(elpa_m4_cand_kernel,elpa_m4_kind,elpa_m4_cand_kernel)] ),
[m4_bmatch(elpa_m4_cand_kernel,elpa_m4_kind,elpa_m4_cand_kernel)] ),
[
[
if test -z "$default_[]elpa_m4_kind[]_kernel"; then
if test -z "$default_[]elpa_m4_kind[]_kernel"; then
...
@@ -664,8 +678,56 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
...
@@ -664,8 +678,56 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
AC_SUBST([ELPA_2STAGE_]m4_toupper(elpa_m4_kind)[_DEFAULT])
AC_SUBST([ELPA_2STAGE_]m4_toupper(elpa_m4_kind)[_DEFAULT])
])
])
dnl #include <fjmfunc.h>
dnl #include <emmintrin.h>
dnl int main(int argc, char **argv) {
dnl __m128d q;
dnl __m128d h1 = _fjsp_neg_v2r8(q);
dnl return 0;
dnl }
AC_LANG_PUSH([C])
AC_LANG_PUSH([C])
if test x"${need_vsx}" = x"yes"; then
AC_MSG_CHECKING(whether we can compile Altivec VSX with intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <altivec.h>
int main(int argc, char **argv) {
__vector double a, b, c;
c = vec_add(a,b);
return 0;
}
])],
[can_compile_vsx=yes],
[can_compile_vsx=no]
)
AC_MSG_RESULT([${can_compile_vsx}])
if test x"$can_compile_vsx" != x"yes"; then
AC_MSG_ERROR([Could not compile test program, try with --disable-vsx, or adjust the C compiler or CFLAGS])
fi
AC_DEFINE([HAVE_VSX_SSE],[1],[Altivec VSX intrinsics are supported on this CPU])
fi
if test x"${need_sparc64}" = x"yes"; then
AC_MSG_CHECKING(whether we can compile SPARC64 with intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv) {
double* q;
__m128d h1 = _mm_loaddup_pd(q);
return 0;
}
])],
[can_compile_sparc64=yes],
[can_compile_sparc64=no]
)
AC_MSG_RESULT([${can_compile_sparc64}])
if test x"$can_compile_sparc64" != x"yes"; then
AC_MSG_ERROR([Could not compile test program, try with --disable-sparc64, or adjust the C compiler or CFLAGS])
fi
AC_DEFINE([HAVE_SPARC64_SSE],[1],[SPARC64 intrinsics are supported on this CPU])
fi
if test x"${need_sse}" = x"yes"; then
if test x"${need_sse}" = x"yes"; then
AC_MSG_CHECKING(whether we can compile SSE3 with gcc intrinsics in C)
AC_MSG_CHECKING(whether we can compile SSE3 with gcc intrinsics in C)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
...
@@ -1035,7 +1097,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
...
@@ -1035,7 +1097,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
#echo " "
#echo " "
#echo "***********************************************************************"
#echo "***********************************************************************"
#echo "* This is a the release of ELPA 2017.05.00
2
*"
#echo "* This is a the release of ELPA 2017.05.00
3
*"
#echo "* It mainly contains bugfixes to ELPA 2017.05.001 *"
#echo "* It mainly contains bugfixes to ELPA 2017.05.001 *"
#echo "***********************************************************************"
#echo "***********************************************************************"
#echo " "
#echo " "
...
...
elpa.spec
View file @
f20ce195
...
@@ -32,7 +32,7 @@
...
@@ -32,7 +32,7 @@
%endif
%endif
Name: elpa
Name: elpa
Version: 2017.05.00
2
Version: 2017.05.00
3
Release: 1
Release: 1
Summary: A massively parallel eigenvector solver
Summary: A massively parallel eigenvector solver
License: LGPL-3.0
License: LGPL-3.0
...
...
elpa/elpa_constants.h.in
View file @
f20ce195
...
@@ -40,7 +40,13 @@ enum ELPA_SOLVERS {
...
@@ -40,7 +40,13 @@ enum ELPA_SOLVERS {
X(ELPA_2STAGE_REAL_AVX512_BLOCK2, 15, @ELPA_2STAGE_REAL_AVX512_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK2, 15, @ELPA_2STAGE_REAL_AVX512_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK4, 16, @ELPA_2STAGE_REAL_AVX512_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK4, 16, @ELPA_2STAGE_REAL_AVX512_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK6, 17, @ELPA_2STAGE_REAL_AVX512_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK6, 17, @ELPA_2STAGE_REAL_AVX512_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GPU, 18, @ELPA_2STAGE_REAL_GPU_COMPILED@, __VA_ARGS__)
X(ELPA_2STAGE_REAL_GPU, 18, @ELPA_2STAGE_REAL_GPU_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK2, 19, @ELPA_2STAGE_REAL_SPARC64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK4, 20, @ELPA_2STAGE_REAL_SPARC64_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_SPARC64_BLOCK6, 21, @ELPA_2STAGE_REAL_SPARC64_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK2, 22, @ELPA_2STAGE_REAL_VSX_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK4, 23, @ELPA_2STAGE_REAL_VSX_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK6, 24, @ELPA_2STAGE_REAL_VSX_BLOCK6_COMPILED@, __VA_ARGS__)
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X) \
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
...
@@ -66,7 +72,7 @@ enum ELPA_REAL_KERNELS {
...
@@ -66,7 +72,7 @@ enum ELPA_REAL_KERNELS {
X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK2, 11, @ELPA_2STAGE_COMPLEX_AVX2_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK2, 11, @ELPA_2STAGE_COMPLEX_AVX2_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK1, 12, @ELPA_2STAGE_COMPLEX_AVX512_BLOCK1_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK1, 12, @ELPA_2STAGE_COMPLEX_AVX512_BLOCK1_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK2, 13, @ELPA_2STAGE_COMPLEX_AVX512_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK2, 13, @ELPA_2STAGE_COMPLEX_AVX512_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_GPU, 14, @ELPA_2STAGE_COMPLEX_GPU_COMPILED@, __VA_ARGS__)
X(ELPA_2STAGE_COMPLEX_GPU, 14, @ELPA_2STAGE_COMPLEX_GPU_COMPILED@, __VA_ARGS__)
#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS_AND_DEFAULT(X) \
#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(X) \
ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(X) \
...
...
generate_gitlab_ci_tests.py
0 → 100755
View file @
f20ce195
This diff is collapsed.
Click to expand it.
src/elpa1/elpa_invert_trm.F90
View file @
f20ce195
...
@@ -225,7 +225,7 @@
...
@@ -225,7 +225,7 @@
do i=1,nb
do i=1,nb
#ifdef WITH_MPI
#ifdef WITH_MPI
call obj%timer%start("
mpi_communication
")
call obj%timer%start("
mpi_communication
")
call MPI_Bcast(tmat
2
(1,i), l_row1-1, MPI_MATH_DATATYPE_PRECISION, &
call MPI_Bcast(tmat
1
(1,i), l_row1-1, MPI_MATH_DATATYPE_PRECISION, &
pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
call obj%timer%stop("
mpi_communication
")
call obj%timer%stop("
mpi_communication
")
...
...
src/elpa1/legacy_interface/elpa_cholesky_template.F90
View file @
f20ce195
...
@@ -62,7 +62,6 @@
...
@@ -62,7 +62,6 @@
complex
(
kind
=
COMPLEX_DATATYPE
)
::
a
(
lda
,
matrixCols
)
complex
(
kind
=
COMPLEX_DATATYPE
)
::
a
(
lda
,
matrixCols
)
#endif
#endif
#endif
#endif
integer
(
kind
=
ik
)
::
nev
logical
,
intent
(
in
)
::
wantDebug
logical
,
intent
(
in
)
::
wantDebug
logical
::
success
logical
::
success
integer
(
kind
=
ik
)
::
successInternal
integer
(
kind
=
ik
)
::
successInternal
...
@@ -86,7 +85,6 @@
...
@@ -86,7 +85,6 @@
e
=>
elpa_allocate
()
e
=>
elpa_allocate
()
call
e
%
set
(
"na"
,
na
)
call
e
%
set
(
"na"
,
na
)
call
e
%
set
(
"nev"
,
nev
)
call
e
%
set
(
"local_nrows"
,
lda
)
call
e
%
set
(
"local_nrows"
,
lda
)
call
e
%
set
(
"local_ncols"
,
matrixCols
)
call
e
%
set
(
"local_ncols"
,
matrixCols
)
call
e
%
set
(
"nblk"
,
nblk
)
call
e
%
set
(
"nblk"
,
nblk
)
...
@@ -94,6 +92,13 @@
...
@@ -94,6 +92,13 @@
call
e
%
set
(
"mpi_comm_rows"
,
mpi_comm_rows
)
call
e
%
set
(
"mpi_comm_rows"
,
mpi_comm_rows
)
call
e
%
set
(
"mpi_comm_cols"
,
mpi_comm_cols
)
call
e
%
set
(
"mpi_comm_cols"
,
mpi_comm_cols
)
!! the elpa object needs nev to be set (in case the EVP-solver is
!! called later. Thus it is set by user, do nothing, otherwise,
!! set it to na as default
!if (e%is_set("nev")) .ne. 1) then
! call e%set("nev", na)
!endif
if
(
e
%
setup
()
.ne.
ELPA_OK
)
then
if
(
e
%
setup
()
.ne.
ELPA_OK
)
then
print
*
,
"Cannot setup ELPA instance"
print
*
,
"Cannot setup ELPA instance"
success
=
.false.
success
=
.false.
...
...
src/elpa2/compute_hh_trafo.F90
View file @
f20ce195
This diff is collapsed.
Click to expand it.
src/elpa2/elpa2_template.F90
View file @
f20ce195
...
@@ -172,7 +172,7 @@
...
@@ -172,7 +172,7 @@
if
(
gpu
==
1
)
then
if
(
gpu
==
1
)
then
if
(
kernel
.ne.
ELPA_2STAGE_REAL_GPU
)
then
if
(
kernel
.ne.
ELPA_2STAGE_REAL_GPU
)
then
write
(
error_unit
,
*
)
"ELPA: Warning, GPU usage has been requested but compute kernel is defined as non-GPU!"
write
(
error_unit
,
*
)
"ELPA: Warning, GPU usage has been requested but compute kernel is defined as non-GPU!"
write
(
error_unit
,
*
)
"The compute kernel will be executed on CPUs!"
write
(
error_unit
,
*
)
"The compute kernel will be executed on CPUs!"
else
if
(
nblk
.ne.
128
)
then
else
if
(
nblk
.ne.
128
)
then
kernel
=
ELPA_2STAGE_REAL_GENERIC
kernel
=
ELPA_2STAGE_REAL_GENERIC
endif
endif
...
@@ -182,6 +182,26 @@
...
@@ -182,6 +182,26 @@
write
(
error_unit
,
*
)
"ELPA: Warning, GPU usage has been requested but compute kernel is defined as non-GPU!"
write
(
error_unit
,
*
)
"ELPA: Warning, GPU usage has been requested but compute kernel is defined as non-GPU!"
endif
endif
endif
endif
#ifdef SINGLE_PRECISION_REAL
! special case at the moment NO single precision kernels on POWER 8 -> set GENERIC for now
if
(
kernel
.eq.
ELPA_2STAGE_REAL_VSX_BLOCK2
.or.
&
kernel
.eq.
ELPA_2STAGE_REAL_VSX_BLOCK4
.or.
&
kernel
.eq.
ELPA_2STAGE_REAL_VSX_BLOCK6
)
then