Commit 377ac3b8 authored by Andreas Marek's avatar Andreas Marek
Browse files

Merge branch 'master_pre_stage' into 'master'

ELPA can compile on Fugaku

See merge request !65
parents 85449317 b9099779
...@@ -2,11 +2,14 @@ Changelog for next release ...@@ -2,11 +2,14 @@ Changelog for next release
- not yet decided - not yet decided
Upcoming changes for ELPA 2021.05.001 Changelog for ELPA 2021.05.001.rc
- allow the user to set the mapping of MPI tasks to GPU id per set/get - allow the user to set the mapping of MPI tasks to GPU id per set/get
- experimental feature: port to AMD GPUS, works correctly, performance yet - experimental feature: port to AMD GPUS, works correctly, performance yet
unclear; only tested --with-mpi=0 unclear; only tested --with-mpi=0
- On request, ELPA can print the pinning of MPI tasks and OpenMP thread - On request, ELPA can print the pinning of MPI tasks and OpenMP thread
- support for FUGAKU: some minor problems still have to be fixed due to compiler
issues
Changelog for ELPA 2020.11.001 Changelog for ELPA 2020.11.001
......
...@@ -636,9 +636,12 @@ pkgconfigdir = $(libdir)/pkgconfig ...@@ -636,9 +636,12 @@ pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = @PKG_CONFIG_FILE@ pkgconfig_DATA = @PKG_CONFIG_FILE@
# programs # programs
if BUILD_FUGAKU
bin_PROGRAMS =
else
bin_PROGRAMS = \ bin_PROGRAMS = \
elpa2_print_kernels@SUFFIX@ elpa2_print_kernels@SUFFIX@
endif
noinst_PROGRAMS = noinst_PROGRAMS =
check_SCRIPTS = check_SCRIPTS =
...@@ -672,10 +675,12 @@ libelpatest@SUFFIX@_la_SOURCES += \ ...@@ -672,10 +675,12 @@ libelpatest@SUFFIX@_la_SOURCES += \
test/shared/test_redirect.F90 test/shared/test_redirect.F90
endif endif
if BUILD_FUGAKU
else
elpa2_print_kernels@SUFFIX@_SOURCES = src/elpa2/elpa2_print_kernels.F90 elpa2_print_kernels@SUFFIX@_SOURCES = src/elpa2/elpa2_print_kernels.F90
elpa2_print_kernels@SUFFIX@_LDADD = libelpa@SUFFIX@.la elpa2_print_kernels@SUFFIX@_LDADD = libelpa@SUFFIX@.la
elpa2_print_kernels@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODINC)modules elpa2_print_kernels@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODINC)modules
endif
include test_programs.am include test_programs.am
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
## Current Release ## ## Current Release ##
The current release is ELPA 2020.11.001 The current supported API version The current release is ELPA 2021.05.001.rc1 The current supported API version
is 20200417. This release supports the earliest API version 20170403. is 20210430. This release supports the earliest API version 20170403.
The release ELPA 2018.11.001 was the last release, where the legacy API has been The release ELPA 2018.11.001 was the last release, where the legacy API has been
enabled by default (and can be disabled at build time). enabled by default (and can be disabled at build time).
...@@ -130,7 +130,7 @@ the possible configure options. ...@@ -130,7 +130,7 @@ the possible configure options.
## Using *ELPA* ## Using *ELPA*
Please have a look at the [USERS_GUIDE](USERS_GUIDE.md) file, to get a documentation or at the [online](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2020.11.001/html/index.html) doxygen documentation, where you find the definition of the interfaces. You might want to have a look at the [PERFORMANCE tuning document](./documentation/PERFORMANCE_TUNING.md) to avoid some usual pitfalls. Please have a look at the [USERS_GUIDE](USERS_GUIDE.md) file, to get a documentation or at the [online](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2021.05.001.rc1/html/index.html) doxygen documentation, where you find the definition of the interfaces. You might want to have a look at the [PERFORMANCE tuning document](./documentation/PERFORMANCE_TUNING.md) to avoid some usual pitfalls.
## Contributing to *ELPA* ## Contributing to *ELPA*
......
This file contains the release notes for the ELPA 2020.11.001 version This file contains the release notes for the ELPA 2021.05.001.rc1 version
What is new? What is new?
------------- -------------
For detailed information about changes since release ELPA 2020.05.001 please have a look at the Changelog file For detailed information about changes since release ELPA 2020.11.001 please have a look at the Changelog file
- bugfixes - bugfixes
- experimental feature: support for ARM SVE - experimental feature: support for AMD GPUs
- experimental feature: complex kernels for arch64 NEON
ABI change ABI change
......
...@@ -26,7 +26,7 @@ AM_SILENT_RULES([yes]) ...@@ -26,7 +26,7 @@ AM_SILENT_RULES([yes])
# by the current interface, as they are ABI compatible (e.g. only new symbols # by the current interface, as they are ABI compatible (e.g. only new symbols
# were added by the new interface) # were added by the new interface)
# #
AC_SUBST([ELPA_SO_VERSION], [15:2:0]) AC_SUBST([ELPA_SO_VERSION], [15:3:0])
# AC_DEFINE_SUBST(NAME, VALUE, DESCRIPTION) # AC_DEFINE_SUBST(NAME, VALUE, DESCRIPTION)
# ----------------------------------------- # -----------------------------------------
...@@ -40,15 +40,15 @@ AX_BUILD_DATE_EPOCH(ELPA_BUILDTIME) ...@@ -40,15 +40,15 @@ AX_BUILD_DATE_EPOCH(ELPA_BUILDTIME)
# API Version # API Version
AC_DEFINE([EARLIEST_API_VERSION], [20170403], [Earliest supported ELPA API version]) AC_DEFINE([EARLIEST_API_VERSION], [20170403], [Earliest supported ELPA API version])
print_earliest_api_version=20170403 print_earliest_api_version=20170403
AC_DEFINE_SUBST(CURRENT_API_VERSION, 20200417, "Current ELPA API version") AC_DEFINE_SUBST(CURRENT_API_VERSION, 20210430, "Current ELPA API version")
print_current_api_version=20200417 print_current_api_version=20210430
# Autotune Version # Autotune Version
AC_DEFINE([EARLIEST_AUTOTUNE_VERSION], [20171201], [Earliest ELPA API version, which supports autotuning]) AC_DEFINE([EARLIEST_AUTOTUNE_VERSION], [20171201], [Earliest ELPA API version, which supports autotuning])
print_earliest_autotune_version=20171201 print_earliest_autotune_version=20171201
AC_DEFINE([CURRENT_AUTOTUNE_VERSION], [20200417], [Current ELPA autotune version]) AC_DEFINE([CURRENT_AUTOTUNE_VERSION], [20210430], [Current ELPA autotune version])
print_current_autotune_version=20200417 print_current_autotune_version=20210430
AC_DEFINE([CURRENT_AUTOTUNE_VERSION], [20200417], [Current ELPA autotune version]) AC_DEFINE([CURRENT_AUTOTUNE_VERSION], [20210430], [Current ELPA autotune version])
AC_DEFINE_SUBST(CURRENT_AUTOTUNE_VERSION, 20200417, "Current ELPA autotune version") AC_DEFINE_SUBST(CURRENT_AUTOTUNE_VERSION, 20210430, "Current ELPA autotune version")
AC_DEFINE_UNQUOTED([ELPA_BUILDTIME], [$ELPA_BUILDTIME], ["Time of build"]) AC_DEFINE_UNQUOTED([ELPA_BUILDTIME], [$ELPA_BUILDTIME], ["Time of build"])
AX_COMPARE_VERSION([$ELPA_BUILDTIME], [gt], [1623715200],[old_elpa_version=yes],[old_elpa_version=no]) AX_COMPARE_VERSION([$ELPA_BUILDTIME], [gt], [1623715200],[old_elpa_version=yes],[old_elpa_version=no])
...@@ -398,11 +398,22 @@ if test x"${enable_openmp}" = x"yes"; then ...@@ -398,11 +398,22 @@ if test x"${enable_openmp}" = x"yes"; then
FCFLAGS="$OPENMP_FCFLAGS $FCFLAGS" FCFLAGS="$OPENMP_FCFLAGS $FCFLAGS"
fi fi
# Decide whether configure should search for an MPI launcher binary.
# The search is on by default; want_mpi_launcher ends up as "yes" unless the
# user passes --disable-detect-mpi-launcher (or --enable-detect-mpi-launcher=VALUE,
# in which case VALUE is stored unchanged).
AC_MSG_CHECKING([whether mpi-launcher should be detected])
AC_ARG_ENABLE([detect-mpi-launcher],
[AS_HELP_STRING([--enable-detect-mpi-launcher],
[detect mpi launcher, default yes; use --disable-detect-mpi-launcher to skip the detection])],
[want_mpi_launcher="$enableval"],
[want_mpi_launcher="yes"])
AC_MSG_RESULT([${want_mpi_launcher}])
dnl check which MPI binary invokes an MPI job dnl check which MPI binary invokes an MPI job
if test x"$with_mpi" = x"yes"; then if test x"$with_mpi" = x"yes"; then
AC_CHECK_PROGS([MPI_BINARY], [mpiexec.hydra mpiexec mpirun poe runjob srun aprun], [no]) if test x"$want_mpi_launcher" = x"yes"; then
if test x"$MPI_BINARY" = x"no"; then AC_CHECK_PROGS([MPI_BINARY], [mpiexec.hydra mpiexec mpirun poe runjob srun aprun], [no])
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun, poe, runjob, srun, aprun]) if test x"$MPI_BINARY" = x"no"; then
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun, poe, runjob, srun, aprun])
fi
fi fi
fi fi
...@@ -985,7 +996,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[ ...@@ -985,7 +996,7 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
fi fi
]) ])
AC_ARG_WITH(nvidia-gpu-support-only, [AS_HELP_STRING([--with-nvidia-gpu-support-only], AC_ARG_WITH(NVIDIA-gpu-support-only, [AS_HELP_STRING([--with-NVIDIA-gpu-support-only],
[Compile and always use the NVIDIA GPU version])], [Compile and always use the NVIDIA GPU version])],
[],[with_nvidia_gpu_support_only=no]) [],[with_nvidia_gpu_support_only=no])
if test x"$with_nvidia_gpu_support_only" = x"yes" ; then if test x"$with_nvidia_gpu_support_only" = x"yes" ; then
...@@ -996,7 +1007,7 @@ if test x"$with_nvidia_gpu_support_only" = x"yes" ; then ...@@ -996,7 +1007,7 @@ if test x"$with_nvidia_gpu_support_only" = x"yes" ; then
use_complex_nvidia_gpu=yes use_complex_nvidia_gpu=yes
fi fi
AC_ARG_WITH(amd-gpu-support-only, [AS_HELP_STRING([--with-amd-gpu-support-only], AC_ARG_WITH(AMD-gpu-support-only, [AS_HELP_STRING([--with-AMD-gpu-support-only],
[Compile and always use the AMD GPU version])], [Compile and always use the AMD GPU version])],
[],[with_amd_gpu_support_only=no]) [],[with_amd_gpu_support_only=no])
if test x"$with_amd_gpu_support_only" = x"yes" ; then if test x"$with_amd_gpu_support_only" = x"yes" ; then
...@@ -1007,7 +1018,7 @@ if test x"$with_amd_gpu_support_only" = x"yes" ; then ...@@ -1007,7 +1018,7 @@ if test x"$with_amd_gpu_support_only" = x"yes" ; then
use_complex_amd_gpu=yes use_complex_amd_gpu=yes
fi fi
AC_ARG_WITH(intel-gpu-support-only, [AS_HELP_STRING([--with-intel-gpu-support-only], AC_ARG_WITH(INTEL-gpu-support-only, [AS_HELP_STRING([--with-INTEL-gpu-support-only],
[Compile and always use the INTEL GPU version])], [Compile and always use the INTEL GPU version])],
[],[with_intel_gpu_support_only=no]) [],[with_intel_gpu_support_only=no])
if test x"$with_intel_gpu_support_only" = x"yes" ; then if test x"$with_intel_gpu_support_only" = x"yes" ; then
...@@ -1505,7 +1516,7 @@ AC_LANG_POP([Fortran]) ...@@ -1505,7 +1516,7 @@ AC_LANG_POP([Fortran])
AC_MSG_CHECKING(whether GPU version should be used) AC_MSG_CHECKING(whether GPU version should be used)
AC_ARG_ENABLE([gpu], AC_ARG_ENABLE([gpu],
AS_HELP_STRING([--enable-gpu], AS_HELP_STRING([--enable-gpu],
[do use Nvidia GPU version (compatibility flag, better set explicitely)]), [do use NVIDIA GPU version (compatibility flag, better set explicitely)]),
[if test x"$enableval" = x"yes"; then [if test x"$enableval" = x"yes"; then
use_gpu=yes use_gpu=yes
else else
...@@ -1519,10 +1530,10 @@ if test x"${use_gpu}" = x"yes" ; then ...@@ -1519,10 +1530,10 @@ if test x"${use_gpu}" = x"yes" ; then
use_complex_nvidia_gpu=yes use_complex_nvidia_gpu=yes
fi fi
AC_MSG_CHECKING(whether Nvidia-GPU version should be used) AC_MSG_CHECKING(whether NVIDIA-GPU version should be used)
AC_ARG_ENABLE([Nvidia-gpu], AC_ARG_ENABLE([nvidia-gpu],
AS_HELP_STRING([--enable-Nvidia-gpu], AS_HELP_STRING([--enable-nvidia-gpu],
[do use Nvidia GPU version]), [do use NVIDIA GPU version]),
[if test x"$enableval" = x"yes"; then [if test x"$enableval" = x"yes"; then
use_nvidia_gpu=yes use_nvidia_gpu=yes
else else
...@@ -1554,8 +1565,8 @@ fi ...@@ -1554,8 +1565,8 @@ fi
#fi #fi
AC_MSG_CHECKING(whether AMD-GPU version should be used) AC_MSG_CHECKING(whether AMD-GPU version should be used)
AC_ARG_ENABLE([AMD-gpu], AC_ARG_ENABLE([amd-gpu],
AS_HELP_STRING([--enable-AMD-gpu], AS_HELP_STRING([--enable-amd-gpu],
[do use AMD GPU version]), [do use AMD GPU version]),
[if test x"$enableval" = x"yes"; then [if test x"$enableval" = x"yes"; then
use_amd_gpu=yes use_amd_gpu=yes
...@@ -1653,9 +1664,9 @@ if test x"${need_intel_gpu}" = x"yes" ; then ...@@ -1653,9 +1664,9 @@ if test x"${need_intel_gpu}" = x"yes" ; then
#AC_LANG_POP([C]) #AC_LANG_POP([C])
fi fi
AC_MSG_CHECKING(whether Nvidia GPU memory debugging should be enabled) AC_MSG_CHECKING(whether NVIDIA GPU memory debugging should be enabled)
AC_ARG_ENABLE([nvidia-gpu-memory-debug], AC_ARG_ENABLE([NVIDIA-gpu-memory-debug],
AS_HELP_STRING([--enable-nvidia-gpu-memory-debug], AS_HELP_STRING([--enable-NVIDIA-gpu-memory-debug],
[Output information on Nvidia GPU memory to be processed by utils/memory/check_memory.py]), [Output information on Nvidia GPU memory to be processed by utils/memory/check_memory.py]),
[if test x"$enableval" = x"yes"; then [if test x"$enableval" = x"yes"; then
enable_nvidia_gpu_memory_debug=yes enable_nvidia_gpu_memory_debug=yes
...@@ -1962,6 +1973,23 @@ if test x"${enable_kcomputer}" = x"yes"; then ...@@ -1962,6 +1973,23 @@ if test x"${enable_kcomputer}" = x"yes"; then
fi fi
fi fi
# Optional build mode for FUGAKU: enable_fugaku is yes only when the user
# passes --enable-FUGAKU (or --enable-FUGAKU=yes); any other value, or no
# option at all, yields no.
AC_MSG_CHECKING(whether we build for FUGAKU)
AC_ARG_ENABLE([FUGAKU],
AS_HELP_STRING([--enable-FUGAKU],
[enable builds on FUGAKU, default no.]),
[if test x"$enableval" = x"yes"; then
enable_fugaku=yes
else
enable_fugaku=no
fi],
[enable_fugaku=no])
AC_MSG_RESULT([${enable_fugaku}])
# Expose the decision to automake as the BUILD_FUGAKU conditional.
AM_CONDITIONAL([BUILD_FUGAKU],[test x"$enable_fugaku" = x"yes"])
if test x"${enable_fugaku}" = x"yes"; then
# Additionally define the BUILD_FUGAKU preprocessor symbol and set the
# module include flag FC_MODINC to -I, replacing any value it held before.
AC_DEFINE([BUILD_FUGAKU], [1], [build for FUGAKU])
FC_MODINC="-I"
fi
AC_MSG_CHECKING(whether we build for NEC SX-Auroa) AC_MSG_CHECKING(whether we build for NEC SX-Auroa)
AC_ARG_ENABLE([SX-Aurora], AC_ARG_ENABLE([SX-Aurora],
AS_HELP_STRING([--enable-SX-Aurora], AS_HELP_STRING([--enable-SX-Aurora],
...@@ -2098,7 +2126,11 @@ AC_CONFIG_FILES([ ...@@ -2098,7 +2126,11 @@ AC_CONFIG_FILES([
m4_include([m4/ax_fc_check_define.m4]) m4_include([m4/ax_fc_check_define.m4])
AC_MSG_CHECKING([if workaround for broken preprocessor is needed]) AC_MSG_CHECKING([if workaround for broken preprocessor is needed])
need_manual_cpp=no if test x"${enable_fugaku}" = x"yes"; then
need_manual_cpp=yes
else
need_manual_cpp=no
fi
AX_FC_CHECK_DEFINE([__INTEL_COMPILER],[is_intel=yes],[]) AX_FC_CHECK_DEFINE([__INTEL_COMPILER],[is_intel=yes],[])
AX_FC_CHECK_DEFINE([__PGI],[is_pgi=yes],[]) AX_FC_CHECK_DEFINE([__PGI],[is_pgi=yes],[])
ACTUAL_FC="$FC" ACTUAL_FC="$FC"
...@@ -2271,13 +2303,13 @@ if test x"${ax_cv_have_avx512f_cpu_ext}" = x"yes" -a x"${need_avx512}" = x"no"; ...@@ -2271,13 +2303,13 @@ if test x"${ax_cv_have_avx512f_cpu_ext}" = x"yes" -a x"${need_avx512}" = x"no";
echo " " echo " "
fi fi
#echo " " echo " "
#echo "***********************************************************************" echo "***********************************************************************"
#echo "* This is the first release candidate of ELPA 2020.11.001.rc1 *" echo "* This is the first release candidate of ELPA 2021.05.001.rc1 *"
#echo "* There might be still some changes until the final release of *" echo "* There might be still some changes until the final release of *"
#echo "* ELPA 2020.11.001 *" echo "* ELPA 2021.05.001 *"
#echo "***********************************************************************" echo "***********************************************************************"
#echo " " echo " "
if test x"$enable_kcomputer" = x"yes" ; then if test x"$enable_kcomputer" = x"yes" ; then
echo " " echo " "
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
## Preamble ## ## Preamble ##
This file provides documentation on how to build the *ELPA* library in **version ELPA-2020.11.001**. This file provides documentation on how to build the *ELPA* library in **version ELPA-2021.05.001.rc1**.
With release of **version ELPA-2017.05.001** the build process has been significantly simplified, With release of **version ELPA-2017.05.001** the build process has been significantly simplified,
which makes it easier to install the *ELPA* library. which makes it easier to install the *ELPA* library.
...@@ -10,7 +10,7 @@ The release ELPA 2018.11.001 was the last release, where the legacy API has been ...@@ -10,7 +10,7 @@ The release ELPA 2018.11.001 was the last release, where the legacy API has been
enabled by default (and can be disabled at build time). enabled by default (and can be disabled at build time).
With the release ELPA 2019.11.001, the legacy API has been deprecated and the support has been closed. With the release ELPA 2019.11.001, the legacy API has been deprecated and the support has been closed.
The release of ELPA 2020.11.001 does change the API and ABI compared to the release 2019.11.001, since The release of ELPA 2021.05.001.rc1 does change the API and ABI compared to the release 2019.11.001, since
the legacy API has been dropped. the legacy API has been dropped.
## How to install *ELPA* ## ## How to install *ELPA* ##
...@@ -62,7 +62,10 @@ An excerpt of the most important (*ELPA* specific) options reads as follows: ...@@ -62,7 +62,10 @@ An excerpt of the most important (*ELPA* specific) options reads as follows:
| `--enable-sve128` | Experimental feature build ARM SVE128 kernels, default: disabled | | `--enable-sve128` | Experimental feature build ARM SVE128 kernels, default: disabled |
| `--enable-sve256` | Experimental feature build ARM SVE256 kernels, default: disabled | | `--enable-sve256` | Experimental feature build ARM SVE256 kernels, default: disabled |
| `--enable-sve512` | Experimental feature build ARM SVE512 kernels, default: disabled | | `--enable-sve512` | Experimental feature build ARM SVE512 kernels, default: disabled |
| `--enable-gpu` | build GPU kernels, default: disabled | | `--enable-nvidia-gpu` | build NVIDIA GPU kernels, default: disabled |
| `--enable-gpu` | same as --enable-nvidia-gpu |
| `--enable-amd-gpu` | EXPERIMENTAL: build AMD GPU kernels, default: disabled |
| `--enable-intel-gpu` | VERY EXPERIMENTAL: build INTEL GPU kernels, default: disabled |
| `--enable-bgp` | build BGP kernels, default: disabled | | `--enable-bgp` | build BGP kernels, default: disabled |
| `--enable-bgq` | build BGQ kernels, default: disabled | | `--enable-bgq` | build BGQ kernels, default: disabled |
| `--with-mpi=[yes|no]` | compile with MPI. Default: yes | | `--with-mpi=[yes|no]` | compile with MPI. Default: yes |
...@@ -71,7 +74,9 @@ An excerpt of the most important (*ELPA* specific) options reads as follows: ...@@ -71,7 +74,9 @@ An excerpt of the most important (*ELPA* specific) options reads as follows:
| `--with-GPU-compute-capability=VALUE` | use compute capability VALUE for GPU version, <br> default: "sm_35" | | `--with-GPU-compute-capability=VALUE` | use compute capability VALUE for GPU version, <br> default: "sm_35" |
| `--with-fixed-real-kernel=KERNEL` | compile with only a single specific real kernel. | | `--with-fixed-real-kernel=KERNEL` | compile with only a single specific real kernel. |
| `--with-fixed-complex-kernel=KERNEL` | compile with only a single specific complex kernel. | | `--with-fixed-complex-kernel=KERNEL` | compile with only a single specific complex kernel. |
| `--with-gpu-support-only` | Compile and always use the GPU version | | `--with-nvidia-gpu-support-only` | Compile and always use the NVIDIA GPU version |
| `--with-amd-gpu-support-only` | EXPERIMENTAL: Compile and always use the AMD GPU version |
| `--with-intel-gpu-support-only` | EXPERIMENTAL: Compile and always use the INTEL GPU version |
| `--with-likwid=[yes|no|PATH]` | use the likwid tool to measure performance (has a performance impact!), default: no | | `--with-likwid=[yes|no|PATH]` | use the likwid tool to measure performance (has a performance impact!), default: no |
| `--with-default-real-kernel=KERNEL` | set the real kernel KERNEL as default | | `--with-default-real-kernel=KERNEL` | set the real kernel KERNEL as default |
| `--with-default-complex-kernel=KERNEL`| set the complex kernel KERNEL as default | | `--with-default-complex-kernel=KERNEL`| set the complex kernel KERNEL as default |
...@@ -499,4 +504,10 @@ In order to build *ELPA* for AMD GPUs please ensure that you have a working inst ...@@ -499,4 +504,10 @@ In order to build *ELPA* for AMD GPUs please ensure that you have a working inst
./configure CXX=hipcc CXXFLAGS="-I/opt/rocm-4.0.0/hip/include/ -I/opt/rocm-4.0.0/rocblas/include -g" CC=hipcc CFLAGS="-I/opt/rocm-4.0.0/hip/include/ -I/opt/rocm-4.0.0/rocblas/include -g" LIBS="-L/opt/rocm-4.0.0/rocblas/lib" --enable-option-checking=fatal --with-mpi=0 FC=gfortran FCFLAGS="-g -LPATH_TO_YOUR_LAPACK_INSTALLATION -lopenblas -llapack" --disable-sse --disable-sse-assembly --disable-avx --disable-avx2 --disable-avx512 --enable-amd-gpu --enable-single-precision ./configure CXX=hipcc CXXFLAGS="-I/opt/rocm-4.0.0/hip/include/ -I/opt/rocm-4.0.0/rocblas/include -g" CC=hipcc CFLAGS="-I/opt/rocm-4.0.0/hip/include/ -I/opt/rocm-4.0.0/rocblas/include -g" LIBS="-L/opt/rocm-4.0.0/rocblas/lib" --enable-option-checking=fatal --with-mpi=0 FC=gfortran FCFLAGS="-g -LPATH_TO_YOUR_LAPACK_INSTALLATION -lopenblas -llapack" --disable-sse --disable-sse-assembly --disable-avx --disable-avx2 --disable-avx512 --enable-amd-gpu --enable-single-precision
``` ```
#### Problems of building with clang-12.0 ####
The libtool tool adds some flags to the compiler commands (to be used for linking by ld) which are not known
by the clang-12 compiler. One way to solve this issue is to run the following commands directly after the configure step:
```
sed -i 's/\\$wl-soname \\$wl\\$soname/-fuse-ld=ld -Wl,-soname,\\$soname/g' libtool
sed -i 's/\\$wl--whole-archive\\$convenience \\$wl--no-whole-archive//g' libtool
```
...@@ -146,7 +146,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst ...@@ -146,7 +146,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example `man elpa2_print_kernels` should provide the documentation for the *ELPA* program which prints all For example `man elpa2_print_kernels` should provide the documentation for the *ELPA* program which prints all
the available kernels. the available kernels.
Also a [online doxygen documentation](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2020.11.001/html/index.html) Also a [online doxygen documentation](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2021.05.001.rc1/html/index.html)
for each *ELPA* release is available. for each *ELPA* release is available.
...@@ -13,7 +13,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst ...@@ -13,7 +13,7 @@ Local documentation (via man pages) should be available (if *ELPA* has been inst
For example `man elpa2_print_kernels` should provide the documentation for the *ELPA* program, which prints all For example `man elpa2_print_kernels` should provide the documentation for the *ELPA* program, which prints all
the available kernels. the available kernels.
Also a [online doxygen documentation](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2020.11.001/html/index.html) Also a [online doxygen documentation](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2021.05.001.rc1/html/index.html)
for each *ELPA* release is available. for each *ELPA* release is available.
...@@ -180,7 +180,7 @@ The following table gives a list of all supported parameters which can be used t ...@@ -180,7 +180,7 @@ The following table gives a list of all supported parameters which can be used t
## III) List of computational routines ## ## III) List of computational routines ##
The following compute routines are available in *ELPA*: Please have a look at the man pages or [online doxygen documentation](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2020.11.001/html/index.html) for details. The following compute routines are available in *ELPA*: Please have a look at the man pages or [online doxygen documentation](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2021.05.001.rc1/html/index.html) for details.
| Name | Purpose | since API version | | Name | Purpose | since API version |
......
...@@ -22,7 +22,7 @@ The *ELPA* library consists of two main parts: ...@@ -22,7 +22,7 @@ The *ELPA* library consists of two main parts:
Both variants of the *ELPA* solvers are available for real or complex single and double precision valued matrices. Both variants of the *ELPA* solvers are available for real or complex single and double precision valued matrices.
Thus *ELPA* provides the following user functions (see man pages or [online](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2020.11.001/html/index.html) for details): Thus *ELPA* provides the following user functions (see man pages or [online](http://elpa.mpcdf.mpg.de/html/Documentation/ELPA-2021.05.001.rc1/html/index.html) for details):
- elpa_get_communicators : set the row / column communicators for *ELPA* - elpa_get_communicators : set the row / column communicators for *ELPA*
- elpa_solve_evp_complex_1stage_{single|double} : solve a {single|double} precision complex eigenvalue problem with the *ELPA 1stage* solver - elpa_solve_evp_complex_1stage_{single|double} : solve a {single|double} precision complex eigenvalue problem with the *ELPA 1stage* solver
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
%define with_openmp 0 %define with_openmp 0
Name: elpa Name: elpa
Version: 2020.11.001 Version: 2021.05.001.rc1
Release: 1 Release: 1
Summary: A massively parallel eigenvector solver Summary: A massively parallel eigenvector solver
License: LGPL-3.0 License: LGPL-3.0
......
...@@ -74,6 +74,7 @@ for lang, m, g, gid, q, t, p, d, s, lay, spl in product(sorted(language_flag.key ...@@ -74,6 +74,7 @@ for lang, m, g, gid, q, t, p, d, s, lay, spl in product(sorted(language_flag.key
sorted(solver_flag.keys()), sorted(solver_flag.keys()),
sorted(layout_flag.keys()), sorted(layout_flag.keys()),
sorted(split_comm_flag.keys())): sorted(split_comm_flag.keys())):
if gid == 1 and (g == 0 ): if gid == 1 and (g == 0 ):
continue continue
...@@ -207,50 +208,107 @@ for lang, m, g, gid, q, t, p, d, s, lay, spl in product(sorted(language_flag.key ...@@ -207,50 +208,107 @@ for lang, m, g, gid, q, t, p, d, s, lay, spl in product(sorted(language_flag.key
layoutsuffix="_all_layouts" if lay == "all_layouts" else "", layoutsuffix="_all_layouts" if lay == "all_layouts" else "",
spl="_split_comm_myself" if spl == "myself" else "") spl="_split_comm_myself" if spl == "myself" else "")
print("if BUILD_KCOMPUTER") if (m == "analytic"):
print("bin_PROGRAMS += " + name) print("if BUILD_FUGAKU")
print("else") print("else")
print("noinst_PROGRAMS += " + name) print("if BUILD_KCOMPUTER")
print("endif") print("bin_PROGRAMS += " + name)
print("else")
if lay == "square" or t == "generalized": print("noinst_PROGRAMS += " + name)
if kernel == "all_kernels": print("endif")
print("check_SCRIPTS += " + name + "_extended.sh") print("endif")
else:
print("check_SCRIPTS += " + name + "_default.sh")
elif lay == "all_layouts":
if kernel == "all_kernels":
print("check_SCRIPTS += " + name + "_extended.sh")
else:
print("check_SCRIPTS += " + name + "_extended.sh")
else: else:
raise Exception("Unknown layout {0}".format(lay)) print("if BUILD_KCOMPUTER")
print("bin_PROGRAMS += " + name)
if lang == "Fortran": print("else")
print(name + "_SOURCES = test/Fortran/test.F90") print("noinst_PROGRAMS += " + name)
print(name + "_LDADD = $(test_program_ldadd)") print("endif")
print(name + "_FCFLAGS = $(test_program_fcflags) \\")
if (m == "analytic"):
elif lang == "C": print("if BUILD_FUGAKU")
print(name + "_SOURCES = test/C/test.c") print("else")
print(name + "_LDADD = $(test_program_ldadd) $(FCLIBS)") if lay == "square" or t == "generalized":
print(name + "_CFLAGS = $(test_program_cflags) \\") if kernel == "all_kernels":
print("check_SCRIPTS += " + name + "_extended.sh")
else:
print("check_SCRIPTS += " + name + "_default.sh")
elif lay == "all_layouts":
if kernel == "all_kernels":
print("check_SCRIPTS += " + name + "_extended.sh")
else:
print("check_SCRIPTS += " + name + "_extended.sh")
else:
raise Exception("Unknown layout {0}".format(lay))
if lang == "Fortran":
print(name + "_SOURCES = test/Fortran/test.F90")
print(name + "_LDADD = $(test_program_ldadd)")
print(name + "_FCFLAGS = $(test_program_fcflags) \\")
elif lang == "C":
print(name + "_SOURCES = test/C/test.c")
print(name + "_LDADD = $(test_program_ldadd) $(FCLIBS)")
print(name + "_CFLAGS = $(test_program_cflags) \\")
else:
raise Exception("Unknown language")
print(" -DTEST_CASE=\\\"{0}\\\" \\".format(name))
print(" " + " \\\n ".join([
domain_flag[d],
prec_flag[p],
test_type_flag[t],
solver_flag[s],
gpu_flag[g],
gpu_id_flag[gid],
qr_flag[q],
matrix_flag[m]] + extra_flags))
print("endif\n" * endifs)
print("")
print("endif")
print("")
else: else:
raise Exception("Unknown language") if lay == "square" or t == "generalized":
if kernel == "all_kernels":
print(" -DTEST_CASE=\\\"{0}\\\" \\".format(name)) print("check_SCRIPTS += " + name + "_extended.sh")
print(" " + " \\\n ".join([ else:
domain_flag[d], print("check_SCRIPTS += " + name + "_default.sh")
prec_flag[p], elif lay == "all_layouts":
test_type_flag[t], if kernel == "all_kernels":
solver_flag[s], print("check_SCRIPTS += " + name + "_extended.sh")
gpu_flag[g], else:
gpu_id_flag[gid], print("check_SCRIPTS += " + name + "_extended.sh")
qr_flag[q], else:
matrix_flag[m]] + extra_flags)) raise Exception("Unknown layout {0}".format(lay))
print("endif\n" * endifs)