Commit 81c1bf30 authored by Andreas Marek's avatar Andreas Marek
Browse files

MPI thread level checks at configure

parent 82ad3340
......@@ -398,6 +398,9 @@ if test x"${enable_openmp}" = x"yes"; then
FCFLAGS="$OPENMP_FCFLAGS $FCFLAGS"
fi
#AC_LANG_POP([Fortran])
want_mpi_launcher="no"
AC_MSG_CHECKING(whether mpi-launcher should be detected)
AC_ARG_ENABLE(detect-mpi-launcher,
......@@ -596,6 +599,8 @@ if test x"$can_compile_with_mkl" = x"yes" ; then
AC_MSG_RESULT([${have_mkl}])
fi
#AC_LANG_POP([Fortran])
dnl if not mkl, check all the necessary individually
if test x"${have_mkl}" = x"yes" ; then
WITH_MKL=1
......@@ -657,6 +662,7 @@ else
AC_MSG_ERROR([could not link with scalapack: specify path])
fi
fi
AC_LANG_PUSH([Fortran])
dnl check whether we can link altogether
AC_MSG_CHECKING([whether we can link a Fortran program with all blacs/scalapack])
......@@ -704,7 +710,7 @@ AC_MSG_RESULT([${fortran_can_check_environment}])
if test x"${fortran_can_check_environment}" = x"yes" ; then
AC_DEFINE([HAVE_ENVIRONMENT_CHECKING],[1],[Fortran can query environment variables])
fi
AC_LANG_POP([Fortran])
dnl check whether BAND_TO_FULL_BLOCKING is set
AC_MSG_CHECKING(whether BAND_TO_FLULL_BLOCKING is requested)
......@@ -754,7 +760,7 @@ if test x"${user_sets_nvidia_gpu_compute_capability}" = x"yes" ; then
fi
fi
AC_LANG_PUSH([Fortran])
dnl Test possibility of 'use mpi', if requested
if test x"${with_mpi}" = x"yes" ; then
AC_ARG_ENABLE([mpi-module],
......@@ -788,6 +794,115 @@ if test x"${with_mpi}" = x"yes" ; then
fi
fi
fi
dnl Hybrid MPI+OpenMP builds only: declare the configure switches that decide
dnl how ELPA treats the threading support level offered by the MPI library.
if test x"$with_mpi" = x"yes" && test x"$enable_openmp" = x"yes"; then

  dnl (1) --disable-threading-support-checks; the checks default to enabled.
  AC_MSG_CHECKING([whether the threading support of the MPI library should be checked])
  AC_ARG_ENABLE([threading-support-checks],
    [AS_HELP_STRING([--disable-threading-support-checks],
      [do neither at build nor at runtime check the required threading support of the MPI library. USE AT YOUR OWN RISK!])],
    [AS_IF([test x"$enableval" = x"yes"],
      [enable_threading_support_checks=yes],
      [enable_threading_support_checks=no])],
    [enable_threading_support_checks=yes])
  AC_MSG_RESULT([${enable_threading_support_checks}])
  AS_IF([test x"${enable_threading_support_checks}" = x"yes"],
    [AC_DEFINE([THREADING_SUPPORT_CHECK],[1],[can check at runtime the threading support level of MPI])])

  dnl (2) --enable-allow-thread-limiting; thread limiting defaults to disabled.
  AC_MSG_CHECKING([whether ELPA is allowed to limit the number of OpenMP threads at runtime])
  AC_ARG_ENABLE([allow-thread-limiting],
    [AS_HELP_STRING([--enable-allow-thread-limiting],
      [do a runtime check whether threading support of the MPI library is sufficient. If not ELPA will limit the number of OpenMP threads to 1 during the run])],
    [AS_IF([test x"$enableval" = x"yes"],
      [enable_allow_thread_limiting=yes],
      [enable_allow_thread_limiting=no])],
    [enable_allow_thread_limiting=no])
  AC_MSG_RESULT([${enable_allow_thread_limiting}])
  AS_IF([test x"${enable_allow_thread_limiting}" = x"yes"],
    [AC_DEFINE([ALLOW_THREAD_LIMITING],[1],[ELPA can at runtime limit the number of OpenMP threads to 1 if needed])])

  dnl (3) The two switches above must agree: limiting threads is only possible
  dnl when the support checks are kept, so the contradictory combination aborts.
  AS_IF([test x"${enable_allow_thread_limiting}" = x"yes" && test x"${enable_threading_support_checks}" = x"no"],
    [AC_MSG_ERROR([You cannot set --enable-allow-thread-limiting and --disable-threading-support-checks at the same time. Thread limiting needs support checks!])])

  dnl (4) --with-threading-support-check-during-build; the build-time check
  dnl defaults to enabled.
  AC_ARG_WITH([threading-support-check-during-build],
    [AS_HELP_STRING([--with-threading-support-check-during-build],
      [Do checks at build time whether the MPI threading level support is sufficient])],
    [AS_IF([test x"$withval" = x"yes"],
      [with_threading_support_check_during_build=yes],
      [with_threading_support_check_during_build=no])],
    [with_threading_support_check_during_build=yes])

  dnl NOTE(review): nothing is done here when both the build-time check and
  dnl thread limiting are switched off; confirm that this is intended.
fi
dnl Build-time probe of the MPI threading level.
dnl
dnl Runs only for MPI+OpenMP builds with the threading support checks enabled,
dnl thread limiting disabled and the build-time check requested. A small
dnl Fortran program calls mpi_init_thread and exits with status 0 only if the
dnl provided level is MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE.
dnl
dnl Outcome stored in mpi_threading_level_sufficient:
dnl   yes     - the program ran and reported a sufficient level;
dnl             HAVE_SUFFICIENT_MPI_THREADING_SUPPORT is defined
dnl   no      - the program failed or reported an insufficient level;
dnl             configure aborts after listing the available options
dnl   unknown - cross compiling, the program cannot be run; nothing is
dnl             defined, so code guarded by
dnl             !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT) stays active
if test x"${enable_openmp}" = x"yes" && test x"${with_mpi}" = x"yes" && test x"${enable_threading_support_checks}" = x"yes" && test x"${enable_allow_thread_limiting}" = x"no" && test x"${with_threading_support_check_during_build}" = x"yes"; then
mpi_threading_level_sufficient=no
AC_MSG_NOTICE([**************************************************************************************************************************])
AC_MSG_NOTICE([* Please notice if the following step hangs or aborts abnormaly then you cannot run a short MPI-program during configure *])
AC_MSG_NOTICE([* In this case please re-run configure with '--with-threading-support-check-during-build=no' _AND_ follow the hints in *])
AC_MSG_NOTICE([* the INSTALL and USER_GUIDE documents *])
AC_MSG_NOTICE([* In case you get some other warnings about threading support follow on of the steps detailed there *])
AC_MSG_NOTICE([**************************************************************************************************************************])
AC_MSG_CHECKING(what threading level is supported by the MPI library)
AC_RUN_IFELSE([AC_LANG_SOURCE([
program testit
use iso_c_binding
implicit none
include "mpif.h"
integer(kind=c_int) :: provided, error, status
! assume failure until a sufficient level has been reported
status = 1
call mpi_init_thread(MPI_THREAD_MULTIPLE, provided,error)
! write the provided level next to the four MPI threading constants
print *,provided,MPI_THREAD_SINGLE,MPI_THREAD_FUNNELED,MPI_THREAD_SERIALIZED,MPI_THREAD_MULTIPLE
if (provided .eq. MPI_THREAD_SERIALIZED .or. provided .eq. MPI_THREAD_MULTIPLE) then
status = 0
endif
call mpi_finalize(error)
call exit(status)
end
])],
[mpi_threading_level_sufficient=yes],
[mpi_threading_level_sufficient=no],
[mpi_threading_level_sufficient=unknown]
)
AC_MSG_RESULT([${mpi_threading_level_sufficient}])
if test x"${mpi_threading_level_sufficient}" = x"yes" ; then
AC_DEFINE([HAVE_SUFFICIENT_MPI_THREADING_SUPPORT],[1],[MPI threading support is sufficient])
elif test x"${mpi_threading_level_sufficient}" = x"unknown" ; then
dnl Do not pretend the level is sufficient when nothing could be executed.
AC_MSG_WARN([Cross compiling: the threading level of your MPI library could not be checked during configure])
AC_MSG_WARN([The threading support is therefore NOT assumed to be sufficient.])
AC_MSG_WARN([Consider re-running configure with --enable-allow-thread-limiting (see the INSTALL and USER_GUIDE documents)])
else
AC_MSG_WARN([Your MPI implementation does not provide a sufficient threading level for OpenMP])
AC_MSG_WARN([You do have several options:])
AC_MSG_WARN([ * disable OpenMP (--disable-openmp): this will ensure correct results, but maybe some performance drop])
AC_MSG_WARN([ * use an MPI-library with the required threading support level (see the INSTALL and USER_GUIDE): this will ensure correct results and best performance])
AC_MSG_WARN([ * allow ELPA at runtime to change the number of threads to 1 (--enable-allow-thread-limiting): this will ensure correct results, ])
AC_MSG_WARN([ but maybe also not the best performance (dependence on the threading of your blas/lapack libraries), see the USER_GUIDE])
AC_MSG_WARN([ * switch of the checking of threading support (--disable-threading-support-checks): DO THIS AT YOUR OWN RISK! This will be fast, ])
AC_MSG_WARN([ but might (depending on your MPI library sometimes) lead to wrong results])
AC_MSG_ERROR([You do have to take an action of the choices above!])
fi
fi
AC_LANG_POP([Fortran])
dnl Assemble the list of kernels to build
......@@ -2329,6 +2444,13 @@ else
make -f $srcdir/generated_headers.am generated-headers top_srcdir="$srcdir" CPP="$CPP"
fi
# Final reminder for hybrid MPI+OpenMP builds that were configured with the
# MPI threading support checks switched off.
if test x"$with_mpi" = x"yes" && test x"${enable_openmp}" = x"yes" && test x"$enable_threading_support_checks" = x"no"; then
  printf '%s\n' \
    " " \
    " You disabled the checking whether your MPI library offers a sufficient level of threading support!" \
    " You 'convince' ELPA that everything is ok, do not complain about problems with ELPA in this build!" \
    " "
fi
if test x"$old_elpa_version" = x"yes"; then
echo " "
echo " It is possible that your current version of ELPA is not the latest one."
......
......@@ -30,7 +30,7 @@ If you obtained *ELPA* from the official git repository, you will not find
the needed configure script! You will have to create the configure script with autoconf. You can also run the `autogen.sh` script that does this step for you.
## (A): Installing *ELPA* as library with configure ##
## Installing *ELPA* as library with configure ##
*ELPA* can be installed with the build steps
- `configure`
......@@ -94,6 +94,8 @@ An excerpt of the most important (*ELPA* specific) options reads as follows:
| `--64bit-integer-math-support` | assumes that BLAS/LAPACK/SCALAPACK use 64bit integers (experimental) |
| `--64bit-integer-mpi-support` | assumes that MPI uses 64bit integers (experimental) |
| `--heterogenous-cluster-support` | allows ELPA to run on clusters of nodes with different Intel CPUs (experimental) |
| `--enable-threading-support-checks` | in case of MPI and OPENMP builds, check during the configure step whether the provided threading support level is sufficient |
| `--enable-allow-thread-limiting` | in case of MPI and OPENMP builds, ELPA is allowed to limit the number of OpenMP threads, if the threading support level is not sufficient |
We recommend that you do not build ELPA in its main directory but that you use it
in a sub-directory:
......@@ -221,6 +223,62 @@ To enable OpenMP support, add
as configure option.
In any case, whether you are building MPI+OpenMP or only OpenMP (without MPI) it is recommended (for performance reasons)
to use BLAS and LAPACK libraries which _also_ do have threading support. For example, you can link against the Intel MKL
library in the flavor without threading or with threading. Please consult the documentation of your BLAS and LAPACK libraries.
If you want to build a hybrid version of *ELPA* with MPI and with OpenMP support, your
MPI library **should** provide a sufficient level of threading support (i.e. "MPI_THREAD_SERIALIZED" or
"MPI_THREAD_MULTIPLE"). On HPC systems this is almost always the case. In many MPI packages available with
Linux distributions, however, the threading support is quite often limited and **not** sufficient for *ELPA*.
Since release 2021.05.001 ELPA does check during the build time in the configure if the threading support of
your MPI library is sufficient. This option (--enable-threading-support-checks) is enabled by default. **DO NOT
SWITCH THIS OFF, UNLESS YOU KNOW WHAT YOU ARE DOING OR UNLESS CONFIGURE INSTRUCTS YOU TO DO SO.**
If this test passes without an abort of configure and *no* instructions how to cure a threading level support issue,
your hybrid MPI-OpenMP build will be fine and for performance reasons **runtime** checks for threading support will
be disabled.
In the case that configure aborts with these messages
```
configure: WARNING: Your MPI implementation does not provide a sufficient threading level for OpenMP
configure: WARNING: You do have several options:
configure: WARNING: * disable OpenMP (--disable-openmp): this will ensure correct results, but maybe some performance drop
configure: WARNING: * use an MPI-library with the required threading support level (see the INSTALL and USER_GUIDE): this will ensure correct results and best performance
configure: WARNING: * allow ELPA at runtime to change the number of threads to 1 (--enable-allow-thread-limiting): this will ensure correct results,
configure: WARNING: but maybe also not the best performance (dependence on the threading of your blas/lapack libraries), see the USER_GUIDE
configure: WARNING: * switch of the checking of threading support (--disable-threading-support-checks): DO THIS AT YOUR OWN RISK! This will be fast,
configure: WARNING: but might (depending on your MPI library sometimes) lead to wrong results
configure: error: You do have to take an action of the choices above!
```
you can cure the problem in several ways:
- disable OpenMP (by setting --disable-openmp): this will ensure (if the build is successful) that the results of *ELPA* will be correct. However, this option might
lead to a performance drop, if your application calling *ELPA* does use OpenMP threading, since in this situation you will have less MPI tasks than cores on your machine and *ELPA* will only use MPI and thus not utilize all cores.
- the best solution will be to use an MPI library which does offer the required level of threading support. However, this _might_ require some work on your side to build your own MPI library. In any case, it does no harm to search the internet for whether such MPI packages already exist for your Linux distribution (quite often they do, but they are not the default ones).
- if you do not want to disable OpenMP and you cannot provide a MPI library with a sufficient level of threading support, you can re-run configure with the option "--enable-allow-thread-limiting". If this option is enabled, *ELPA* will always do a runtime check whether the MPI library does provide a sufficient level of threading support. If this is not the case, **internally** to *ELPA* (i.e. not affecting your application calling *ELPA*) only **1** OpenMP thread will be used. In case you do use a threaded implementation of BLAS and LAPACK (which performance wise you should always do when using an OpenMP build of *ELPA*), one can still use more than one thread within the BLAS and LAPACK library, **if** the number of threads in these libraries can be controlled with another mechanism than setting the **OMP_NUM_THREADS** environment variable. For example, in case of Intel's MKL library one can control the number of threads with the MKL_NUM_THREADS environment variable.
- by switching off the threading level support checks (with --disable-threading-support-checks) *ELPA* **assumes** that your MPI library does provide a sufficient level. **DO NOT USE THIS OPTION UNLESS YOU ARE SURE WHAT YOU ARE DOING !** This setting could cause different problems like crashes, sporadic wrong results and so forth since this will lead to undefined behaviour! The *ELPA* developers **will not accept bug reports if this option is used, unless you can document in a detailed way that you did not set this option lightheartedly (see below)!** You might wonder why this option is then at all available. Simply, because some very experienced HPC-experts did ask for this option because of the situation we will discuss now.
Last but not least we want to mention that prior to executing this check, configure will print this information:
```
configure: **************************************************************************************************************************
configure: * Please notice if the following step hangs or aborts abnormaly then you cannot run a short MPI-program during configure *
configure: * In this case please re-run configure with '--with-threading-support-check-during-build=no' *
configure: * In case you get some other warnings about threading support follow on of the steps detailed there *
configure: **************************************************************************************************************************
```
You do not have to care about this, unless configure hangs after printing this message, or configure aborts **without** printing the messages discussed before.
This behaviour might occur, if:
- you (and also configure) do not have the rights to run an MPI program on the compilation machine. Sometimes HPC centers implement this, in order to ensure that login nodes are only used for compilation but not for compute.
- you do have to cross-compile (i.e. you build *ELPA* for a specific architecture on a different architecture)
- some other reason why an MPI program cannot run successfully
If you encounter this situation you can switch off this check during configure by setting "--with-threading-support-check-during-build=no". However, of course *ELPA* cannot know then whether your MPI library does provide a sufficient level of threading support or not. Thus you will have to tell configure what to do by either
- also setting "--enable-allow-thread-limiting" (see above)
- or setting "--disable-threading-support-checks" (see above, especially the warnings)
We recommend the following: in a first step, set "--with-threading-support-check-during-build=no" and "--enable-allow-thread-limiting". After a successful build, run *ELPA* on the target machine with the environment variable "OMP_NUM_THREADS" set to 2. Now, carefully inspect the output (stdout **and** stderr). If *ELPA* does not give a warning that it will limit the number of OpenMP threads to 1 due to an insufficient level of threading support in the MPI library, you can assume that your MPI library does provide a sufficient level. Then **and only then** you can rebuild *ELPA* with the settings "--with-threading-support-check-during-build=no" and "--disable-threading-support-checks".
Note that as in case with/without MPI, you can also build and install versions of *ELPA*
with/without OpenMP support at the same time.
......
......@@ -225,14 +225,18 @@ function elpa_solve_evp_&
! Store the value reported by omp_get_max_threads() on entry.
! NOTE(review): presumably used to restore the setting of the caller later on;
! the restore is not visible in this part of the routine.
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
! Thread limiting is compiled in: read the "limit_openmp_threads" setting and
! use the requested thread count only while that setting is 0.
call obj%get("limit_openmp_threads",limitThreads,error)
if (limitThreads .eq. 0) then
#endif
! Take the thread count from the "omp_threads" setting of the ELPA object.
! Without the preprocessor guard above this is the only path that is compiled.
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
else
! The limit is set: run with exactly one OpenMP thread.
nrThreads = 1
call omp_set_num_threads(nrThreads)
endif
#endif
#else
nrThreads = 1
#endif
......
......@@ -88,14 +88,18 @@
! Store the value reported by omp_get_max_threads() on entry.
! NOTE(review): presumably used to restore the setting of the caller later on;
! the restore is not visible in this part of the routine.
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
! Thread limiting is compiled in: read the "limit_openmp_threads" setting and
! use the requested thread count only while that setting is 0.
call obj%get("limit_openmp_threads",limitThreads,error)
if (limitThreads .eq. 0) then
#endif
! Take the thread count from the "omp_threads" setting of the ELPA object.
! Without the preprocessor guard above this is the only path that is compiled.
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
else
! The limit is set: run with exactly one OpenMP thread.
nrThreads = 1
call omp_set_num_threads(nrThreads)
endif
#endif
#else
nrThreads=1
......
......@@ -99,14 +99,18 @@
! Store the value reported by omp_get_max_threads() on entry.
! NOTE(review): presumably used to restore the setting of the caller later on;
! the restore is not visible in this part of the routine.
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
! Thread limiting is compiled in: read the "limit_openmp_threads" setting and
! use the requested thread count only while that setting is 0.
call obj%get("limit_openmp_threads",limitThreads,error)
if (limitThreads .eq. 0) then
#endif
! Take the thread count from the "omp_threads" setting of the ELPA object.
! Without the preprocessor guard above this is the only path that is compiled.
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
else
! The limit is set: run with exactly one OpenMP thread.
nrThreads = 1
call omp_set_num_threads(nrThreads)
endif
#endif
#else
nrThreads=1
#endif
......
......@@ -263,14 +263,18 @@
! Store the value reported by omp_get_max_threads() on entry.
! NOTE(review): presumably used to restore the setting of the caller later on;
! the restore is not visible in this part of the routine.
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
! Thread limiting is compiled in: read the "limit_openmp_threads" setting and
! use the requested thread count only while that setting is 0.
call obj%get("limit_openmp_threads",limitThreads,error)
if (limitThreads .eq. 0) then
#endif
! Take the thread count from the "omp_threads" setting of the ELPA object.
! Without the preprocessor guard above this is the only path that is compiled.
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
else
! The limit is set: run with exactly one OpenMP thread.
nrThreads = 1
call omp_set_num_threads(nrThreads)
endif
#endif
#else
nrThreads = 1
......
......@@ -634,15 +634,22 @@ module elpa_impl
endif
endif
#ifdef WITH_OPENMP_TRADITIONAL
#if defined(WITH_OPENMP_TRADITIONAL) && defined(THREADING_SUPPORT_CHECK) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
! Runtime check of the threading level provided by the MPI library.
! mpi_query_thread returns the provided level in providedMPI.
call mpi_query_thread(providedMPI, mpierr)
! The level is insufficient only if it is NEITHER MPI_THREAD_SERIALIZED NOR
! MPI_THREAD_MULTIPLE, so the two inequality tests are joined with .and.
! (joined with .or. the condition would hold for every value of providedMPI).
if ((providedMPI .ne. MPI_THREAD_SERIALIZED) .and. (providedMPI .ne. MPI_THREAD_MULTIPLE)) then
#if defined(ALLOW_THREAD_LIMITING)
! Thread limiting was enabled at configure time: warn and set the
! "limit_openmp_threads" entry to 1 on this ELPA object.
write(error_unit,*) "WARNING elpa_setup: MPI threading level MPI_THREAD_SERALIZED or MPI_THREAD_MULTIPLE required but &
&your implementation does not support this! The number of OpenMP threads within ELPA will be &
&limited to 1"
call self%set("limit_openmp_threads", 1, error)
! leave the setup routine if storing the limit failed
if (check_elpa_set(error, ELPA_ERROR_SETUP)) return
#else
! Thread limiting was not enabled: only a warning is written, nothing is changed.
write(error_unit,*) "WARNING elpa_setup: MPI threading level MPI_THREAD_SERALIZED or MPI_THREAD_MULTIPLE required but &
&your implementation does not support this! Since you did not build elpa with &
&--enable-allow-thread-limiting, this is just a warning. ELPA will not try to cure this problem and&
&the results might be wrong. USE AT YOUR OWN RISK"
#endif
endif
#endif
......
......@@ -286,7 +286,9 @@ static const elpa_index_int_entry_t int_entries[] = {
#endif
INT_ENTRY("cannon_buffer_size", "Increasing the buffer size might make it faster, but costs memory", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
cannon_buffer_size_cardinality, cannon_buffer_size_enumerate, cannon_buffer_size_is_valid, NULL, PRINT_YES),
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
BOOL_ENTRY("limit_openmp_threads", "Limit the number if openmp threads to 1", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_NO),
#endif
BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL, PRINT_YES),
BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment