...
 
......@@ -40,6 +40,7 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/helpers/mod_precision.F90 \
src/helpers/mod_mpi.F90 \
src/helpers/mod_mpi_stubs.F90 \
src/helpers/mod_omp.F90 \
src/elpa_generated_fortran_interfaces.F90 \
src/elpa2/mod_redist_band.F90 \
src/elpa2/mod_pack_unpack_cpu.F90 \
......@@ -109,7 +110,6 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2/kernels/complex_template.F90 \
src/elpa2/kernels/simple_template.F90 \
src/elpa2/kernels/simple_block4_template.F90 \
src/elpa2/kernels/simple_block6_template.F90 \
src/elpa2/pack_unpack_cpu.F90 \
src/elpa2/pack_unpack_gpu.F90 \
src/elpa2/compute_hh_trafo.F90 \
......@@ -194,9 +194,9 @@ if WITH_REAL_GENERIC_SIMPLE_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_simple_block4.F90
endif
if WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_simple_block6.F90
endif
#if WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_simple_block6.F90
#endif
if WITH_REAL_BGP_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_bgp.f90
endif
......@@ -455,6 +455,7 @@ nobase_nodist_elpa_include_HEADERS = \
elpa/elpa_version.h \
elpa/elpa_constants.h \
elpa/elpa_generated.h \
elpa/elpa_generated_c_api.h \
elpa/elpa_generated_legacy.h
dist_man_MANS = \
......@@ -686,7 +687,8 @@ test_python.sh:
include doxygen.am
CLEANFILES = \
elpa-generated.h \
elpa_generated.h \
elpa_generated_c_api.h \
elpa1_test* \
elpa2_test*\
elpa2_real* \
......@@ -718,16 +720,14 @@ clean-local:
-rm -rf $(generated_headers)
distclean-local:
-rm -rf ./m4
-rm -rf ./src
-rm -rf ./test
-rm -rf ./modules
-rm -rf .fortran_dependencies
-rm config-f90.h
-rm -rf ./src/elpa2/kernels/.deps
-rm -rf ./src/.deps
-rm -rf ./test/.deps
-rmdir ./src/elpa2/kernels/
-rmdir ./src
-rmdir ./test
-rmdir ./m4
-rmdir modules/
-rmdir .fortran_dependencies/
-rm -rf elpa/elpa_generated_c_api.h
EXTRA_DIST = \
elpa.spec \
......@@ -790,7 +790,6 @@ EXTRA_DIST = \
src/elpa2/kernels/real_template.F90 \
src/elpa2/kernels/simple_template.F90 \
src/elpa2/kernels/simple_block4_template.F90 \
src/elpa2/kernels/simple_block6_template.F90 \
src/elpa2/pack_unpack_cpu.F90 \
src/elpa2/pack_unpack_gpu.F90 \
src/elpa2/qr/elpa_pdgeqrf_template.F90 \
......
......@@ -91,6 +91,13 @@ Nonetheless, we are grateful if you cite the following publications:
structure theory and computational science",
Journal of Physics Condensed Matter, 26 (2014)
doi:10.1088/0953-8984/26/21/213201
Kus, P.; Marek, A.; Lederer, H.
"GPU Optimization of Large-Scale Eigenvalue Solver",
In: Radu F., Kumar K., Berre I., Nordbotten J., Pop I. (eds)
Numerical Mathematics and Advanced Applications ENUMATH 2017. ENUMATH 2017.
Lecture Notes in Computational Science and Engineering, vol. 126. Springer, Cham
## Installation of the *ELPA* library
......
......@@ -2,24 +2,24 @@
if [ "$(hostname)" == "freya01" ]; then module purge && source /mpcdf/soft/obs_modules.sh && module load git intel/18.0.3 impi/2018.3 mkl/2018.4 anaconda/3/5.1 mpi4py/3.0.0 gcc/8 autoconf automake libtool pkg-config && unset SLURM_MPI_TYPE I_MPI_SLURM_EXT I_MPI_PMI_LIBRARY I_MPI_PMI2 I_MPI_HYDRA_BOOTSTRAP; fi
if [ "$(hostname)" == "buildtest-rzg" ]; then module load impi/5.1.3 intel/16.0 gcc/6.3 mkl/11.3 autotools pkg-config; fi
#if [ "$(hostname)" == "buildtest-rzg" ]; then module load impi/5.1.3 intel/16.0 gcc/6.3 mkl/11.3 autotools pkg-config; fi
if [ "$(hostname)" == "knl1" -o "$(hostname)" == "knl2" -o "$(hostname)" == "knl3" -o "$(hostname)" == "knl4" ] ; then module load impi/2017.4 intel/17.0 gcc/7.2 mkl/2017 pkg-config; fi
if [ "$(hostname)" == "maik" ]; then module load impi/5.1.3 intel intel/17.0 gcc/6.3 mkl/2017 pkg-config/0.29.1; fi
#if [ "$(hostname)" == "knl1" -o "$(hostname)" == "knl2" -o "$(hostname)" == "knl3" -o "$(hostname)" == "knl4" ] ; then module load impi/2017.4 intel/17.0 gcc/7.2 mkl/2017 pkg-config; fi
#if [ "$(hostname)" == "maik" ]; then module load impi/5.1.3 intel intel/17.0 gcc/6.3 mkl/2017 pkg-config/0.29.1; fi
if [ "$(hostname)" == "gp02" ] ; then module load impi/5.1.3 intel/17.0 gcc/6.3 mkl/2017 pkg-config; fi
#if [ "$(hostname)" == "gp02" ] ; then module load impi/5.1.3 intel/17.0 gcc/6.3 mkl/2017 pkg-config; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-1" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-2" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-3" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "amarek-elpa-gitlab-runner-4" ]; then module load intel/16.0 gcc mkl/11.3 autoconf automake libtool impi/5.1.3; fi
if [ "$(hostname)" == "dvl01" ]; then module load intel/17.0 gcc/6.4 mkl/2017 impi/2017.4 cuda/9.2; fi
if [ "$(hostname)" == "dvl02" ]; then module load intel/17.0 gcc/6.4 mkl/2017 impi/2017.4 cuda/9.2; fi
#if [ "$(hostname)" == "dvl01" ]; then module load intel/17.0 gcc/6.4 mkl/2017 impi/2017.4 cuda/9.2; fi
#if [ "$(hostname)" == "dvl02" ]; then module load intel/17.0 gcc/6.4 mkl/2017 impi/2017.4 cuda/9.2; fi
if [ "$(hostname)" == "miy01" ]; then module purge && module load gcc/5.4 smpi essl/5.5 cuda pgi/17.9 && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH && export OMPI_CC=gcc && export OMPI_FC=gfortran; fi
if [ "$(hostname)" == "miy02" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
if [ "$(hostname)" == "miy03" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
#if [ "$(hostname)" == "miy01" ]; then module purge && module load gcc/5.4 smpi essl/5.5 cuda pgi/17.9 && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH && export OMPI_CC=gcc && export OMPI_FC=gfortran; fi
#if [ "$(hostname)" == "miy02" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
#if [ "$(hostname)" == "miy03" ]; then module load gcc/5.4 pgi/17.9 ompi/pgi/17.9/1.10.2 essl/5.5 cuda && export LD_LIBRARY_PATH=/opt/ibm/spectrum_mpi/lib:/opt/ibm/spectrum_mpi/profilesupport/lib:$LD_LIBRARY_PATH && export PATH=/opt/ibm/spectrum_mpi/bin:$PATH; fi
if [ "$(hostname)" != "miy01" -a "$(hostname)" != "miy02" -a "$(hostname)" != "miy03" ]; then
......
......@@ -13,6 +13,7 @@ configueArg=""
skipStep=0
batchCommand=""
interactiveRun="yes"
slurmBatch="no"
function usage() {
cat >&2 <<-EOF
......@@ -20,7 +21,7 @@ function usage() {
Call all the necessary steps to perform an ELPA CI test
Usage:
run_ci_tests [-c configure arguments] [-j makeTasks] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-s skipStep] [-q submit command] [-i interactive run]
run_ci_tests [-c configure arguments] [-j makeTasks] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-s skipStep] [-q submit command] [-i interactive run] [-S submit to Slurm]
Options:
-c configure arguments
......@@ -40,7 +41,7 @@ function usage() {
-o OpenMP threads
Number of OpenMP threads used during runs of ELPA tests
-j makeTaks
-j makeTasks
Number of processes make should use during build (default 1)
-s skipStep
......@@ -52,13 +53,16 @@ function usage() {
-i interactive_run
if "yes" NO no batch command will be triggered
-S submit to slurm
if "yes" a SLURM batch job will be submitted
-h
Print this help text
EOF
}
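# Example invocation (hypothetical flag values; the script path is assumed to match the usage name above):
#   ./ci_test_scripts/run_ci_tests.sh -c "--enable-openmp" -j 4 -t 2 -m 150 -n 150 -b 16 -o 2 -s 0 -i yes -S no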
while getopts "c:t:j:m:n:b:o:s:q:i:h" opt; do
while getopts "c:t:j:m:n:b:o:s:q:i:S:h" opt; do
case $opt in
j)
makeTasks=$OPTARG;;
......@@ -80,6 +84,8 @@ while getopts "c:t:j:m:n:b:o:s:q:i:h" opt; do
batchCommand=$OPTARG;;
i)
interactiveRun=$OPTARG;;
S)
slurmBatch=$OPTARG;;
:)
echo "Option -$OPTARG requires an argument" >&2;;
h)
......@@ -90,17 +96,127 @@ while getopts "c:t:j:m:n:b:o:s:q:i:h" opt; do
esac
done
if [ $skipStep -eq 1 ]
then
echo "Skipping the test since option -s has been specified"
exit 0
else
fi
if [ "$slurmBatch" == "yes" ]
then
# default exit code
exitCode=1
CLUSTER=""
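# map the runner's HOST name (assumed to be provided by the CI environment) to the cluster label used to pick the job-script template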
if [[ "$HOST" =~ "cobra" ]]
then
CLUSTER="cobra"
fi
if [[ "$HOST" =~ "talos" ]]
then
CLUSTER="talos"
fi
if [[ "$HOST" =~ "freya" ]]
then
CLUSTER="freya"
fi
if [[ "$HOST" =~ "draco" ]]
then
CLUSTER="draco"
fi
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS"
# GPU runners
if [ "$CI_RUNNER_TAGS" == "gpu" ]
then
cp $HOME/runners/job_script_templates/run_${CLUSTER}_1node_2GPU.sh .
echo "./configure " "$configureArgs" >> ./run_${CLUSTER}_1node_2GPU.sh
echo " " >> ./run_${CLUSTER}_1node_2GPU.sh
echo "make -j 16" >> ./run_${CLUSTER}_1node_2GPU.sh
echo " " >> ./run_${CLUSTER}_1node_2GPU.sh
echo "export OMP_NUM_THREADS=$ompThreads" >> ./run_${CLUSTER}_1node_2GPU.sh
echo "export TASKS=$mpiTasks" >> ./run_${CLUSTER}_1node_2GPU.sh
echo "make check TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" " >> ./run_${CLUSTER}_1node_2GPU.sh
echo " "
echo "Job script for the run"
cat ./run_${CLUSTER}_1node_2GPU.sh
echo " "
echo "Submitting to SLURM"
sbatch -W ./run_${CLUSTER}_1node_2GPU.sh
exitCode=$?
echo " "
echo "Exit Code of sbatch: $exitCode"
echo " "
if (( $exitCode > 0 ))
then
cat ./ELPA_CI_2gpu.err.*
fi
fi
#SSE, AVX, AVX2, and AVX-512 runners
if [[ "$CI_RUNNER_TAGS" =~ "sse" ]] || [[ "$CI_RUNNER_TAGS" =~ "avx" ]] || [[ "$CI_RUNNER_TAGS" =~ "avx2" ]] || [ ["$CI_RUNNER_TAGS" =~ "avx512" ]]
then
cp $HOME/runners/job_script_templates/run_${CLUSTER}_1node.sh .
echo "./configure " "$configureArgs" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "make -j 16" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "export OMP_NUM_THREADS=$ompThreads" >> ./run_${CLUSTER}_1node.sh
echo "export TASKS=$mpiTasks" >> ./run_${CLUSTER}_1node.sh
echo "make check TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" " >> ./run_${CLUSTER}_1node.sh
echo " "
echo "Job script for the run"
cat ./run_${CLUSTER}_1node.sh
echo " "
echo "Submitting to SLURM"
sbatch -W ./run_${CLUSTER}_1node.sh
exitCode=$?
echo " "
echo "Exit Code of sbatch: $exitCode"
echo " "
cat ./ELPA_CI.out.*
if [ $exitCode -ne 0 ]
then
cat ./ELPA_CI.err.*
fi
fi
if [ $exitCode -ne 0 ]
then
cat ./test-suite.log
fi
exit $exitCode
fi
# not skipped, so proceed
if [ "$slurmBatch" == "no" ]
then
# this is the old, messy implementation for
# - appdev
# - freya-interactive
# - draco
# - buildtest
# - virtual machine runners
# hopefully this can be removed soon
echo "Using old CI logic for appdev"
if [ "$batchCommand" == "srun" ]
then
# use srun to start mpi jobs
if [ "$interactiveRun" == "no" ]
then
# interactive runs are not possible
echo "Running with $batchCommand with $SRUN_COMMANDLINE_CONFIGURE"
# $batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE bash -c ' {source /etc/profile.d/modules.sh && source ./ci_test_scripts/ci-env-vars && eval ./configure $configureArgs; }'
# $batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE bash -c ' {source /etc/profile.d/modules.sh && source ./ci_test_scripts/ci-env-vars && eval ./configure $configureArgs; }'
$batchCommand --ntasks-per-core=1 --ntasks=1 --cpus-per-task=1 $SRUN_COMMANDLINE_CONFIGURE ./ci_test_scripts/configure_step.sh "$configureArgs"
if [ $? -ne 0 ]; then cat config.log && exit 1; fi
......@@ -112,22 +228,26 @@ else
grep -i "Expected %stop" test-suite.log && exit 1 || true ;
if [ $? -ne 0 ]; then exit 1; fi
else
#eval ./configure $configureArgs
./ci_test_scripts/configure_step.sh "$configureArgs"
# interactive runs are possible
#eval ./configure $configureArgs
./ci_test_scripts/configure_step.sh "$configureArgs"
if [ $? -ne 0 ]; then cat config.log && exit 1; fi
if [ $? -ne 0 ]; then cat config.log && exit 1; fi
make -j $makeTasks
if [ $? -ne 0 ]; then exit 1; fi
make -j $makeTasks
if [ $? -ne 0 ]; then exit 1; fi
OMP_NUM_THREADS=$ompThreads make check TASKS=$mpiTasks TEST_FLAGS="$matrixSize $nrEV $blockSize" || { cat test-suite.log; exit 1; }
if [ $? -ne 0 ]; then exit 1; fi
OMP_NUM_THREADS=$ompThreads make check TASKS=$mpiTasks TEST_FLAGS="$matrixSize $nrEV $blockSize" || { cat test-suite.log; exit 1; }
if [ $? -ne 0 ]; then exit 1; fi
grep -i "Expected %stop" test-suite.log && exit 1 || true ;
if [ $? -ne 0 ]; then exit 1; fi
grep -i "Expected %stop" test-suite.log && exit 1 || true ;
if [ $? -ne 0 ]; then exit 1; fi
fi
else
# do not use srun to start mpi applications
#eval ./configure $configureArgs
./ci_test_scripts/configure_step.sh "$configureArgs"
......@@ -142,5 +262,8 @@ else
grep -i "Expected %stop" test-suite.log && exit 1 || true ;
if [ $? -ne 0 ]; then exit 1; fi
fi
else
# a submission to SLURM via a batch script will be done
echo "Submitting to a SLURM batch system"
fi
#!/bin/bash
set -e
set -x
#some defaults
mpiTasks=2
matrixSize=150
nrEV=$matrixSize
blockSize=16
ompThreads=1
configureArgs=""
batchCommand=""
slurmBatch="no"
function usage() {
cat >&2 <<-EOF
Call all the necessary steps to perform an ELPA CI test
Usage:
run_distcheck_tests [-c configure arguments] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-q submit command] [-S submit to Slurm]
Options:
-c configure arguments
Line of arguments passed to configure call
-t MPI Tasks
Number of MPI processes used during test runs of ELPA tests
-m Matrix size
Size of the mxm matrix used during runs of ELPA tests
-n Number of eigenvectors
Number of eigenvectors to be computed during runs of ELPA tests
-b block size
Block size of block-cyclic distribution during runs of ELPA tests
-o OpenMP threads
Number of OpenMP threads used during runs of ELPA tests
-q submit command
Job steps will be submitted via command to a batch system (default no submission)
-S submit to slurm
if "yes" a SLURM batch job will be submitted
-h
Print this help text
EOF
}
while getopts "c:t:j:m:n:b:o:s:q:i:S:h" opt; do
case $opt in
t)
mpiTasks=$OPTARG;;
m)
matrixSize=$OPTARG;;
n)
nrEV=$OPTARG;;
b)
blockSize=$OPTARG;;
o)
ompThreads=$OPTARG;;
c)
configureArgs=$OPTARG;;
q)
batchCommand=$OPTARG;;
S)
slurmBatch=$OPTARG;;
:)
echo "Option -$OPTARG requires an argument" >&2;;
h)
usage
exit 1;;
*)
exit 1;;
esac
done
if [ "$slurmBatch" == "yes" ]
then
# default exit code
exitCode=1
CLUSTER=""
if [[ "$HOST" =~ "cobra" ]]
then
CLUSTER="cobra"
fi
if [[ "$HOST" =~ "talos" ]]
then
CLUSTER="talos"
fi
if [[ "$HOST" =~ "freya" ]]
then
CLUSTER="freya"
fi
if [[ "$HOST" =~ "draco" ]]
then
CLUSTER="draco"
fi
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS"
#distcheck
if [[ "$CI_RUNNER_TAGS" =~ "distcheck" ]]
then
cp $HOME/runners/job_script_templates/run_${CLUSTER}_1node.sh .
echo "./configure " "$configureArgs" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "export TASKS=$mpiTasks" >> ./run_${CLUSTER}_1node.sh
echo "export DISTCHECK_CONFIGURE_FLAGS=\" $configureArgs \" " >> ./run_${CLUSTER}_1node.sh
echo "make distcheck TEST_FLAGS=\" $matrixSize $nrEV $blockSize \" || { chmod u+rwX -R . ; exit 1 ; } " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " "
echo "Job script for the run"
cat ./run_${CLUSTER}_1node.sh
echo " "
echo "Submitting to SLURM"
sbatch -W ./run_${CLUSTER}_1node.sh
exitCode=$?
echo " "
echo "Exit Code of sbatch: $exitCode"
echo " "
cat ./ELPA_CI.out.*
if [ $exitCode -ne 0 ]
then
cat ./ELPA_CI.err.*
fi
fi
if [ $exitCode -ne 0 ]
then
cat ./test-suite.log
fi
exit $exitCode
fi
#!/bin/bash
set -e
set -x
#some defaults
mpiTasks=2
matrixSize=150
nrEV=$matrixSize
blockSize=16
ompThreads=1
configureArgs=""
batchCommand=""
slurmBatch="no"
projectName="unknown"
projectExecutable=""
projectConfigureArgs=""
function usage() {
cat >&2 <<-EOF
Call all the necessary steps to perform an ELPA CI test
Usage:
run_project_tests [-c configure arguments] [-h] [-t MPI Tasks] [-m matrix size] [-n number of eigenvectors] [-b block size] [-o OpenMP threads] [-q submit command] [-S submit to Slurm] [-p projectName] [-e projectExecutable] [-C project configure arguments]
Options:
-c configure arguments
Line of arguments passed to configure call
-t MPI Tasks
Number of MPI processes used during test runs of ELPA tests
-m Matrix size
Size of the mxm matrix used during runs of ELPA tests
-n Number of eigenvectors
Number of eigenvectors to be computed during runs of ELPA tests
-b block size
Block size of block-cyclic distribution during runs of ELPA tests
-o OpenMP threads
Number of OpenMP threads used during runs of ELPA tests
-q submit command
Job steps will be submitted via command to a batch system (default no submission)
-S submit to slurm
if "yes" a SLURM batch job will be submitted
-p project name
specifies which project to build and test
-e project executable
specifies which executable to run
-C project configure arguments
arguments for the configure of the project
-h
Print this help text
EOF
}
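# Example invocation (all values hypothetical, for illustration only):
#   ./ci_test_scripts/run_project_tests.sh -c "--prefix=$PWD/build/installdest" -t 2 -m 150 -n 150 -b 16 -o 1 -S yes \
#     -p my_project -e my_project_executable -C "--with-elpa=$PWD/build/installdest"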
while getopts "c:t:j:m:n:b:o:s:q:i:S:p:e:C:h" opt; do
case $opt in
t)
mpiTasks=$OPTARG;;
m)
matrixSize=$OPTARG;;
n)
nrEV=$OPTARG;;
b)
blockSize=$OPTARG;;
o)
ompThreads=$OPTARG;;
c)
configureArgs=$OPTARG;;
q)
batchCommand=$OPTARG;;
S)
slurmBatch=$OPTARG;;
p)
projectName=$OPTARG;;
e)
projectExecutable=$OPTARG;;
C)
projectConfigureArgs=$OPTARG;;
:)
echo "Option -$OPTARG requires an argument" >&2;;
h)
usage
exit 1;;
*)
exit 1;;
esac
done
if [ "$slurmBatch" == "yes" ]
then
# default exit code
exitCode=1
CLUSTER=""
if [[ "$HOST" =~ "cobra" ]]
then
CLUSTER="cobra"
fi
if [[ "$HOST" =~ "talos" ]]
then
CLUSTER="talos"
fi
if [[ "$HOST" =~ "freya" ]]
then
CLUSTER="freya"
fi
if [[ "$HOST" =~ "draco" ]]
then
CLUSTER="draco"
fi
echo "Running on $CLUSTER with runner $CI_RUNNER_DESCRIPTION with tag $CI_RUNNER_TAGS"
#project_test
if [[ "$CI_RUNNER_TAGS" =~ "project_test" ]]
then
cp $HOME/runners/job_script_templates/run_${CLUSTER}_1node.sh .
echo "mkdir -p build" >> ./run_${CLUSTER}_1node.sh
echo "pushd build" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running autogen " >> ./run_${CLUSTER}_1node.sh
echo "../autogen.sh" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running configure " >> ./run_${CLUSTER}_1node.sh
echo "../configure " "$configureArgs" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "export TASKS=$mpiTasks" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running make " >> ./run_${CLUSTER}_1node.sh
echo "make -j 8" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running make install" >> ./run_${CLUSTER}_1node.sh
echo "make install" >> ./run_${CLUSTER}_1node.sh
echo "popd" >> ./run_${CLUSTER}_1node.sh
echo "mkdir -p $projectName/build" >> ./run_${CLUSTER}_1node.sh
echo "pushd $projectName/build" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " Testting project " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running autogen " >> ./run_${CLUSTER}_1node.sh
echo "../autogen.sh" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running configure " >> ./run_${CLUSTER}_1node.sh
echo "../configure " "$projectConfigureArgs " >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo "Running make " >> ./run_${CLUSTER}_1node.sh
echo "make -j 8" >> ./run_${CLUSTER}_1node.sh
echo "export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:\$LD_LIBRARY_PATH" >> ./run_${CLUSTER}_1node.sh
echo "./$projectExecutable" >> ./run_${CLUSTER}_1node.sh
echo "make distclean" >> ./run_${CLUSTER}_1node.sh
echo "popd" >> ./run_${CLUSTER}_1node.sh
echo "pushd build" >> ./run_${CLUSTER}_1node.sh
echo "make distclean" >> ./run_${CLUSTER}_1node.sh
echo "rm -rf installdest" >> ./run_${CLUSTER}_1node.sh
echo "popd" >> ./run_${CLUSTER}_1node.sh
echo " " >> ./run_${CLUSTER}_1node.sh
echo " "
echo "Job script for the run"
cat ./run_${CLUSTER}_1node.sh
echo " "
echo "Submitting to SLURM"
sbatch -W ./run_${CLUSTER}_1node.sh
exitCode=$?
echo " "
echo "Exit Code of sbatch: $exitCode"
echo " "
cat ./ELPA_CI.out.*
if [ $exitCode -ne 0 ]
then
cat ./ELPA_CI.err.*
fi
fi
if [ $exitCode -ne 0 ]
then
cat ./test-suite.log
fi
exit $exitCode
fi
......@@ -68,6 +68,24 @@ if test x"${enable_legacy}" = x"yes"; then
AC_DEFINE([ENABLE_LEGACY], [1], [enable legacy interface])
fi
AC_MSG_CHECKING(whether the error argument in the C interface should be optional)
AC_ARG_ENABLE([optional-argument-in-C-API],
AS_HELP_STRING([--enable-optional-argument-in-C-API],
[build the C API with the error argument being optional, default no]),
[
if test x"$enableval" = x"yes"; then
optional_c_error_argument=yes
else
optional_c_error_argument=no
fi
],
[optional_c_error_argument=no])
AC_MSG_RESULT([${optional_c_error_argument}])
AM_CONDITIONAL([OPTIONAL_C_ERROR_ARGUMENT],[test x"$optional_c_error_argument" = x"yes"])
if test x"${optional_c_error_argument}" = x"yes"; then
AC_DEFINE([OPTIONAL_C_ERROR_ARGUMENT], [1], [enable error argument in C-API to be optional])
fi
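dnl A user would request this behaviour at configure time, e.g. (hypothetical invocation):
dnl   ./configure --enable-optional-argument-in-C-API FC=gfortran CC=gcc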
# gnu-make fortran module dependencies
m4_include([fdep/fortran_dependencies.m4])
......@@ -512,7 +530,7 @@ dnl Test possibility of 'use mpi', if requested
if test x"${with_mpi}" = x"yes" ; then
AC_ARG_ENABLE([mpi-module],
AS_HELP_STRING([--disable-mpi-module],
[do not use the Fortran MPI module, get interfaces by 'include "mpif.h')]),
[do not use the Fortran MPI module, get interfaces by 'include "mpif.h']),
[
if test x"$enableval" = x"yes"; then
enable_mpi_module=yes
......@@ -550,7 +568,6 @@ m4_define(elpa_m4_generic_kernels, [
real_generic
real_generic_simple
real_generic_simple_block4
real_generic_simple_block6
complex_generic
complex_generic_simple
])
......@@ -1455,5 +1472,11 @@ if test x"$enable_kcomputer" = x"yes" ; then
echo "call: make -f ../generated_headers.am generated-headers top_srcdir=.."
echo "BEFORE triggering the build with make!"
else
if test x"$optional_c_error_argument" = x"yes" ; then
echo "#define OPTIONAL_C_ERROR_ARGUMENT" > elpa/elpa_generated_c_api.h
else
echo "#undef OPTIONAL_C_ERROR_ARGUMENT" > elpa/elpa_generated_c_api.h
fi
make -f $srcdir/generated_headers.am generated-headers top_srcdir="$srcdir" CPP="$CPP"
fi
......@@ -14,6 +14,7 @@ typedef struct elpa_autotune_struct *elpa_autotune_t;
#include <elpa/elpa_constants.h>
#include <elpa/elpa_generated_c_api.h>
#include <elpa/elpa_generated.h>
#include <elpa/elpa_generic.h>
......
......@@ -47,8 +47,7 @@ enum ELPA_SOLVERS {
X(ELPA_2STAGE_REAL_VSX_BLOCK2, 22, @ELPA_2STAGE_REAL_VSX_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK4, 23, @ELPA_2STAGE_REAL_VSX_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_VSX_BLOCK6, 24, @ELPA_2STAGE_REAL_VSX_BLOCK6_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 25, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6, 26, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6_COMPILED@, __VA_ARGS__)
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4, 25, @ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4_COMPILED@, __VA_ARGS__)
#define ELPA_FOR_ALL_2STAGE_REAL_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_REAL_KERNELS(X) \
......
......@@ -227,6 +227,7 @@ for lang, m, g, q, t, p, d, s, lay, spl in product(sorted(language_flag.keys()),
print("endif\n" * endifs)
for lang, p, d in product(sorted(language_flag.keys()), sorted(prec_flag.keys()), sorted(domain_flag.keys())):
endifs = 0
if (p == "single"):
......@@ -286,3 +287,12 @@ print(" " + " \\\n ".join([
prec_flag['double']]))
print("endif")
name = "test_split_comm_real_double"
print("check_SCRIPTS += " + name + "_extended.sh")
print("noinst_PROGRAMS += " + name)
print(name + "_SOURCES = test/Fortran/test_split_comm.F90")
print(name + "_LDADD = $(test_program_ldadd)")
print(name + "_FCFLAGS = $(test_program_fcflags) \\")
print(" " + " \\\n ".join([
domain_flag['real'],
prec_flag['double']]))
......@@ -22,6 +22,22 @@ elpa/elpa_generated.h: $(top_srcdir)/src/elpa_impl.F90 \
$(top_srcdir)/src/elpa_api.F90 | elpa
@rm -f $@
$(call extract_interface,!c>)
$(call extract_interface,!c_o>)
$(call extract_interface,!c_no>)
#if OPTIONAL_C_ERROR_ARGUMENT
#
#elpa/elpa_generated.h: $(top_srcdir)/src/elpa_impl.F90 \
# $(top_srcdir)/src/elpa_impl_math_template.F90 \
# $(top_srcdir)/src/elpa_api.F90 | elpa
# $(call extract_interface,!c_o>)
#
#else
#elpa/elpa_generated.h: $(top_srcdir)/src/elpa_impl.F90 \
# $(top_srcdir)/src/elpa_impl_math_template.F90 \
# $(top_srcdir)/src/elpa_api.F90 | elpa
# $(call extract_interface,!c_no>)
#endif
generated_headers += elpa/elpa_generated_legacy.h
elpa/elpa_generated_legacy.h: $(top_srcdir)/src/elpa_driver/legacy_interface/elpa_driver_c_interface.F90 \
......
......@@ -2,5 +2,5 @@
import sys
import os
args = filter(lambda q : q != "-Xcompiler", sys.argv[1:])
args = [q for q in sys.argv[1:] if q != "-Xcompiler"]
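# (under Python 3, filter() returns a lazy iterator, so a real list is needed for the indexing below)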
os.execvp(args[0], args[0:])
......@@ -66,9 +66,8 @@ function elpa_solve_evp_&
use elpa_abstract_impl
use elpa_mpi
use elpa1_compute
#ifdef WITH_OPENMP
use omp_lib
#endif
use elpa_omp
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
......@@ -122,7 +121,11 @@ function elpa_solve_evp_&
&")
#ifdef WITH_OPENMP
!nrThreads = omp_get_max_threads()
! store the number of OpenMP threads used in the calling function
! restore this at the end of this routine
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#else
......@@ -156,6 +159,13 @@ function elpa_solve_evp_&
if (.not.(obj%eigenvalues_only)) then
q(1,1) = ONE
endif
! restore original OpenMP settings
#ifdef WITH_OPENMP
! restore the number of OpenMP threads of the calling function
call omp_set_num_threads(omp_threads_caller)
#endif
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
......@@ -411,6 +421,13 @@ function elpa_solve_evp_&
endif
endif
! restore original OpenMP settings
#ifdef WITH_OPENMP
! restore the number of OpenMP threads of the calling function
call omp_set_num_threads(omp_threads_caller)
#endif
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
......
......@@ -62,9 +62,10 @@
use precision
use elpa_abstract_impl
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: n
real(kind=REAL_DATATYPE) :: v(n),s
real(kind=rk) :: v(n),s
v(:) = v(:) + s
end subroutine v_add_s_&
......@@ -76,9 +77,10 @@
use precision
use elpa_abstract_impl
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
real(kind=REAL_DATATYPE) :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
real(kind=rk) :: g_col(nlen), l_col(*) ! change this to proper 2d 1d matching ! remove assumed size
integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
integer(kind=ik) :: nbs, nbe, jb, g_off, l_off, js, je
......@@ -160,10 +162,10 @@
#include "../../src/general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: n, i
real(kind=REAL_DATATYPE) :: d(n), z(n), delta(n), rho, dlam
real(kind=rk) :: d(n), z(n), delta(n), rho, dlam
integer(kind=ik) :: iter
real(kind=REAL_DATATYPE) :: a, b, x, y, dshift
real(kind=rk) :: a, b, x, y, dshift
! In order to obtain sufficient numerical accuracy we have to shift the problem
! either by d(i) or d(i+1), whichever is closer to the solution
......@@ -261,24 +263,25 @@
use precision
use elpa_abstract_impl
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
logical, intent(in) :: wantDebug
#if REALCASE == 1
real(kind=REAL_DATATYPE), intent(inout) :: alpha
real(kind=rk), intent(inout) :: alpha
#endif
#if COMPLEXCASE == 1
complex(kind=COMPLEX_DATATYPE), intent(inout) :: alpha
complex(kind=ck), intent(inout) :: alpha
#endif
real(kind=REAL_DATATYPE), intent(in) :: xnorm_sq
real(kind=rk), intent(in) :: xnorm_sq
#if REALCASE == 1
real(kind=REAL_DATATYPE), intent(out) :: xf, tau
real(kind=rk), intent(out) :: xf, tau
#endif
#if COMPLEXCASE == 1
complex(kind=COMPLEX_DATATYPE), intent(out) :: xf, tau
real(kind=REAL_DATATYPE) :: ALPHR, ALPHI
complex(kind=ck), intent(out) :: xf, tau
real(kind=rk) :: ALPHR, ALPHI
#endif
real(kind=REAL_DATATYPE) :: BETA
real(kind=rk) :: BETA
if (wantDebug) call obj%timer%start("hh_transform_&
&MATH_DATATYPE&
......@@ -286,29 +289,29 @@
&PRECISION_SUFFIX )
#if COMPLEXCASE == 1
ALPHR = real( ALPHA, kind=REAL_DATATYPE )
ALPHR = real( ALPHA, kind=rk )
ALPHI = PRECISION_IMAG( ALPHA )
#endif
#if REALCASE == 1
if ( XNORM_SQ==0. ) then
if ( XNORM_SQ==0.0_rk ) then
#endif
#if COMPLEXCASE == 1
if ( XNORM_SQ==0. .AND. ALPHI==0. ) then
if ( XNORM_SQ==0.0_rk .AND. ALPHI==0.0_rk ) then
#endif
#if REALCASE == 1
if ( ALPHA>=0. ) then
if ( ALPHA>=0.0_rk ) then
#endif
#if COMPLEXCASE == 1
if ( ALPHR>=0. ) then
if ( ALPHR>=0.0_rk ) then
#endif
TAU = 0.
TAU = 0.0_rk
else
TAU = 2.
TAU = 2.0_rk
ALPHA = -ALPHA
endif
XF = 0.
XF = 0.0_rk
else
......@@ -327,8 +330,8 @@
ALPHA = XNORM_SQ / ALPHA
#endif
#if COMPLEXCASE == 1
ALPHR = ALPHI * (ALPHI/real( ALPHA , kind=KIND_PRECISION))
ALPHR = ALPHR + XNORM_SQ/real( ALPHA, kind=KIND_PRECISION )
ALPHR = ALPHI * (ALPHI/real( ALPHA , kind=rk))
ALPHR = ALPHR + XNORM_SQ/real( ALPHA, kind=rk )
#endif
#if REALCASE == 1
......@@ -340,7 +343,7 @@
ALPHA = PRECISION_CMPLX( -ALPHR, ALPHI )
#endif
END IF
XF = 1.0/ALPHA
XF = 1.0_rk/ALPHA
ALPHA = BETA
endif
......
......@@ -102,9 +102,8 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
use precision
use elpa_abstract_impl
use matrix_plot
#ifdef WITH_OPENMP
use omp_lib
#endif
use elpa_omp
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
......
......@@ -48,9 +48,8 @@
use elpa_mpi
use precision
use elpa_abstract_impl
#ifdef WITH_OPENMP
use omp_lib
#endif
use elpa_omp
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
......@@ -80,7 +79,11 @@
&")
#ifdef WITH_OPENMP
!nrThreads=omp_get_max_threads()
! store the number of OpenMP threads used in the calling function
! restore this at the end of this routine
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#else
......@@ -333,6 +336,14 @@
a(l_row1:l_rows,l_col1) = 0
endif
enddo
! restore original OpenMP settings
#ifdef WITH_OPENMP
! restore the number of OpenMP threads of the calling function
call omp_set_num_threads(omp_threads_caller)
#endif
call obj%timer%stop("elpa_cholesky_&
&MATH_DATATYPE&
&_&
......
......@@ -63,9 +63,7 @@
&_impl
use precision
use elpa_abstract_impl
#ifdef WITH_OPENMP
use omp_lib
#endif
use elpa_omp
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
......@@ -95,7 +93,12 @@
matrixCols = obj%local_ncols
#ifdef WITH_OPENMP
!nrThreads=omp_get_max_threads()
! store the number of OpenMP threads used in the calling function
! restore this at the end of this routine
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
call obj%get("omp_threads",nrThreads,error)
#else
nrThreads=1
......@@ -130,6 +133,15 @@
mpi_comm_rows, mpi_comm_cols,.false., wantDebug, success, &
nrThreads)
! restore original OpenMP settings
#ifdef WITH_OPENMP
! restore the number of OpenMP threads of the calling function
call omp_set_num_threads(omp_threads_caller)
#endif
call obj%timer%stop("elpa_solve_tridi_public_&
&MATH_DATATYPE&
&_&
......
......@@ -120,12 +120,6 @@
stop
endif
call e%set("legacy_api", 1, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop 1
endif
!! the elpa object needs nev to be set (in case the EVP-solver is
!! called later. Thus it is set by user, do nothing, otherwise,
!! set it to na as default
......@@ -133,6 +127,8 @@
! call e%set("nev", na)
!endif
call e%creating_from_legacy_api()
if (e%setup() .ne. ELPA_OK) then
print *, "Cannot setup ELPA instance"
success = .false.
......
......@@ -143,12 +143,8 @@
print *,"Problem setting option. Aborting..."
stop
endif
call e%set("legacy_api", 1, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop
endif
call e%creating_from_legacy_api()
if (e%setup() .ne. ELPA_OK) then
print *, "Cannot setup ELPA instance"
......@@ -156,6 +152,7 @@
return
endif
if (wantDebug) then
call e%set("debug",1, error)
if (error .ne. ELPA_OK) then
......
......@@ -158,11 +158,7 @@
stop
endif
call e%set("legacy_api", 1, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop 1
endif
call e%creating_from_legacy_api()
if (e%setup() .ne. ELPA_OK) then
print *, "Cannot setup ELPA instance"
......
......@@ -62,9 +62,8 @@
&_impl
use elpa
use elpa_abstract_impl
#ifdef WITH_OPENMP
use omp_lib
#endif
use elpa_omp
implicit none
integer(kind=ik) :: na, nev, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
real(kind=REAL_DATATYPE) :: d(na), e(na)
......@@ -135,11 +134,8 @@
print *,"Problem setting option. Aborting..."
stop
endif
call obj%set("legacy_api", 1, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop
endif
call obj%creating_from_legacy_api()
if (obj%setup() .ne. ELPA_OK) then
print *, "Cannot setup ELPA instance"
......@@ -148,6 +144,12 @@
endif
#ifdef WITH_OPENMP
! store the number of OpenMP threads used in the calling function
! restore this at the end of this routine
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
nrThreads = omp_get_max_threads()
call obj%set("omp_threads", nrThreads, error)
#else
......@@ -167,6 +169,12 @@
if (error /= ELPA_OK) then
print *, "Cannot run solve_tridi"
success = .false.
! restore original OpenMP settings
#ifdef WITH_OPENMP
! restore the number of OpenMP threads of the calling function
call omp_set_num_threads(omp_threads_caller)
#endif
return
else
success = .true.
......@@ -184,6 +192,13 @@
! stop
endif
! restore original OpenMP settings
#ifdef WITH_OPENMP
! restore the number of OpenMP threads of the calling function
call omp_set_num_threads(omp_threads_caller)
#endif
!call timer%stop("elpa_solve_tridi_&
!&PRECISION&
!&_legacy_interface")
......
......@@ -81,9 +81,9 @@
use real_generic_simple_block4_kernel !, only : double_hh_trafo_generic_simple
#endif
#if defined(WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL) && !(defined(USE_ASSUMED_SIZE))
use real_generic_simple_block6_kernel !, only : double_hh_trafo_generic_simple
#endif
!#if defined(WITH_REAL_GENERIC_SIMPLE_BLOCK6_KERNEL) && !(defined(USE_ASSUMED_SIZE))
! use real_generic_simple_block6_kernel !, only : double_hh_trafo_generic_simple
!#endif
#if defined(WITH_REAL_GENERIC_KERNEL) && !(defined(USE_ASSUMED_SIZE))
use real_generic_kernel !, only : double_hh_trafo_generic
......@@ -1450,36 +1450,36 @@
#ifdef WITH_OPENMP
#ifdef USE_ASSUMED_SIZE
!#ifdef USE_ASSUMED_SIZE
call hexa_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_6hv_&
&PRECISION&
& (a(1,j+off+a_off-5,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
#else
call hexa_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_6hv_&
&PRECISION&
& (a(1:stripe_width,j+off+a_off-5:j+off+a_off-1,istripe,my_thread), w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw)
#endif
!#else
! call hexa_hh_trafo_&
! &MATH_DATATYPE&
! &_generic_simple_6hv_&
! &PRECISION&
! & (a(1:stripe_width,j+off+a_off-5:j+off+a_off-1,istripe,my_thread), w(1:nbw,1:6), &
! nbw, nl, stripe_width, nbw)
!#endif
#else /* WITH_OPENMP */
#ifdef USE_ASSUMED_SIZE
!#ifdef USE_ASSUMED_SIZE
call hexa_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_6hv_&
&PRECISION&
& (a(1,j+off+a_off-5,istripe), w, nbw, nl, stripe_width, nbw)
#else
call hexa_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_6hv_&
&PRECISION&
& (a(1:stripe_width,j+off+a_off-5:j+off+a_off+nbw-1,istripe), w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw)
#endif
!#else
! call hexa_hh_trafo_&
! &MATH_DATATYPE&
! &_generic_simple_6hv_&
! &PRECISION&
! & (a(1:stripe_width,j+off+a_off-5:j+off+a_off+nbw-1,istripe), w(1:nbw,1:6), &
! nbw, nl, stripe_width, nbw)
!#endif
#endif /* WITH_OPENMP */
enddo
do jj = j, 4, -4
......
......@@ -63,9 +63,8 @@
use elpa_mpi
use cuda_functions
use mod_check_for_gpu
#ifdef WITH_OPENMP
use omp_lib
#endif
use elpa_omp
use iso_c_binding
implicit none
#include "../general/precision_kinds.F90"
......@@ -150,7 +149,11 @@
#ifdef WITH_OPENMP
!nrThreads = omp_get_max_threads()
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#else
......@@ -209,6 +212,14 @@
if (.not.(obj%eigenvalues_only)) then
q(1,1) = ONE
endif
! restore original OpenMP settings
#ifdef WITH_OPENMP
! restore the number of OpenMP threads of the calling function
call omp_set_num_threads(omp_threads_caller)
#endif
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
......@@ -779,6 +790,13 @@
endif
endif
! restore original OpenMP settings
#ifdef WITH_OPENMP
! restore the number of OpenMP threads of the calling function
call omp_set_num_threads(omp_threads_caller)
#endif
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
......
......@@ -228,7 +228,7 @@
stop 1
endif
ab = 0 ! needed for lower half, the extra block should also be set to 0 for safety
ab = 0.0_rck ! needed for lower half, the extra block should also be set to 0 for safety
! n_off: Offset of ab within band
n_off = block_limits(my_pe)*nb
......@@ -470,7 +470,7 @@
#else
vnorm2 = sum(real(ab(3:n+1,na_s-n_off),kind=rk4)**2+aimag(ab(3:n+1,na_s-n_off))**2)
#endif
if (n<2) vnorm2 = 0. ! Safety only
if (n<2) vnorm2 = 0.0_rk ! Safety only
#endif /* COMPLEXCASE */
call hh_transform_&
......
......@@ -60,18 +60,18 @@
#endif
#include "config-f90.h"
#ifndef USE_ASSUMED_SIZE
module real_generic_simple_block6_kernel
private
public hexa_hh_trafo_real_generic_simple_6hv_double
#ifdef WANT_SINGLE_PRECISION_REAL
public hexa_hh_trafo_real_generic_simple_6hv_single
#endif
contains
#endif
!#ifndef USE_ASSUMED_SIZE
!module real_generic_simple_block6_kernel
!
! private
! public hexa_hh_trafo_real_generic_simple_6hv_double
!
!#ifdef WANT_SINGLE_PRECISION_REAL
! public hexa_hh_trafo_real_generic_simple_6hv_single
!#endif
!
! contains
!#endif
#define REALCASE 1
#define DOUBLE_PRECISION 1
......@@ -89,7 +89,7 @@ module real_generic_simple_block6_kernel
#undef SINGLE_PRECISION
#endif
#ifndef USE_ASSUMED_SIZE
end module real_generic_simple_block6_kernel
#endif
!#ifndef USE_ASSUMED_SIZE
!end module real_generic_simple_block6_kernel
!#endif
! --------------------------------------------------------------------------------------------------
......@@ -56,7 +56,7 @@ module elpa2_utilities
implicit none
public