There is a maintenance of MPCDF Gitlab on Thursday, April 22nd 2020, 9:00 am CEST - Expect some service interruptions during this time

Allow ELPA to be built with single and double precision symbols in one

library

If the configure option "--enable-single-precision" is specified,
ELPA will also be built for single precision usage. The double precision
and single precision will be available at the same time with names
"solve_evp_real_1stage_double" or "solve_evp_real_1stage_single" and
so on...

This change implied some major refactoring of the ELPA code:
1.) functions/procedures had to be renamed with suffix "_double"

2.) If necessary the same functions have to be available with suffix
"_single"

3.) Variable kind definitions have to be consistent with the
intended use

To avoid unnecessary code duplication this is done (most of the time)
with preprocessor string substitution.

The documentation has been updated.

NOT SUPPORTED are at the moment:

- single precision usage of ELPA2 with kernels other than "generic"
  and "generic_simple"

- single precision usage of GPU
parent 0f63f761
......@@ -765,7 +765,7 @@ WARN_LOGFILE =
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
INPUT = @top_srcdir@/src @top_srcdir@/test @builddir@/elpa
INPUT = @top_srcdir@/src @top_srcdir@/test @builddir@/elpa @builddir@/config-f90.h
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
......@@ -2014,7 +2014,7 @@ ENABLE_PREPROCESSING = YES
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
MACRO_EXPANSION = NO
MACRO_EXPANSION = YES
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
# the macro expansion is limited to the macros specified with the PREDEFINED and
......@@ -2036,7 +2036,7 @@ SEARCH_INCLUDES = YES
# preprocessor.
# This tag requires that the tag SEARCH_INCLUDES is set to YES.
INCLUDE_PATH =
INCLUDE_PATH = @builddir@ @builddir@/elpa
# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
# patterns (like *.h and *.hpp) to filter out the header-files in the
......
......@@ -29,7 +29,7 @@ libelpa@SUFFIX@_la_SOURCES = src/mod_precision.F90 \
src/elpa2.F90 \
src/elpa_c_interface.F90 \
src/elpa_qr/qr_utils.F90 \
src/elpa_qr/elpa_qrkernels.f90 \
src/elpa_qr/elpa_qrkernels.F90 \
src/elpa_qr/elpa_pdlarfb.F90 \
src/elpa_qr/elpa_pdgeqrf.F90
if HAVE_DETAILED_TIMINGS
......@@ -117,15 +117,28 @@ nobase_elpa_include_HEADERS = $(wildcard modules/*)
nobase_elpa_include_HEADERS += elpa/elpa.h elpa/elpa_kernel_constants.h elpa/elpa_generated.h
man_MANS = man/solve_evp_real.3 \
man/solve_evp_real_1stage.3 \
man/solve_evp_real_1stage_double.3 \
man/solve_evp_complex.3 \
man/solve_evp_complex_1stage.3 \
man/solve_evp_real_2stage.3 \
man/solve_evp_complex_2stage.3 \
man/solve_evp_complex_1stage_double.3 \
man/solve_evp_real_2stage_double.3 \
man/solve_evp_complex_2stage_double.3 \
man/get_elpa_row_col_comms.3 \
man/get_elpa_communicators.3 \
man/print_available_elpa2_kernels.1
if WANT_SINGLE_PRECISION_REAL
man_MANS += man/solve_evp_real_1stage_single.3 \
man/solve_evp_real_2stage_single.3
endif
if WANT_SINGLE_PRECISION_COMPLEX
man_MANS += man/solve_evp_complex_1stage_single.3 \
man/solve_evp_complex_2stage_single.3
endif
# other files to distribute
filesdir = $(docdir)/examples
dist_files_DATA = \
......@@ -142,6 +155,34 @@ dist_files_DATA = \
test/fortran_test_programs/test_real_with_c.F90 \
src/print_available_elpa2_kernels.F90
if WANT_SINGLE_PRECISION_COMPLEX
dist_files_DATA += test/fortran_test_programs/test_complex2_single_precision.F90 \
test/fortran_test_programs/test_complex2_default_kernel_single_precision.F90 \
test/fortran_test_programs/test_complex2_choose_kernel_with_api_single_precision.F90 \
test/fortran_test_programs/test_complex_single_precision.F90
endif
if WANT_SINGLE_PRECISION_REAL
dist_files_DATA += test/fortran_test_programs/test_real2_single_precision.F90 \
test/fortran_test_programs/test_real2_default_kernel_single_precision.F90 \
test/fortran_test_programs/test_real2_default_kernel_qr_decomposition_single_precision.F90 \
test/fortran_test_programs/test_real2_choose_kernel_with_api_single_precision.F90 \
test/fortran_test_programs/test_real_single_precision.F90
endif
if WITH_GPU_VERSION
dist_files_DATA += test/fortran_test_programs/test_real2_gpu_version.F90 \
test/fortran_test_programs/test_complex2_gpu_version.F90
if WANT_SINGLE_PRECISION_REAL
dist_files_DATA += test/fortran_test_programs/test_real2_gpu_version_single_precision.F90
endif
if WANT_SINGLE_PRECISION_COMPLEX
dist_files_DATA += test/fortran_test_programs/test_complex2_gpu_version_single_precision.F90
endif
endif
dist_doc_DATA = README COPYING/COPYING COPYING/gpl.txt COPYING/lgpl.txt
# pkg-config stuff
......@@ -156,6 +197,18 @@ bin_PROGRAMS = \
elpa2_test_complex@SUFFIX@ \
elpa2_print_kernels@SUFFIX@
if WANT_SINGLE_PRECISION_COMPLEX
bin_PROGRAMS += \
elpa1_test_complex_single_precision@SUFFIX@ \
elpa2_test_complex_single_precision@SUFFIX@
endif
if WANT_SINGLE_PRECISION_REAL
bin_PROGRAMS += \
elpa1_test_real_single_precision@SUFFIX@ \
elpa2_test_real_single_precision@SUFFIX@
endif
noinst_PROGRAMS = \
elpa2_test_real_default_kernel@SUFFIX@ \
elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@ \
......@@ -168,6 +221,36 @@ noinst_PROGRAMS = \
elpa2_test_complex_c_version@SUFFIX@ \
elpa1_test_real_with_c@SUFFIX@
if WANT_SINGLE_PRECISION_COMPLEX
noinst_PROGRAMS += \
elpa2_test_complex_default_kernel_single_precision@SUFFIX@ \
elpa2_test_complex_choose_kernel_with_api_single_precision@SUFFIX@
endif
if WANT_SINGLE_PRECISION_REAL
noinst_PROGRAMS += \
elpa2_test_real_default_kernel_single_precision@SUFFIX@ \
elpa2_test_real_default_kernel_qr_decomposition_single_precision@SUFFIX@ \
elpa2_test_real_choose_kernel_with_api_single_precision@SUFFIX@
endif
if WITH_GPU_VERSION
noinst_PROGRAMS += \
elpa2_test_complex_gpu_version@SUFFIX@ \
elpa2_test_real_gpu_version@SUFFIX@
if WANT_SINGLE_PRECISION_REAL
noinst_PROGRAMS += \
elpa2_test_real_gpu_version_single_precision@SUFFIX@
endif
if WANT_SINGLE_PRECISION_COMPLEX
noinst_PROGRAMS += \
elpa2_test_complex_gpu_version_single_precision@SUFFIX@
endif
endif
build_lib = libelpa@SUFFIX@.la
......@@ -212,42 +295,84 @@ elpa1_test_real_with_c@SUFFIX@_LDADD = $(build_lib)
#elpa1_test_complex_with_c@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real@SUFFIX@_LDFLAGS = -static
elpa2_test_real@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_default_kernel@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_default_kernel.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_default_kernel@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_default_kernel_qr_decomposition.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_choose_kernel_with_api@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_choose_kernel_with_api.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_choose_kernel_with_api@SUFFIX@_LDADD = $(build_lib)
elpa1_test_complex@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex.F90 $(shared_sources) $(redirect_sources)
elpa1_test_complex@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_default_kernel@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_default_kernel.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_default_kernel@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_choose_kernel_with_api@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_choose_kernel_with_api.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_choose_kernel_with_api@SUFFIX@_LDADD = $(build_lib)
elpa2_print_kernels@SUFFIX@_SOURCES = src/print_available_elpa2_kernels.F90 $(shared_sources) $(redirect_sources)
elpa2_print_kernels@SUFFIX@_LDADD = $(build_lib)
if WANT_SINGLE_PRECISION_REAL
elpa1_test_real_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa1_test_real_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_single_precision@SUFFIX@_LDFLAGS = -static
elpa2_test_real_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_default_kernel_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_default_kernel_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_default_kernel_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_default_kernel_qr_decomposition_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_default_kernel_qr_decomposition_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_default_kernel_qr_decomposition_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_choose_kernel_with_api_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_choose_kernel_with_api_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_choose_kernel_with_api_single_precision@SUFFIX@_LDADD = $(build_lib)
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa1_test_complex_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa1_test_complex_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_default_kernel_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_default_kernel_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_default_kernel_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_choose_kernel_with_api_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_choose_kernel_with_api_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_choose_kernel_with_api_single_precision@SUFFIX@_LDADD = $(build_lib)
endif
if WITH_GPU_VERSION
elpa2_test_real_gpu_version@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_gpu_version.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_gpu_version@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_gpu_version@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_gpu_version.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_gpu_version@SUFFIX@_LDADD = $(build_lib)
if WANT_SINGLE_PRECISION_REAL
elpa2_test_real_gpu_version_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_gpu_version_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_gpu_version_single_precision@SUFFIX@_LDADD = $(build_lib)
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa2_test_complex_gpu_version_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_gpu_version_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_gpu_version_single_precision@SUFFIX@_LDADD = $(build_lib)
endif
endif
check_SCRIPTS = \
elpa1_test_real.sh \
elpa1_test_real_with_c.sh \
......@@ -265,7 +390,39 @@ check_SCRIPTS = \
elpa2_test_complex_choose_kernel_with_api.sh \
elpa2_print_kernels@SUFFIX@
if WANT_SINGLE_PRECISION_REAL
check_SCRIPTS += \
elpa1_test_real_single_precision.sh \
elpa2_test_real_single_precision.sh \
elpa2_test_real_default_kernel_single_precision.sh \
elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh \
elpa2_test_real_choose_kernel_with_api_single_precision.sh
endif
# Single-precision complex test scripts appended to check_SCRIPTS (and so to
# TESTS). The choose-kernel-with-API script is listed here as well: its
# program is built and its .sh generation rule exists, and the real
# single-precision and double-precision complex lists include their
# counterparts, so leaving it out meant the test was never run.
if WANT_SINGLE_PRECISION_COMPLEX
check_SCRIPTS += \
elpa1_test_complex_single_precision.sh \
elpa2_test_complex_single_precision.sh \
elpa2_test_complex_default_kernel_single_precision.sh \
elpa2_test_complex_choose_kernel_with_api_single_precision.sh
endif
if WITH_GPU_VERSION
check_SCRIPTS += \
elpa2_test_real_gpu_version.sh \
elpa2_test_complex_gpu_version.sh
if WANT_SINGLE_PRECISION_REAL
check_SCRIPTS += \
elpa2_test_real_gpu_version_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
check_SCRIPTS += \
elpa2_test_complex_gpu_version_single_precision.sh
endif
endif
TESTS = $(check_SCRIPTS)
if WITH_MPI
elpa1_test_real.sh:
echo 'mpiexec -n 2 ./elpa1_test_real@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real.sh
chmod +x elpa1_test_real.sh
......@@ -289,6 +446,7 @@ elpa1_test_real_c_version.sh:
elpa1_test_complex_c_version.sh:
echo 'mpiexec -n 2 ./elpa1_test_complex_c_version@SUFFIX@ $$TEST_FLAGS' > elpa1_test_complex_c_version.sh
chmod +x elpa1_test_complex_c_version.sh
elpa2_test_real.sh:
echo 'mpiexec -n 2 ./elpa2_test_real@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real.sh
chmod +x elpa2_test_real.sh
......@@ -320,6 +478,197 @@ elpa2_test_complex_default_kernel.sh:
elpa2_test_complex_choose_kernel_with_api.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_choose_kernel_with_api.sh
chmod +x elpa2_test_complex_choose_kernel_with_api.sh
if WANT_SINGLE_PRECISION_REAL
elpa1_test_real_single_precision.sh:
echo 'mpiexec -n 2 ./elpa1_test_real_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real_single_precision.sh
chmod +x elpa1_test_real_single_precision.sh
elpa2_test_real_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_single_precision.sh
chmod +x elpa2_test_real_single_precision.sh
elpa2_test_real_default_kernel_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_default_kernel_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_default_kernel_single_precision.sh
chmod +x elpa2_test_real_default_kernel_single_precision.sh
elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_default_kernel_qr_decomposition_single_precision@SUFFIX@' > elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh
chmod +x elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh
elpa2_test_real_choose_kernel_with_api_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_choose_kernel_with_api_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_choose_kernel_with_api_single_precision.sh
chmod +x elpa2_test_real_choose_kernel_with_api_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa1_test_complex_single_precision.sh:
echo 'mpiexec -n 2 ./elpa1_test_complex_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa1_test_complex_single_precision.sh
chmod +x elpa1_test_complex_single_precision.sh
elpa2_test_complex_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_single_precision.sh
chmod +x elpa2_test_complex_single_precision.sh
elpa2_test_complex_default_kernel_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_default_kernel_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_default_kernel_single_precision.sh
chmod +x elpa2_test_complex_default_kernel_single_precision.sh
elpa2_test_complex_choose_kernel_with_api_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_choose_kernel_with_api_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_choose_kernel_with_api_single_precision.sh
chmod +x elpa2_test_complex_choose_kernel_with_api_single_precision.sh
endif
if WITH_GPU_VERSION
elpa2_test_real_gpu_version.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_gpu_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_gpu_version.sh
chmod +x elpa2_test_real_gpu_version.sh
elpa2_test_complex_gpu_version.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_gpu_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_gpu_version.sh
chmod +x elpa2_test_complex_gpu_version.sh
if WANT_SINGLE_PRECISION_REAL
elpa2_test_real_gpu_version_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_gpu_version_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_gpu_version_single_precision.sh
chmod +x elpa2_test_real_gpu_version_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa2_test_complex_gpu_version_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_gpu_version_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_gpu_version_single_precision.sh
chmod +x elpa2_test_complex_gpu_version_single_precision.sh
endif
# GPU_VERSION
endif
else
# build tests without mpi support
elpa1_test_real.sh:
echo './elpa1_test_real@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real.sh
chmod +x elpa1_test_real.sh
elpa1_test_real_with_c.sh:
echo './elpa1_test_real_with_c@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real_with_c.sh
chmod +x elpa1_test_real_with_c.sh
elpa2_test_real_c_version.sh:
echo './elpa2_test_real_c_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_c_version.sh
chmod +x elpa2_test_real_c_version.sh
elpa2_test_complex_c_version.sh:
echo './elpa2_test_complex_c_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_c_version.sh
chmod +x elpa2_test_complex_c_version.sh
elpa1_test_real_c_version.sh:
echo './elpa1_test_real_c_version@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real_c_version.sh
chmod +x elpa1_test_real_c_version.sh
elpa1_test_complex_c_version.sh:
echo './elpa1_test_complex_c_version@SUFFIX@ $$TEST_FLAGS' > elpa1_test_complex_c_version.sh
chmod +x elpa1_test_complex_c_version.sh
elpa2_test_real.sh:
echo './elpa2_test_real@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real.sh
chmod +x elpa2_test_real.sh
elpa2_test_real_default_kernel.sh:
echo './elpa2_test_real_default_kernel@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_default_kernel.sh
chmod +x elpa2_test_real_default_kernel.sh
elpa2_test_real_default_kernel_qr_decomposition.sh:
echo './elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@' > elpa2_test_real_default_kernel_qr_decomposition.sh
chmod +x elpa2_test_real_default_kernel_qr_decomposition.sh
elpa2_test_real_choose_kernel_with_api.sh:
echo './elpa2_test_real_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_choose_kernel_with_api.sh
chmod +x elpa2_test_real_choose_kernel_with_api.sh
elpa1_test_complex.sh:
echo './elpa1_test_complex@SUFFIX@ $$TEST_FLAGS' > elpa1_test_complex.sh
chmod +x elpa1_test_complex.sh
elpa2_test_complex.sh:
echo './elpa2_test_complex@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex.sh
chmod +x elpa2_test_complex.sh
elpa2_test_complex_default_kernel.sh:
echo './elpa2_test_complex_default_kernel@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_default_kernel.sh
chmod +x elpa2_test_complex_default_kernel.sh
elpa2_test_complex_choose_kernel_with_api.sh:
echo './elpa2_test_complex_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_choose_kernel_with_api.sh
chmod +x elpa2_test_complex_choose_kernel_with_api.sh
if WANT_SINGLE_PRECISION_REAL
elpa1_test_real_single_precision.sh:
echo './elpa1_test_real_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real_single_precision.sh
chmod +x elpa1_test_real_single_precision.sh
elpa2_test_real_single_precision.sh:
echo './elpa2_test_real_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_single_precision.sh
chmod +x elpa2_test_real_single_precision.sh
elpa2_test_real_default_kernel_single_precision.sh:
echo './elpa2_test_real_default_kernel_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_default_kernel_single_precision.sh
chmod +x elpa2_test_real_default_kernel_single_precision.sh
elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh:
echo './elpa2_test_real_default_kernel_qr_decomposition_single_precision@SUFFIX@' > elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh
chmod +x elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh
elpa2_test_real_choose_kernel_with_api_single_precision.sh:
echo './elpa2_test_real_choose_kernel_with_api_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_choose_kernel_with_api_single_precision.sh
chmod +x elpa2_test_real_choose_kernel_with_api_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa1_test_complex_single_precision.sh:
echo './elpa1_test_complex_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa1_test_complex_single_precision.sh
chmod +x elpa1_test_complex_single_precision.sh
elpa2_test_complex_single_precision.sh:
echo './elpa2_test_complex_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_single_precision.sh
chmod +x elpa2_test_complex_single_precision.sh
elpa2_test_complex_default_kernel_single_precision.sh:
echo './elpa2_test_complex_default_kernel_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_default_kernel_single_precision.sh
chmod +x elpa2_test_complex_default_kernel_single_precision.sh
elpa2_test_complex_choose_kernel_with_api_single_precision.sh:
echo './elpa2_test_complex_choose_kernel_with_api_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_choose_kernel_with_api_single_precision.sh
chmod +x elpa2_test_complex_choose_kernel_with_api_single_precision.sh
endif
if WITH_GPU_VERSION
elpa2_test_real_gpu_version.sh:
echo './elpa2_test_real_gpu_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_gpu_version.sh
chmod +x elpa2_test_real_gpu_version.sh
elpa2_test_complex_gpu_version.sh:
echo './elpa2_test_complex_gpu_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_gpu_version.sh
chmod +x elpa2_test_complex_gpu_version.sh
if WANT_SINGLE_PRECISION_REAL
elpa2_test_real_gpu_version_single_precision.sh:
echo './elpa2_test_real_gpu_version_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_gpu_version_single_precision.sh
chmod +x elpa2_test_real_gpu_version_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa2_test_complex_gpu_version_single_precision.sh:
echo './elpa2_test_complex_gpu_version_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_gpu_version_single_precision.sh
chmod +x elpa2_test_complex_gpu_version_single_precision.sh
endif
# GPU_VERSION
endif
# use mpi
endif
mod_precision.i: $(top_srcdir)/src/mod_precision.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -I$(top_srcdir)/ -c $(top_srcdir)/src/mod_precision.F90 -o $@
......@@ -341,6 +690,27 @@ elpa2_kernels_real.i: $(top_srcdir)/src/elpa2_kernels/elpa2_kernels_real.F90
mod_compute_hh_trafo_real.i: $(top_srcdir)/src/mod_compute_hh_trafo_real.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/mod_compute_hh_trafo_real.F90 -o $@
test_real.i: $(top_srcdir)/test/fortran_test_programs/test_real.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/fortran_test_programs/test_real.F90 -o $@
blacs_infrastructure.i: $(top_srcdir)/test/shared_sources/blacs_infrastructure.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/shared_sources/blacs_infrastructure.F90 -o $@
check_correctnes.i: $(top_srcdir)/test/shared_sources/check_correctnes.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/shared_sources/check_correctnes.F90 -o $@
prepare_matrix.i: $(top_srcdir)/test/shared_sources/prepare_matrix.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/shared_sources/prepare_matrix.F90 -o $@
read_input_parameters.i: $(top_srcdir)/test/shared_sources/read_input_parameters.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/shared_sources/read_input_parameters.F90 -o $@
setup_mpi.i: $(top_srcdir)/test/shared_sources/setup_mpi.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/shared_sources/setup_mpi.F90 -o $@
cuUtils.i: $(top_srcdir)/src/cuUtils.cu
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/cuUtils.cu -o $@
include doxygen.am
CLEANFILES = \
......@@ -383,7 +753,16 @@ EXTRA_DIST = \
test/fortran_test_programs/elpa_test_programs_print_headers.X90 \
src/elpa_reduce_add_vectors.X90 \
src/elpa_transpose_vectors.X90 \
src/elpa1_compute_template_real.X90 \
src/elpa1_compute_template_complex.X90 \
src/elpa2_compute_template_real.X90 \
src/elpa2_compute_template_complex.X90 \
src/redist_band.X90 \
src/elpa_qr/elpa_qrkernels.X90 \
src/ev_tridi_band_gpu_c_v2_complex_template.Xcu \
src/ev_tridi_band_gpu_c_v2_real_template.Xcu \
src/cuUtils_complex_template.Xcu \
src/cuUtils_real_template.Xcu \
nvcc_wrap
# Rules to re-generate the headers
......
......@@ -523,7 +523,7 @@ if test x"${want_gpu}" = x"yes" ; then
fi
dnl check whether single precision is requested
AC_MSG_CHECKING(whether single precision calculations are requested)
AC_MSG_CHECKING(whether ELPA library should contain also single precision functions)
AC_ARG_ENABLE(single-precision,[AS_HELP_STRING([--enable-single-precision],
[build with single precision])],
want_single_precision="yes", want_single_precision="no")
......@@ -747,10 +747,12 @@ if test x"${DESPERATELY_WANT_ASSUMED_SIZE}" = x"yes" ; then
AC_DEFINE([DESPERATELY_WANT_ASSUMED_SIZE],[1],[use assumed size arrays, even if not debuggable])
fi
if test x"${want_single_precision}" = x"no" ; then
AC_DEFINE([DOUBLE_PRECISION_REAL],[1],[use double precision for real calculation])
AC_DEFINE([DOUBLE_PRECISION_COMPLEX],[1],[use double precision for complex calculation])
if test x"${want_single_precision}" = x"yes" ; then
AC_DEFINE([WANT_SINGLE_PRECISION_REAL],[1],[build also single-precision for real calculation])
AC_DEFINE([WANT_SINGLE_PRECISION_COMPLEX],[1],[build also single-precision for complex calculation])
fi
AM_CONDITIONAL([WANT_SINGLE_PRECISION_REAL],[test x"$want_single_precision" = x"yes"])
AM_CONDITIONAL([WANT_SINGLE_PRECISION_COMPLEX],[test x"$want_single_precision" = x"yes"])
AC_SUBST([WITH_MKL])
AC_SUBST([WITH_BLACS])
......
.TH "solve_evp_complex" 3 "Wed Dec 2 2015" "ELPA" \" -*- nroff -*-
.TH "solve_evp_complex" 3 "Thu Mar 17 2016" "ELPA" \" -*- nroff -*-
.ad l
.nh
.SH NAME
solve_evp_complex \- solve the complex eigenvalue problem with the 1-stage ELPA solver.
This interface is old and deprecated. It is recommended to use \fBsolve_evp_complex_1stage\fP(3)
solve_evp_complex \- solve the double-precision complex eigenvalue problem with the 1-stage ELPA solver.
This interface is old and deprecated. It is recommended to use \fBsolve_evp_complex_1stage_double\fP(3)
.br
.SH SYNOPSIS
......@@ -48,4 +48,4 @@ use elpa1
Solve the complex eigenvalue problem with the 1-stage solver. The ELPA communicators \fBmpi_comm_rows\fP and \fBmpi_comm_cols\fP are obtained with the \fBget_elpa_communicators\fP(3) function. The distributed quadratic matrix \fBa\fP has global dimensions \fBna\fP x \fBna\fP, and a local size \fBlda\fP x \fBmatrixCols\fP. The solver will compute the first \fBnev\fP eigenvalues, which will be stored on exit in \fBev\fP. The eigenvectors corresponding to the eigenvalues will be stored in \fBq\fP. All memory of the arguments must be allocated outside the call to the solver.
.br
.SH "SEE ALSO"
\fBget_elpa_communicators\fP(3) \fBsolve_evp_real_1stage\fP(3) \fBsolve_evp_real_2stage\fP(3) \fBsolve_evp_complex_2stage\fP(3) \fBprint_available_elpa2_kernels\fP(1)
\fBget_elpa_communicators\fP(3) \fBsolve_evp_real_1stage_double\fP(3) \fBsolve_evp_real_1stage_single\fP(3) \fBsolve_evp_complex_1stage_single\fP(3) \fBsolve_evp_real_2stage_double\fP(3) \fBsolve_evp_real_2stage_single\fP(3) \fBsolve_evp_complex_2stage_double\fP(3) \fBsolve_evp_complex_2stage_single\fP(3) \fBprint_available_elpa2_kernels\fP(1)
.TH "solve_evp_complex_1stage_double" 3 "Thu Mar 17 2016" "ELPA" \" -*- nroff -*-
.ad l
.nh
.SH NAME
solve_evp_complex_1stage_double \- solve the double-precision complex eigenvalue problem with the 1-stage ELPA solver
.br
.SH SYNOPSIS
.br
.SS FORTRAN INTERFACE
use elpa1
.br
.br
.RI "success = \fBsolve_evp_complex_1stage_double\fP (na, nev, a(lda,matrixCols), ev(nev), q(ldq, matrixCols), ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)"
.br
.RI " "
.br
.RI "With the definitions of the input and output variables:"
.br
.RI "integer, intent(in) \fBna\fP: global dimension of quadratic matrix \fBa\fP to solve"
.br
.RI "integer, intent(in) \fBnev\fP: number of eigenvalues to be computed; the first \fBnev\fP eigenvalues are calculated"
.br
.RI "complex*16, intent(inout) \fBa\fP: locally distributed part of the matrix \fBa\fP. The local dimensions are \fBlda\fP x \fBmatrixCols\fP"
.br