Allow ELPA to be built with single and double precision symbols in one

library

If the configure option "--enable-single-precision" is specified,
ELPA will also be built for single precision usage. The double precision
and single precision will be available at the same time with names
"solve_evp_real_1stage_double" or "solve_evp_real_1stage_single" and
so on...

This change implied some major refactoring of the ELPA code:
1.) functions/procedures had to be renamed with suffix "_double"

2.) If necessary the same functions have to be available with suffix
"_single"

3.) Variable kind definitions have to be consistent with the
intended use

To avoid unnecessary code duplication this is done (most of the time)
with preprocessor string substitution.

The documentation has been updated.

NOT SUPPORTED at the moment are:

- single precision usage of ELPA2 with kernels other than "generic"
  and "generic_simple"

- single precision usage of GPU
parent 0f63f761
......@@ -765,7 +765,7 @@ WARN_LOGFILE =
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
INPUT = @top_srcdir@/src @top_srcdir@/test @builddir@/elpa
INPUT = @top_srcdir@/src @top_srcdir@/test @builddir@/elpa @builddir@/config-f90.h
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
......@@ -2014,7 +2014,7 @@ ENABLE_PREPROCESSING = YES
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
MACRO_EXPANSION = NO
MACRO_EXPANSION = YES
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
# the macro expansion is limited to the macros specified with the PREDEFINED and
......@@ -2036,7 +2036,7 @@ SEARCH_INCLUDES = YES
# preprocessor.
# This tag requires that the tag SEARCH_INCLUDES is set to YES.
INCLUDE_PATH =
INCLUDE_PATH = @builddir@ @builddir@/elpa
# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
# patterns (like *.h and *.hpp) to filter out the header-files in the
......
......@@ -29,7 +29,7 @@ libelpa@SUFFIX@_la_SOURCES = src/mod_precision.F90 \
src/elpa2.F90 \
src/elpa_c_interface.F90 \
src/elpa_qr/qr_utils.F90 \
src/elpa_qr/elpa_qrkernels.f90 \
src/elpa_qr/elpa_qrkernels.F90 \
src/elpa_qr/elpa_pdlarfb.F90 \
src/elpa_qr/elpa_pdgeqrf.F90
if HAVE_DETAILED_TIMINGS
......@@ -117,15 +117,28 @@ nobase_elpa_include_HEADERS = $(wildcard modules/*)
nobase_elpa_include_HEADERS += elpa/elpa.h elpa/elpa_kernel_constants.h elpa/elpa_generated.h
man_MANS = man/solve_evp_real.3 \
man/solve_evp_real_1stage.3 \
man/solve_evp_real_1stage_double.3 \
man/solve_evp_complex.3 \
man/solve_evp_complex_1stage.3 \
man/solve_evp_real_2stage.3 \
man/solve_evp_complex_2stage.3 \
man/solve_evp_complex_1stage_double.3 \
man/solve_evp_real_2stage_double.3 \
man/solve_evp_complex_2stage_double.3 \
man/get_elpa_row_col_comms.3 \
man/get_elpa_communicators.3 \
man/print_available_elpa2_kernels.1
if WANT_SINGLE_PRECISION_REAL
man_MANS += man/solve_evp_real_1stage_single.3 \
man/solve_evp_real_2stage_single.3
endif
if WANT_SINGLE_PRECISION_COMPLEX
man_MANS += man/solve_evp_complex_1stage_single.3 \
man/solve_evp_complex_2stage_single.3
endif
# other files to distribute
filesdir = $(docdir)/examples
dist_files_DATA = \
......@@ -142,6 +155,34 @@ dist_files_DATA = \
test/fortran_test_programs/test_real_with_c.F90 \
src/print_available_elpa2_kernels.F90
if WANT_SINGLE_PRECISION_COMPLEX
dist_files_DATA += test/fortran_test_programs/test_complex2_single_precision.F90 \
test/fortran_test_programs/test_complex2_default_kernel_single_precision.F90 \
test/fortran_test_programs/test_complex2_choose_kernel_with_api_single_precision.F90 \
test/fortran_test_programs/test_complex_single_precision.F90
endif
if WANT_SINGLE_PRECISION_REAL
dist_files_DATA += test/fortran_test_programs/test_real2_single_precision.F90 \
test/fortran_test_programs/test_real2_default_kernel_single_precision.F90 \
test/fortran_test_programs/test_real2_default_kernel_qr_decomposition_single_precision.F90 \
test/fortran_test_programs/test_real2_choose_kernel_with_api_single_precision.F90 \
test/fortran_test_programs/test_real_single_precision.F90
endif
if WITH_GPU_VERSION
dist_files_DATA += test/fortran_test_programs/test_real2_gpu_version.F90 \
test/fortran_test_programs/test_complex2_gpu_version.F90
if WANT_SINGLE_PRECISION_REAL
dist_files_DATA += test/fortran_test_programs/test_real2_gpu_version_single_precision.F90
endif
if WANT_SINGLE_PRECISION_COMPLEX
dist_files_DATA += test/fortran_test_programs/test_complex2_gpu_version_single_precision.F90
endif
endif
dist_doc_DATA = README COPYING/COPYING COPYING/gpl.txt COPYING/lgpl.txt
# pkg-config stuff
......@@ -156,6 +197,18 @@ bin_PROGRAMS = \
elpa2_test_complex@SUFFIX@ \
elpa2_print_kernels@SUFFIX@
if WANT_SINGLE_PRECISION_COMPLEX
bin_PROGRAMS += \
elpa1_test_complex_single_precision@SUFFIX@ \
elpa2_test_complex_single_precision@SUFFIX@
endif
if WANT_SINGLE_PRECISION_REAL
bin_PROGRAMS += \
elpa1_test_real_single_precision@SUFFIX@ \
elpa2_test_real_single_precision@SUFFIX@
endif
noinst_PROGRAMS = \
elpa2_test_real_default_kernel@SUFFIX@ \
elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@ \
......@@ -168,6 +221,36 @@ noinst_PROGRAMS = \
elpa2_test_complex_c_version@SUFFIX@ \
elpa1_test_real_with_c@SUFFIX@
if WANT_SINGLE_PRECISION_COMPLEX
noinst_PROGRAMS += \
elpa2_test_complex_default_kernel_single_precision@SUFFIX@ \
elpa2_test_complex_choose_kernel_with_api_single_precision@SUFFIX@
endif
if WANT_SINGLE_PRECISION_REAL
noinst_PROGRAMS += \
elpa2_test_real_default_kernel_single_precision@SUFFIX@ \
elpa2_test_real_default_kernel_qr_decomposition_single_precision@SUFFIX@ \
elpa2_test_real_choose_kernel_with_api_single_precision@SUFFIX@
endif
if WITH_GPU_VERSION
noinst_PROGRAMS += \
elpa2_test_complex_gpu_version@SUFFIX@ \
elpa2_test_real_gpu_version@SUFFIX@
if WANT_SINGLE_PRECISION_REAL
noinst_PROGRAMS += \
elpa2_test_real_gpu_version_single_precision@SUFFIX@
endif
if WANT_SINGLE_PRECISION_COMPLEX
noinst_PROGRAMS += \
elpa2_test_complex_gpu_version_single_precision@SUFFIX@
endif
endif
build_lib = libelpa@SUFFIX@.la
......@@ -212,42 +295,84 @@ elpa1_test_real_with_c@SUFFIX@_LDADD = $(build_lib)
#elpa1_test_complex_with_c@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real@SUFFIX@_LDFLAGS = -static
elpa2_test_real@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_default_kernel@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_default_kernel.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_default_kernel@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_default_kernel_qr_decomposition.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_choose_kernel_with_api@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_choose_kernel_with_api.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_choose_kernel_with_api@SUFFIX@_LDADD = $(build_lib)
elpa1_test_complex@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex.F90 $(shared_sources) $(redirect_sources)
elpa1_test_complex@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_default_kernel@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_default_kernel.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_default_kernel@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_choose_kernel_with_api@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_choose_kernel_with_api.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_choose_kernel_with_api@SUFFIX@_LDADD = $(build_lib)
elpa2_print_kernels@SUFFIX@_SOURCES = src/print_available_elpa2_kernels.F90 $(shared_sources) $(redirect_sources)
elpa2_print_kernels@SUFFIX@_LDADD = $(build_lib)
if WANT_SINGLE_PRECISION_REAL
elpa1_test_real_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa1_test_real_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_single_precision@SUFFIX@_LDFLAGS = -static
elpa2_test_real_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_default_kernel_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_default_kernel_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_default_kernel_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_default_kernel_qr_decomposition_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_default_kernel_qr_decomposition_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_default_kernel_qr_decomposition_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_choose_kernel_with_api_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_choose_kernel_with_api_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_choose_kernel_with_api_single_precision@SUFFIX@_LDADD = $(build_lib)
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa1_test_complex_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa1_test_complex_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_default_kernel_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_default_kernel_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_default_kernel_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_choose_kernel_with_api_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_choose_kernel_with_api_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_choose_kernel_with_api_single_precision@SUFFIX@_LDADD = $(build_lib)
endif
if WITH_GPU_VERSION
elpa2_test_real_gpu_version@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_gpu_version.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_gpu_version@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_gpu_version@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_gpu_version.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_gpu_version@SUFFIX@_LDADD = $(build_lib)
if WANT_SINGLE_PRECISION_REAL
elpa2_test_real_gpu_version_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_real2_gpu_version_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_real_gpu_version_single_precision@SUFFIX@_LDADD = $(build_lib)
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa2_test_complex_gpu_version_single_precision@SUFFIX@_SOURCES = test/fortran_test_programs/test_complex2_gpu_version_single_precision.F90 $(shared_sources) $(redirect_sources)
elpa2_test_complex_gpu_version_single_precision@SUFFIX@_LDADD = $(build_lib)
endif
endif
check_SCRIPTS = \
elpa1_test_real.sh \
elpa1_test_real_with_c.sh \
......@@ -265,7 +390,39 @@ check_SCRIPTS = \
elpa2_test_complex_choose_kernel_with_api.sh \
elpa2_print_kernels@SUFFIX@
if WANT_SINGLE_PRECISION_REAL
check_SCRIPTS += \
elpa1_test_real_single_precision.sh \
elpa2_test_real_single_precision.sh \
elpa2_test_real_default_kernel_single_precision.sh \
elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh \
elpa2_test_real_choose_kernel_with_api_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
# Single-precision complex test scripts executed by "make check".
# The list mirrors the double-precision complex entries of check_SCRIPTS,
# including the choose_kernel_with_api test: its program is built via
# noinst_PROGRAMS and its .sh rule is defined for both the MPI and the
# non-MPI case, so it has to be listed here to actually be run.
check_SCRIPTS += \
elpa1_test_complex_single_precision.sh \
elpa2_test_complex_single_precision.sh \
elpa2_test_complex_default_kernel_single_precision.sh \
elpa2_test_complex_choose_kernel_with_api_single_precision.sh
endif
if WITH_GPU_VERSION
check_SCRIPTS += \
elpa2_test_real_gpu_version.sh \
elpa2_test_complex_gpu_version.sh
if WANT_SINGLE_PRECISION_REAL
check_SCRIPTS += \
elpa2_test_real_gpu_version_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
check_SCRIPTS += \
elpa2_test_complex_gpu_version_single_precision.sh
endif
endif
TESTS = $(check_SCRIPTS)
if WITH_MPI
elpa1_test_real.sh:
echo 'mpiexec -n 2 ./elpa1_test_real@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real.sh
chmod +x elpa1_test_real.sh
......@@ -289,6 +446,7 @@ elpa1_test_real_c_version.sh:
elpa1_test_complex_c_version.sh:
echo 'mpiexec -n 2 ./elpa1_test_complex_c_version@SUFFIX@ $$TEST_FLAGS' > elpa1_test_complex_c_version.sh
chmod +x elpa1_test_complex_c_version.sh
elpa2_test_real.sh:
echo 'mpiexec -n 2 ./elpa2_test_real@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real.sh
chmod +x elpa2_test_real.sh
......@@ -320,6 +478,197 @@ elpa2_test_complex_default_kernel.sh:
elpa2_test_complex_choose_kernel_with_api.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_choose_kernel_with_api.sh
chmod +x elpa2_test_complex_choose_kernel_with_api.sh
if WANT_SINGLE_PRECISION_REAL
elpa1_test_real_single_precision.sh:
echo 'mpiexec -n 2 ./elpa1_test_real_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real_single_precision.sh
chmod +x elpa1_test_real_single_precision.sh
elpa2_test_real_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_single_precision.sh
chmod +x elpa2_test_real_single_precision.sh
elpa2_test_real_default_kernel_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_default_kernel_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_default_kernel_single_precision.sh
chmod +x elpa2_test_real_default_kernel_single_precision.sh
elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_default_kernel_qr_decomposition_single_precision@SUFFIX@' > elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh
chmod +x elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh
elpa2_test_real_choose_kernel_with_api_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_choose_kernel_with_api_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_choose_kernel_with_api_single_precision.sh
chmod +x elpa2_test_real_choose_kernel_with_api_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa1_test_complex_single_precision.sh:
echo 'mpiexec -n 2 ./elpa1_test_complex_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa1_test_complex_single_precision.sh
chmod +x elpa1_test_complex_single_precision.sh
elpa2_test_complex_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_single_precision.sh
chmod +x elpa2_test_complex_single_precision.sh
elpa2_test_complex_default_kernel_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_default_kernel_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_default_kernel_single_precision.sh
chmod +x elpa2_test_complex_default_kernel_single_precision.sh
elpa2_test_complex_choose_kernel_with_api_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_choose_kernel_with_api_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_choose_kernel_with_api_single_precision.sh
chmod +x elpa2_test_complex_choose_kernel_with_api_single_precision.sh
endif
if WITH_GPU_VERSION
elpa2_test_real_gpu_version.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_gpu_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_gpu_version.sh
chmod +x elpa2_test_real_gpu_version.sh
elpa2_test_complex_gpu_version.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_gpu_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_gpu_version.sh
chmod +x elpa2_test_complex_gpu_version.sh
if WANT_SINGLE_PRECISION_REAL
elpa2_test_real_gpu_version_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_gpu_version_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_gpu_version_single_precision.sh
chmod +x elpa2_test_real_gpu_version_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa2_test_complex_gpu_version_single_precision.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_gpu_version_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_gpu_version_single_precision.sh
chmod +x elpa2_test_complex_gpu_version_single_precision.sh
endif
# GPU_VERSION
endif
else
# build tests without mpi support
elpa1_test_real.sh:
echo './elpa1_test_real@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real.sh
chmod +x elpa1_test_real.sh
elpa1_test_real_with_c.sh:
echo './elpa1_test_real_with_c@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real_with_c.sh
chmod +x elpa1_test_real_with_c.sh
elpa2_test_real_c_version.sh:
echo './elpa2_test_real_c_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_c_version.sh
chmod +x elpa2_test_real_c_version.sh
elpa2_test_complex_c_version.sh:
echo './elpa2_test_complex_c_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_c_version.sh
chmod +x elpa2_test_complex_c_version.sh
elpa1_test_real_c_version.sh:
echo './elpa1_test_real_c_version@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real_c_version.sh
chmod +x elpa1_test_real_c_version.sh
elpa1_test_complex_c_version.sh:
echo './elpa1_test_complex_c_version@SUFFIX@ $$TEST_FLAGS' > elpa1_test_complex_c_version.sh
chmod +x elpa1_test_complex_c_version.sh
elpa2_test_real.sh:
echo './elpa2_test_real@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real.sh
chmod +x elpa2_test_real.sh
elpa2_test_real_default_kernel.sh:
echo './elpa2_test_real_default_kernel@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_default_kernel.sh
chmod +x elpa2_test_real_default_kernel.sh
elpa2_test_real_default_kernel_qr_decomposition.sh:
echo './elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@' > elpa2_test_real_default_kernel_qr_decomposition.sh
chmod +x elpa2_test_real_default_kernel_qr_decomposition.sh
elpa2_test_real_choose_kernel_with_api.sh:
echo './elpa2_test_real_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_choose_kernel_with_api.sh
chmod +x elpa2_test_real_choose_kernel_with_api.sh
elpa1_test_complex.sh:
echo './elpa1_test_complex@SUFFIX@ $$TEST_FLAGS' > elpa1_test_complex.sh
chmod +x elpa1_test_complex.sh
elpa2_test_complex.sh:
echo './elpa2_test_complex@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex.sh
chmod +x elpa2_test_complex.sh
elpa2_test_complex_default_kernel.sh:
echo './elpa2_test_complex_default_kernel@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_default_kernel.sh
chmod +x elpa2_test_complex_default_kernel.sh
elpa2_test_complex_choose_kernel_with_api.sh:
echo './elpa2_test_complex_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_choose_kernel_with_api.sh
chmod +x elpa2_test_complex_choose_kernel_with_api.sh
if WANT_SINGLE_PRECISION_REAL
elpa1_test_real_single_precision.sh:
echo './elpa1_test_real_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa1_test_real_single_precision.sh
chmod +x elpa1_test_real_single_precision.sh
elpa2_test_real_single_precision.sh:
echo './elpa2_test_real_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_single_precision.sh
chmod +x elpa2_test_real_single_precision.sh
elpa2_test_real_default_kernel_single_precision.sh:
echo './elpa2_test_real_default_kernel_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_default_kernel_single_precision.sh
chmod +x elpa2_test_real_default_kernel_single_precision.sh
elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh:
echo './elpa2_test_real_default_kernel_qr_decomposition_single_precision@SUFFIX@' > elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh
chmod +x elpa2_test_real_default_kernel_qr_decomposition_single_precision.sh
elpa2_test_real_choose_kernel_with_api_single_precision.sh:
echo './elpa2_test_real_choose_kernel_with_api_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_choose_kernel_with_api_single_precision.sh
chmod +x elpa2_test_real_choose_kernel_with_api_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa1_test_complex_single_precision.sh:
echo './elpa1_test_complex_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa1_test_complex_single_precision.sh
chmod +x elpa1_test_complex_single_precision.sh
elpa2_test_complex_single_precision.sh:
echo './elpa2_test_complex_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_single_precision.sh
chmod +x elpa2_test_complex_single_precision.sh
elpa2_test_complex_default_kernel_single_precision.sh:
echo './elpa2_test_complex_default_kernel_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_default_kernel_single_precision.sh
chmod +x elpa2_test_complex_default_kernel_single_precision.sh
elpa2_test_complex_choose_kernel_with_api_single_precision.sh:
echo './elpa2_test_complex_choose_kernel_with_api_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_choose_kernel_with_api_single_precision.sh
chmod +x elpa2_test_complex_choose_kernel_with_api_single_precision.sh
endif
if WITH_GPU_VERSION
elpa2_test_real_gpu_version.sh:
echo './elpa2_test_real_gpu_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_gpu_version.sh
chmod +x elpa2_test_real_gpu_version.sh
elpa2_test_complex_gpu_version.sh:
echo './elpa2_test_complex_gpu_version@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_gpu_version.sh
chmod +x elpa2_test_complex_gpu_version.sh
if WANT_SINGLE_PRECISION_REAL
elpa2_test_real_gpu_version_single_precision.sh:
echo './elpa2_test_real_gpu_version_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_gpu_version_single_precision.sh
chmod +x elpa2_test_real_gpu_version_single_precision.sh
endif
if WANT_SINGLE_PRECISION_COMPLEX
elpa2_test_complex_gpu_version_single_precision.sh:
echo './elpa2_test_complex_gpu_version_single_precision@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_gpu_version_single_precision.sh
chmod +x elpa2_test_complex_gpu_version_single_precision.sh
endif
# GPU_VERSION
endif
# use mpi
endif
mod_precision.i: $(top_srcdir)/src/mod_precision.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -I$(top_srcdir)/ -c $(top_srcdir)/src/mod_precision.F90 -o $@
......@@ -341,6 +690,27 @@ elpa2_kernels_real.i: $(top_srcdir)/src/elpa2_kernels/elpa2_kernels_real.F90
mod_compute_hh_trafo_real.i: $(top_srcdir)/src/mod_compute_hh_trafo_real.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/mod_compute_hh_trafo_real.F90 -o $@
test_real.i: $(top_srcdir)/test/fortran_test_programs/test_real.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/fortran_test_programs/test_real.F90 -o $@
blacs_infrastructure.i: $(top_srcdir)/test/shared_sources/blacs_infrastructure.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/shared_sources/blacs_infrastructure.F90 -o $@
check_correctnes.i: $(top_srcdir)/test/shared_sources/check_correctnes.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/shared_sources/check_correctnes.F90 -o $@
prepare_matrix.i: $(top_srcdir)/test/shared_sources/prepare_matrix.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/shared_sources/prepare_matrix.F90 -o $@
read_input_parameters.i: $(top_srcdir)/test/shared_sources/read_input_parameters.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/shared_sources/read_input_parameters.F90 -o $@
setup_mpi.i: $(top_srcdir)/test/shared_sources/setup_mpi.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/test/shared_sources/setup_mpi.F90 -o $@
cuUtils.i: $(top_srcdir)/src/cuUtils.cu
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/cuUtils.cu -o $@
include doxygen.am
CLEANFILES = \
......@@ -383,7 +753,16 @@ EXTRA_DIST = \
test/fortran_test_programs/elpa_test_programs_print_headers.X90 \
src/elpa_reduce_add_vectors.X90 \
src/elpa_transpose_vectors.X90 \
src/elpa1_compute_template_real.X90 \
src/elpa1_compute_template_complex.X90 \
src/elpa2_compute_template_real.X90 \
src/elpa2_compute_template_complex.X90 \
src/redist_band.X90 \
src/elpa_qr/elpa_qrkernels.X90 \
src/ev_tridi_band_gpu_c_v2_complex_template.Xcu \
src/ev_tridi_band_gpu_c_v2_real_template.Xcu \
src/cuUtils_complex_template.Xcu \
src/cuUtils_real_template.Xcu \
nvcc_wrap
# Rules to re-generate the headers
......
......@@ -523,7 +523,7 @@ if test x"${want_gpu}" = x"yes" ; then
fi
dnl check whether single precision is requested
AC_MSG_CHECKING(whether single precision calculations are requested)
AC_MSG_CHECKING(whether ELPA library should contain also single precision functions)
dnl Use $enableval rather than a hard-coded "yes" in the action-if-given
dnl branch: Autoconf runs that branch for --disable-single-precision and
dnl --enable-single-precision=no as well, and those must not switch the
dnl feature on. When the option is absent the default stays "no".
AC_ARG_ENABLE(single-precision,[AS_HELP_STRING([--enable-single-precision],
[build with single precision])],
want_single_precision="$enableval", want_single_precision="no")
......@@ -747,10 +747,12 @@ if test x"${DESPERATELY_WANT_ASSUMED_SIZE}" = x"yes" ; then
AC_DEFINE([DESPERATELY_WANT_ASSUMED_SIZE],[1],[use assumed size arrays, even if not debuggable])
fi
if test x"${want_single_precision}" = x"no" ; then
AC_DEFINE([DOUBLE_PRECISION_REAL],[1],[use double precision for real calculation])
AC_DEFINE([DOUBLE_PRECISION_COMPLEX],[1],[use double precision for complex calculation])
if test x"${want_single_precision}" = x"yes" ; then
AC_DEFINE([WANT_SINGLE_PRECISION_REAL],[1],[build also single-precision for real calculation])
AC_DEFINE([WANT_SINGLE_PRECISION_COMPLEX],[1],[build also single-precision for complex calculation])
fi
AM_CONDITIONAL([WANT_SINGLE_PRECISION_REAL],[test x"$want_single_precision" = x"yes"])
AM_CONDITIONAL([WANT_SINGLE_PRECISION_COMPLEX],[test x"$want_single_precision" = x"yes"])
AC_SUBST([WITH_MKL])
AC_SUBST([WITH_BLACS])
......
.TH "solve_evp_complex" 3 "Wed Dec 2 2015" "ELPA" \" -*- nroff -*-
.TH "solve_evp_complex" 3 "Thu Mar 17 2016" "ELPA" \" -*- nroff -*-
.ad l
.nh
.SH NAME
solve_evp_complex \- solve the complex eigenvalue problem with the 1-stage ELPA solver.
This interface is old and deprecated. It is recommended to use \fBsolve_evp_complex_1stage\fP(3)
solve_evp_complex \- solve the double-precision complex eigenvalue problem with the 1-stage ELPA solver.
This interface is old and deprecated. It is recommended to use \fBsolve_evp_complex_1stage_double\fP(3)
.br
.SH SYNOPSIS
......@@ -48,4 +48,4 @@ use elpa1
Solve the complex eigenvalue problem with the 1-stage solver. The ELPA communicators \fBmpi_comm_rows\fP and \fBmpi_comm_cols\fP are obtained with the \fBget_elpa_communicators\fP(3) function. The distributed quadratic matrix \fBa\fP has global dimensions \fBna\fP x \fBna\fP, and a local size \fBlda\fP x \fBmatrixCols\fP. The solver will compute the first \fBnev\fP eigenvalues, which will be stored on exit in \fBev\fP. The eigenvectors corresponding to the eigenvalues will be stored in \fBq\fP. All memory of the arguments must be allocated outside the call to the solver.
.br
.SH "SEE ALSO"
\fBget_elpa_communicators\fP(3) \fBsolve_evp_real_1stage\fP(3) \fBsolve_evp_real_2stage\fP(3) \fBsolve_evp_complex_2stage\fP(3) \fBprint_available_elpa2_kernels\fP(1)
\fBget_elpa_communicators\fP(3) \fBsolve_evp_real_1stage_double\fP(3) \fBsolve_evp_real_1stage_single\fP(3) \fBsolve_evp_complex_1stage_single\fP(3) \fBsolve_evp_real_2stage_double\fP(3) \fBsolve_evp_real_2stage_single\fP(3) \fBsolve_evp_complex_2stage_double\fP(3) \fBsolve_evp_complex_2stage_single\fP(3) \fBprint_available_elpa2_kernels\fP(1)
.TH "solve_evp_complex_1stage_double" 3 "Thu Mar 17 2016" "ELPA" \" -*- nroff -*-
.ad l
.nh
.SH NAME
solve_evp_complex_1stage_double \- solve the double-precision complex eigenvalue problem with the 1-stage ELPA solver
.br
.SH SYNOPSIS
.br
.SS FORTRAN INTERFACE
use elpa1
.br
.br
.RI "success = \fBsolve_evp_complex_1stage_double\fP (na, nev, a(lda,matrixCols), ev(nev), q(ldq, matrixCols), ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)"
.br
.RI " "
.br
.RI "With the definitions of the input and output variables:"
.br
.RI "integer, intent(in) \fBna\fP: global dimension of quadratic matrix \fBa\fP to solve"
.br
.RI "integer, intent(in) \fBnev\fP: number of eigenvalues to be computed; the first \fBnev\fP eigenvalues are calculated"
.br
.RI "complex*16, intent(inout) \fBa\fP: locally distributed part of the matrix \fBa\fP. The local dimensions are \fBlda\fP x \fBmatrixCols\fP"
.br
.RI "integer, intent(in) \fBlda\fP: leading dimension of locally distributed matrix \fBa\fP"
.br
.RI "real*8, intent(inout) \fBev\fP: on output the first \fBnev\fP computed eigenvalues"
.br
.RI "complex*16, intent(inout) \fBq\fP: on output the first \fBnev\fP computed eigenvectors"
.br
.RI "integer, intent(in) \fBldq\fP: leading dimension of matrix \fBq\fP which stores the eigenvectors"
.br
.RI "integer, intent(in) \fBnblk\fP: blocksize of block cyclic distribution, must be the same in both directions"
.br
.RI "integer, intent(in) \fBmatrixCols\fP: number of columns of locally distributed matrices \fBa\fP and \fBq\fP"
.br
.RI "integer, intent(in) \fBmpi_comm_rows\fP: communicator for communication in rows. Constructed with \fBget_elpa_communicators\fP(3)"
.br
.RI "integer, intent(in) \fBmpi_comm_cols\fP: communicator for communication in columns. Constructed with \fBget_elpa_communicators\fP(3)"
.br
.RI "logical \fBsuccess\fP: return value indicating success or failure"
.br
.SS C INTERFACE
#include "elpa.h"
.br
#include <complex.h>