Commit bb158d87 authored by Andreas Marek

Allow to compile build configuration into the object

parent d5ba941f
......@@ -454,6 +454,12 @@ if WANT_SINGLE_PRECISION_COMPLEX
endif
endif
# Build the helper that prints the embedded build configuration only when
# configure was run with --enable-store-build-config (STORE_BUILD_CONFIG set).
if STORE_BUILD_CONFIG
libelpa@SUFFIX@_private_la_SOURCES += src/helpers/print_build_config.c
endif
# Cuda files
.cu.lo:
NVCC="$(NVCC)" libtool --mode=compile --tag=CC $(top_srcdir)/nvcc_wrap $(NVCCFLAGS) $(LDFLAGS) -I$(top_builddir)/ -I$(top_srcdir)/ -c $< -o $@
......@@ -669,7 +675,6 @@ wrapper_la_CFLAGS = $(PYTHON_INCLUDE) $(NUMPY_INCLUDE) $(AM_CFLAGS)
python/pyelpa/wrapper.c: python/pyelpa/wrapper.pyx
cython $< -o $@
# test scripts
TASKS ?= 2
if WITH_MPI
......
......@@ -1376,6 +1376,28 @@ else
fi
fi
dnl store-build-config
dnl Optionally embed config.log into the library. The embedding itself is done
dnl with xxd after AC_OUTPUT, so xxd is only required when the option is enabled.
AC_MSG_CHECKING(whether build config should be compiled into the library)
AC_ARG_ENABLE([store-build-config],
              [AS_HELP_STRING([--enable-store-build-config],
                              [compile build config into the library object, default no])],
              [
                if test x"$enableval" = x"yes"; then
                  store_build_config=yes
                else
                  store_build_config=no
                fi
              ],
              [store_build_config=no])
AC_MSG_RESULT([${store_build_config}])
AM_CONDITIONAL([STORE_BUILD_CONFIG],[test x"$store_build_config" = x"yes"])
if test x"${store_build_config}" = x"yes"; then
  AC_DEFINE([STORE_BUILD_CONFIG], [1], [compile build config into the library object])
  dnl Check for xxd only here: running the check before AC_MSG_RESULT would
  dnl interleave the two messages, and builds without this option do not need xxd.
  AC_CHECK_PROG(xxd_CHECK,xxd,yes)
  AS_IF([test x"$xxd_CHECK" != x"yes"], [AC_MSG_ERROR([Please install xxd before configuring.])])
fi
AC_SUBST([SUFFIX])
AC_SUBST([PKG_CONFIG_FILE],[elpa${SUFFIX}-${PACKAGE_VERSION}.pc])
......@@ -1385,6 +1407,7 @@ AC_CONFIG_FILES([
${PKG_CONFIG_FILE}:elpa.pc.in
elpa/elpa_constants.h
elpa/elpa_version.h
elpa/elpa_build_config.h
])
m4_include([m4/ax_fc_check_define.m4])
......@@ -1501,8 +1524,6 @@ if test x"${enable_python_tests}" = x"yes"; then
AC_MSG_ERROR([pytest not found.])
fi
fi
AC_OUTPUT
echo ""
......@@ -1552,6 +1573,11 @@ else
else
echo "#undef OPTIONAL_C_ERROR_ARGUMENT" > elpa/elpa_generated_c_api.h
fi
# When storing the build config was requested, copy config.log to a file named
# elpa_build_object and append its xxd -i dump to elpa/elpa_build_config.h,
# the header that src/helpers/print_build_config.c includes.
if test x"$store_build_config" = x"yes"; then
cat config.log > elpa_build_object
# NOTE(review): the C symbol name elpa_build_object used by print_build_config
# presumably comes from the input file name given to xxd -i; keep both in sync.
xxd -i elpa_build_object >> elpa/elpa_build_config.h
fi
make -f $srcdir/generated_headers.am generated-headers top_srcdir="$srcdir" CPP="$CPP"
fi
// The stored build config
......@@ -53,7 +53,7 @@ test/shared/generated.h: $(wildcard $(top_srcdir)/test/shared/*.*90) | test/shar
$(call extract_interface,!c>)
generated_headers += src/elpa_generated_fortran_interfaces.h
src/elpa_generated_fortran_interfaces.h: $(filter-out $(wildcard $(top_srcdir)/src/*generated*), $(wildcard $(top_srcdir)/src/elpa2/kernels/*.c $(top_srcdir)/src/elpa2/kernels/*.s $(top_srcdir)/src/*.[ch] $(top_srcdir)/src/elpa_generalized/*.[ch])) | src
src/elpa_generated_fortran_interfaces.h: $(filter-out $(wildcard $(top_srcdir)/src/*generated*), $(wildcard $(top_srcdir)/src/helpers/*.c $(top_srcdir)/src/elpa2/kernels/*.c $(top_srcdir)/src/elpa2/kernels/*.s $(top_srcdir)/src/*.[ch] $(top_srcdir)/src/elpa_generalized/*.[ch])) | src
@rm -f $@
$(call extract_interface,!f>)
$(call extract_interface,#!f>)
......
......@@ -808,6 +808,7 @@ module elpa_api
write(error_unit, "(a,i0,a)") "ELPA: Error API version ", api_version," is not supported by this library"
error = ELPA_ERROR_API_VERSION
endif
end function
......
......@@ -194,8 +194,7 @@ module elpa_impl
#else
integer, intent(out) :: error
#endif
integer :: error2
integer :: error2, output_build_config
allocate(obj, stat=error2)
if (error2 .ne. 0) then
......@@ -460,9 +459,6 @@ module elpa_impl
end subroutine
#ifdef ENABLE_AUTOTUNING
#ifdef OPTIONAL_C_ERROR_ARGUMENT
!c_o> #ifdef OPTIONAL_C_ERROR_ARGUMENT
......@@ -521,7 +517,7 @@ module elpa_impl
!> \result error integer, the error code
function elpa_setup(self) result(error)
class(elpa_impl_t), intent(inout) :: self
integer :: error, timings, performance
integer :: error, timings, performance, build_config
#ifdef WITH_MPI
integer :: mpi_comm_parent, mpi_comm_rows, mpi_comm_cols, np_rows, np_cols, my_id, &
......@@ -533,6 +529,7 @@ module elpa_impl
#endif
#ifdef HAVE_LIKWID
!initialize likwid
call likwid_markerInit()
......@@ -711,6 +708,18 @@ module elpa_impl
if (check_elpa_set(error, ELPA_ERROR_SETUP)) return
#endif
#if STORE_BUILD_CONFIG
call self%get("output_build_config",build_config, error)
if ( build_config .eq. 1) then
#ifdef WITH_MPI
if (my_id .eq. 0) then
#endif
call print_build_config()
#ifdef WITH_MPI
endif
#endif
endif
#endif
end function
......
......@@ -116,6 +116,7 @@ static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value)
static int na_is_valid(elpa_index_t index, int n, int new_value);
static int nev_is_valid(elpa_index_t index, int n, int new_value);
static int bw_is_valid(elpa_index_t index, int n, int new_value);
static int output_build_config_is_valid(elpa_index_t index, int n, int new_value);
static int gpu_is_valid(elpa_index_t index, int n, int new_value);
static int is_positive(elpa_index_t index, int n, int new_value);
......@@ -187,53 +188,57 @@ static const elpa_index_int_entry_t int_entries[] = {
INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication", PRINT_NO),
INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator", PRINT_NO),
INT_ANY_ENTRY("blacs_context", "BLACS context", PRINT_NO),
#ifdef STORE_BUILD_CONFIG
INT_ENTRY("output_build_config", "Output the build config", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, output_build_config_is_valid, NULL, PRINT_NO),
#endif
INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES),
INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
//The default GPU usage for the individual phases is 1. However, it is only evaluated if the GPU is used at all, which first has to be
//determined by the parameter gpu and the presence of a device
INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, valid_with_gpu, NULL, PRINT_YES),
INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
number_of_real_kernels, real_kernel_enumerate, real_kernel_is_valid, real_kernel_name, PRINT_YES),
INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
number_of_complex_kernels, complex_kernel_enumerate, complex_kernel_is_valid, complex_kernel_name, PRINT_YES),
INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
min_tile_size_cardinality, min_tile_size_enumerate, min_tile_size_is_valid, NULL, PRINT_YES),
INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
intermediate_bandwidth_cardinality, intermediate_bandwidth_enumerate, intermediate_bandwidth_is_valid, NULL, PRINT_YES),
INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY, \
band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL, PRINT_YES),
INT_ENTRY("stripewidth_real", "Stripewidth_real, default 48. Must be a multiple of 4", 48, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_REAL,
INT_ENTRY("stripewidth_real", "Stripewidth_real, default 48. Must be a multiple of 4", 48, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_REAL, \
stripewidth_real_cardinality, stripewidth_real_enumerate, stripewidth_real_is_valid, NULL, PRINT_YES),
INT_ENTRY("stripewidth_complex", "Stripewidth_complex, default 96. Must be a multiple of 8", 96, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_COMPLEX,
INT_ENTRY("stripewidth_complex", "Stripewidth_complex, default 96. Must be a multiple of 8", 96, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
stripewidth_complex_cardinality, stripewidth_complex_enumerate, stripewidth_complex_is_valid, NULL, PRINT_YES),
INT_ENTRY("max_stored_rows", "Maximum number of stored rows used in ELPA 1 backtransformation, default 63", 63, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("max_stored_rows", "Maximum number of stored rows used in ELPA 1 backtransformation, default 63", 63, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY, \
max_stored_rows_cardinality, max_stored_rows_enumerate, max_stored_rows_is_valid, NULL, PRINT_YES),
#ifdef WITH_OPENMP
INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
#else
INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
#endif
INT_ENTRY("cannon_buffer_size", "Increasing the buffer size might make it faster, but costs memory", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
INT_ENTRY("cannon_buffer_size", "Increasing the buffer size might make it faster, but costs memory", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
cannon_buffer_size_cardinality, cannon_buffer_size_enumerate, cannon_buffer_size_is_valid, NULL, PRINT_YES),
//BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL, PRINT_YES),
......@@ -733,6 +738,10 @@ static int bw_is_valid(elpa_index_t index, int n, int new_value) {
return (0 <= new_value) && (new_value < na);
}
/*
 * Validator registered for the "output_build_config" index entry.
 * The option is a boolean switch, so only the values 0 and 1 are accepted.
 * The index and n arguments are not consulted.
 * Returns 1 when new_value is acceptable, 0 otherwise.
 */
static int output_build_config_is_valid(elpa_index_t index, int n, int new_value) {
        switch (new_value) {
        case 0:
        case 1:
                return 1;
        default:
                return 0;
        }
}
/*
 * Validator registered for the "gpu" index entry.
 * Accepts exactly the two switch values 0 and 1; the index and n arguments
 * are not consulted.
 * Returns 1 when new_value is acceptable, 0 otherwise.
 */
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        if (new_value == 0) {
                return 1;
        }
        return new_value == 1;
}
......
#include "config.h"
#include "elpa/elpa_build_config.h"
#include <stdio.h>
/*
!f>#ifdef STORE_BUILD_CONFIG
!f> interface
!f> subroutine print_build_config() &
!f> bind(C, name="print_build_config")
!f> use, intrinsic :: iso_c_binding
!f> end subroutine
!f> end interface
!f>#endif
*/
/*
 * Print the autoconf config.log that was embedded into the library at build
 * time, framed by a short banner, to stdout.
 *
 * When STORE_BUILD_CONFIG is not defined the function does nothing.
 */
void print_build_config(){
#ifdef STORE_BUILD_CONFIG
   printf("===============================================================\n");
   printf(" Output of the autoconf config.log created at build time \n\n");
   printf(" In case of troubles with the ELPA library, please send the \n following output together with a problem description \n at elpa-library@mpcdf.mpg.de \n\n");
   /* elpa_build_object is produced by "xxd -i", which emits a plain byte array
      without a terminating NUL plus a companion <name>_len variable. Printing it
      with "%s" would read past the end of the array, so write it by length. */
   fwrite(elpa_build_object, 1, (size_t) elpa_build_object_len, stdout);
   printf(" \n");
   printf("===============================================================\n");
#endif
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment