Commit 460b754c authored by Lorenz Huedepohl's avatar Lorenz Huedepohl

Work-In-Progress commit for new ELPA interface

parent dbe258d9
......@@ -20,6 +20,7 @@ libelpa@SUFFIX@_public_la_SOURCES = \
src/elpa1/legacy_interface/elpa1_auxiliary_legacy.F90 \
src/elpa1/elpa1_auxiliary.F90 \
src/elpa_t.F90 \
src/elpa_constants.F90 \
src/general/elpa_utilities.F90
# internal parts
......@@ -53,8 +54,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa2/qr/elpa_pdgeqrf.F90 \
src/elpa1/elpa1.F90 \
src/elpa2/elpa2.F90 \
src/elpa_index.c \
src/elpa_options.c
src/elpa_constants.c \
src/elpa_index.c
EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa1/elpa_reduce_add_vectors.X90 \
......@@ -322,10 +323,9 @@ endif
include generated_headers.am
BUILT_SOURCES = $(generated_headers)
# install public Fortran modules files in the include/ dir
# install public headers and Fortran modules files in the include/ dir
elpa_includedir = $(includedir)/elpa@SUFFIX@-@PACKAGE_VERSION@
nobase_elpa_include_HEADERS = $(wildcard modules/*)
nobase_elpa_include_HEADERS += elpa/elpa.h elpa/elpa_kernel_constants.h elpa/elpa_solver_constants.h elpa/elpa_constants.h elpa/elpa_generated.h
nobase_elpa_include_HEADERS = $(wildcard modules/*) $(wildcard elpa/*)
dist_man_MANS = \
man/solve_evp_real.3 \
......
This diff is collapsed.
/* Sentinel integer value; expands to INT_MIN.
 * NOTE(review): INT_MIN is not defined in this file -- presumably the
 * includer has to provide <limits.h>; confirm. */
#define ELPA_INVALID_INT INT_MIN
/* Status codes, always available under the ELPA_C_ prefix */
#define ELPA_C_ERROR 0
#define ELPA_C_OK 1
/* Unprefixed aliases of the status codes, only defined when ELPA_H is defined */
#ifdef ELPA_H
#define ELPA_ERROR ELPA_C_ERROR
#define ELPA_OK ELPA_C_OK
#endif
#include <elpa/elpa_kernel_constants.h>
#include <elpa/elpa_solver_constants.h>
#pragma once
/* This might seem over-engineered, but helps to re-use this file also on the
* Fortran side and thus to keep the definitions in this one place here
*/
/* Private helper macros */
#define ELPA_ENUM_ENTRY(name, value) \
name = value,
#define ELPA_ENUM_SUM(name, value) +1
/* Solver constants */
/* List of the available solvers as (name, value) pairs; X is applied to
 * each pair in turn. */
#define ELPA_SOLVER_MAP(X) \
X(ELPA_SOLVER_1STAGE, 1) \
X(ELPA_SOLVER_2STAGE, 2)
/* One enumerator per entry of ELPA_SOLVER_MAP */
enum ELPA_SOLVERS {
ELPA_SOLVER_MAP(ELPA_ENUM_ENTRY)
};
/* Number of entries in ELPA_SOLVER_MAP; expands to (0 +1 +1) */
#define ELPA_NUMBER_OF_SOLVERS (0 ELPA_SOLVER_MAP(ELPA_ENUM_SUM))
/* Kernel constants */
/* List of the real 2-stage kernels as (name, value) pairs, numbered 1..18.
 * The last entry, ELPA_2STAGE_REAL_DEFAULT, is not a kernel of its own: its
 * value is an @...@ placeholder.
 * NOTE(review): presumably the placeholder is replaced by one of the ids
 * above when the header is generated at configure time -- confirm. */
#define ELPA_REAL_KERNEL_MAP(X) \
X(ELPA_2STAGE_REAL_GENERIC, 1) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE, 2) \
X(ELPA_2STAGE_REAL_BGP, 3) \
X(ELPA_2STAGE_REAL_BGQ, 4) \
X(ELPA_2STAGE_REAL_SSE, 5) \
X(ELPA_2STAGE_REAL_SSE_BLOCK2, 6) \
X(ELPA_2STAGE_REAL_SSE_BLOCK4, 7) \
X(ELPA_2STAGE_REAL_SSE_BLOCK6, 8) \
X(ELPA_2STAGE_REAL_AVX_BLOCK2, 9) \
X(ELPA_2STAGE_REAL_AVX_BLOCK4, 10) \
X(ELPA_2STAGE_REAL_AVX_BLOCK6, 11) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK2, 12) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK4, 13) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK6, 14) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK2, 15) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK4, 16) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK6, 17) \
X(ELPA_2STAGE_REAL_GPU, 18) \
X(ELPA_2STAGE_REAL_DEFAULT, @ELPA_2STAGE_REAL_DEFAULT@)
/* One enumerator per entry of ELPA_REAL_KERNEL_MAP, including the DEFAULT entry */
enum ELPA_REAL_KERNELS {
ELPA_REAL_KERNEL_MAP(ELPA_ENUM_ENTRY)
};
/* List of the complex 2-stage kernels as (name, value) pairs, numbered 1..14.
 * The last entry, ELPA_2STAGE_COMPLEX_DEFAULT, is not a kernel of its own:
 * its value is an @...@ placeholder.
 * NOTE(review): presumably the placeholder is replaced by one of the ids
 * above when the header is generated at configure time -- confirm. */
#define ELPA_COMPLEX_KERNEL_MAP(X) \
X(ELPA_2STAGE_COMPLEX_GENERIC, 1) \
X(ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE, 2) \
X(ELPA_2STAGE_COMPLEX_BGP, 3) \
X(ELPA_2STAGE_COMPLEX_BGQ, 4) \
X(ELPA_2STAGE_COMPLEX_SSE, 5) \
X(ELPA_2STAGE_COMPLEX_SSE_BLOCK1, 6) \
X(ELPA_2STAGE_COMPLEX_SSE_BLOCK2, 7) \
X(ELPA_2STAGE_COMPLEX_AVX_BLOCK1, 8) \
X(ELPA_2STAGE_COMPLEX_AVX_BLOCK2, 9) \
X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK1, 10) \
X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK2, 11) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK1, 12) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK2, 13) \
X(ELPA_2STAGE_COMPLEX_GPU, 14) \
X(ELPA_2STAGE_COMPLEX_DEFAULT, @ELPA_2STAGE_COMPLEX_DEFAULT@)
/* One enumerator per entry of ELPA_COMPLEX_KERNEL_MAP, including the DEFAULT entry */
enum ELPA_COMPLEX_KERNELS {
ELPA_COMPLEX_KERNEL_MAP(ELPA_ENUM_ENTRY)
};
/* General constants */
/* Generic status values as (name, value) pairs */
#define ELPA_CONSTANTS_MAP(X) \
X(ELPA_ERROR, -1) \
X(ELPA_NO, 0) \
X(ELPA_OK, 1)
/* The status values plus the kernel counts. Each count is obtained by
 * expanding the kernel list with ELPA_ENUM_SUM, i.e. 0 +1 +1 ... per entry. */
enum ELPA_CONSTANTS {
ELPA_CONSTANTS_MAP(ELPA_ENUM_ENTRY)
/* -1 to take out the default kernel again */
ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS = (0 ELPA_COMPLEX_KERNEL_MAP(ELPA_ENUM_SUM) -1),
ELPA_2STAGE_NUMBER_OF_REAL_KERNELS = (0 ELPA_REAL_KERNEL_MAP(ELPA_ENUM_SUM) -1),
};
/* Numeric ids of the real 2-stage kernels (ELPA_C_ prefixed names) */
#define ELPA_C_2STAGE_REAL_GENERIC 1
#define ELPA_C_2STAGE_REAL_GENERIC_SIMPLE 2
#define ELPA_C_2STAGE_REAL_BGP 3
#define ELPA_C_2STAGE_REAL_BGQ 4
#define ELPA_C_2STAGE_REAL_SSE 5
#define ELPA_C_2STAGE_REAL_SSE_BLOCK2 6
#define ELPA_C_2STAGE_REAL_SSE_BLOCK4 7
#define ELPA_C_2STAGE_REAL_SSE_BLOCK6 8
#define ELPA_C_2STAGE_REAL_AVX_BLOCK2 9
#define ELPA_C_2STAGE_REAL_AVX_BLOCK4 10
#define ELPA_C_2STAGE_REAL_AVX_BLOCK6 11
#define ELPA_C_2STAGE_REAL_AVX2_BLOCK2 12
#define ELPA_C_2STAGE_REAL_AVX2_BLOCK4 13
#define ELPA_C_2STAGE_REAL_AVX2_BLOCK6 14
#define ELPA_C_2STAGE_REAL_AVX512_BLOCK2 15
#define ELPA_C_2STAGE_REAL_AVX512_BLOCK4 16
#define ELPA_C_2STAGE_REAL_AVX512_BLOCK6 17
#define ELPA_C_2STAGE_REAL_GPU 18
/* Count of real kernel ids; equal to the highest id above */
#define ELPA_C_2STAGE_NUMBER_OF_REAL_KERNELS 18
/* Numeric ids of the complex 2-stage kernels (ELPA_C_ prefixed names) */
#define ELPA_C_2STAGE_COMPLEX_GENERIC 1
#define ELPA_C_2STAGE_COMPLEX_GENERIC_SIMPLE 2
#define ELPA_C_2STAGE_COMPLEX_BGP 3
#define ELPA_C_2STAGE_COMPLEX_BGQ 4
#define ELPA_C_2STAGE_COMPLEX_SSE 5
#define ELPA_C_2STAGE_COMPLEX_SSE_BLOCK1 6
#define ELPA_C_2STAGE_COMPLEX_SSE_BLOCK2 7
#define ELPA_C_2STAGE_COMPLEX_AVX_BLOCK1 8
#define ELPA_C_2STAGE_COMPLEX_AVX_BLOCK2 9
#define ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK1 10
#define ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK2 11
#define ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK1 12
#define ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK2 13
#define ELPA_C_2STAGE_COMPLEX_GPU 14
/* Count of complex kernel ids; equal to the highest id above */
#define ELPA_C_2STAGE_NUMBER_OF_COMPLEX_KERNELS 14
/*
 * Default real 2-stage kernel: defines ELPA_C_2STAGE_REAL_DEFAULT.
 *
 * Without WITH_ONE_SPECIFIC_REAL_KERNEL the default is the generic kernel.
 * With it, every enabled WITH_REAL_*_KERNEL (or WITH_GPU_VERSION) macro below
 * defines the default; inside one SSE/AVX/AVX2/AVX512 family the largest
 * enabled block size is taken (BLOCK6 before BLOCK4 before BLOCK2).
 * NOTE(review): if macros of more than one group are defined, the default is
 * defined more than once -- presumably configure enables exactly one group
 * together with WITH_ONE_SPECIFIC_REAL_KERNEL; confirm.
 *
 * The selection is the same whether or not WITH_REAL_AVX_BLOCK2_KERNEL is
 * defined, so it is written once rather than once per #if/#else branch.
 */
#ifndef WITH_ONE_SPECIFIC_REAL_KERNEL

#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GENERIC

#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */

#ifdef WITH_REAL_GENERIC_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GENERIC
#endif

#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GENERIC_SIMPLE
#endif

#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE
#endif

/* SSE family: largest enabled block size */
#if defined(WITH_REAL_SSE_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE_BLOCK6
#elif defined(WITH_REAL_SSE_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE_BLOCK4
#elif defined(WITH_REAL_SSE_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE_BLOCK2
#endif

/* AVX family: largest enabled block size */
#if defined(WITH_REAL_AVX_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX_BLOCK6
#elif defined(WITH_REAL_AVX_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX_BLOCK4
#elif defined(WITH_REAL_AVX_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX_BLOCK2
#endif

/* AVX2 family: largest enabled block size */
#if defined(WITH_REAL_AVX2_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX2_BLOCK6
#elif defined(WITH_REAL_AVX2_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX2_BLOCK4
#elif defined(WITH_REAL_AVX2_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX2_BLOCK2
#endif

/* AVX512 family: largest enabled block size */
#if defined(WITH_REAL_AVX512_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX512_BLOCK6
#elif defined(WITH_REAL_AVX512_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX512_BLOCK4
#elif defined(WITH_REAL_AVX512_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX512_BLOCK2
#endif

/* BGP/BGQ kernels: the id macros are ELPA_C_2STAGE_REAL_BGP and
 * ELPA_C_2STAGE_REAL_BGQ; the names carry no AVX_ infix. */
#ifdef WITH_REAL_BGP_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_BGP
#endif

#ifdef WITH_REAL_BGQ_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_BGQ
#endif

#ifdef WITH_GPU_VERSION
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GPU
#endif

#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
/*
 * Default complex 2-stage kernel: defines ELPA_C_2STAGE_COMPLEX_DEFAULT.
 *
 * Without WITH_ONE_SPECIFIC_COMPLEX_KERNEL the default is the generic kernel.
 * With it, every enabled WITH_COMPLEX_*_KERNEL (or WITH_GPU_VERSION) macro
 * below defines the default; inside one SSE/AVX/AVX2/AVX512 family BLOCK2 is
 * taken before BLOCK1.
 *
 * The selection is the same whether or not WITH_COMPLEX_AVX_BLOCK1_KERNEL is
 * defined, so it is written once rather than once per #if/#else branch.
 */
#ifndef WITH_ONE_SPECIFIC_COMPLEX_KERNEL

#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GENERIC

#else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */

#ifdef WITH_COMPLEX_GENERIC_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GENERIC
#endif

#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GENERIC_SIMPLE
#endif

#ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_SSE
#endif

/* SSE family: BLOCK2 before BLOCK1 */
#if defined(WITH_COMPLEX_SSE_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_SSE_BLOCK2
#elif defined(WITH_COMPLEX_SSE_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_SSE_BLOCK1
#endif

/* AVX family: BLOCK2 before BLOCK1 */
#if defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX_BLOCK2
#elif defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX_BLOCK1
#endif

/* AVX2 family: BLOCK2 before BLOCK1 */
#if defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK2
#elif defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK1
#endif

/* AVX512 family: BLOCK2 before BLOCK1 */
#if defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK2
#elif defined(WITH_COMPLEX_AVX512_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK1
#endif

#ifdef WITH_GPU_VERSION
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GPU
#endif

#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
/* Unprefixed aliases of the ELPA_C_2STAGE_* kernel macros; they are only
 * defined when ELPA_H is defined. Each alias expands to the ELPA_C_ macro of
 * the same name. */
#ifdef ELPA_H
/* Real kernels */
#define ELPA_2STAGE_REAL_GENERIC ELPA_C_2STAGE_REAL_GENERIC
#define ELPA_2STAGE_REAL_GENERIC_SIMPLE ELPA_C_2STAGE_REAL_GENERIC_SIMPLE
#define ELPA_2STAGE_REAL_BGP ELPA_C_2STAGE_REAL_BGP
#define ELPA_2STAGE_REAL_BGQ ELPA_C_2STAGE_REAL_BGQ
#define ELPA_2STAGE_REAL_SSE ELPA_C_2STAGE_REAL_SSE
#define ELPA_2STAGE_REAL_SSE_BLOCK2 ELPA_C_2STAGE_REAL_SSE_BLOCK2
#define ELPA_2STAGE_REAL_SSE_BLOCK4 ELPA_C_2STAGE_REAL_SSE_BLOCK4
#define ELPA_2STAGE_REAL_SSE_BLOCK6 ELPA_C_2STAGE_REAL_SSE_BLOCK6
#define ELPA_2STAGE_REAL_AVX_BLOCK2 ELPA_C_2STAGE_REAL_AVX_BLOCK2
#define ELPA_2STAGE_REAL_AVX_BLOCK4 ELPA_C_2STAGE_REAL_AVX_BLOCK4
#define ELPA_2STAGE_REAL_AVX_BLOCK6 ELPA_C_2STAGE_REAL_AVX_BLOCK6
#define ELPA_2STAGE_REAL_AVX2_BLOCK2 ELPA_C_2STAGE_REAL_AVX2_BLOCK2
#define ELPA_2STAGE_REAL_AVX2_BLOCK4 ELPA_C_2STAGE_REAL_AVX2_BLOCK4
#define ELPA_2STAGE_REAL_AVX2_BLOCK6 ELPA_C_2STAGE_REAL_AVX2_BLOCK6
#define ELPA_2STAGE_REAL_AVX512_BLOCK2 ELPA_C_2STAGE_REAL_AVX512_BLOCK2
#define ELPA_2STAGE_REAL_AVX512_BLOCK4 ELPA_C_2STAGE_REAL_AVX512_BLOCK4
#define ELPA_2STAGE_REAL_AVX512_BLOCK6 ELPA_C_2STAGE_REAL_AVX512_BLOCK6
#define ELPA_2STAGE_REAL_GPU ELPA_C_2STAGE_REAL_GPU
#define ELPA_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_DEFAULT
#define ELPA_2STAGE_NUMBER_OF_REAL_KERNELS ELPA_C_2STAGE_NUMBER_OF_REAL_KERNELS
/* Complex kernels */
#define ELPA_2STAGE_COMPLEX_GENERIC ELPA_C_2STAGE_COMPLEX_GENERIC
#define ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE ELPA_C_2STAGE_COMPLEX_GENERIC_SIMPLE
#define ELPA_2STAGE_COMPLEX_BGP ELPA_C_2STAGE_COMPLEX_BGP
#define ELPA_2STAGE_COMPLEX_BGQ ELPA_C_2STAGE_COMPLEX_BGQ
#define ELPA_2STAGE_COMPLEX_SSE ELPA_C_2STAGE_COMPLEX_SSE
#define ELPA_2STAGE_COMPLEX_SSE_BLOCK1 ELPA_C_2STAGE_COMPLEX_SSE_BLOCK1
#define ELPA_2STAGE_COMPLEX_SSE_BLOCK2 ELPA_C_2STAGE_COMPLEX_SSE_BLOCK2
#define ELPA_2STAGE_COMPLEX_AVX_BLOCK1 ELPA_C_2STAGE_COMPLEX_AVX_BLOCK1
#define ELPA_2STAGE_COMPLEX_AVX_BLOCK2 ELPA_C_2STAGE_COMPLEX_AVX_BLOCK2
#define ELPA_2STAGE_COMPLEX_AVX2_BLOCK1 ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK1
#define ELPA_2STAGE_COMPLEX_AVX2_BLOCK2 ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK2
#define ELPA_2STAGE_COMPLEX_AVX512_BLOCK1 ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK1
#define ELPA_2STAGE_COMPLEX_AVX512_BLOCK2 ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK2
#define ELPA_2STAGE_COMPLEX_GPU ELPA_C_2STAGE_COMPLEX_GPU
#define ELPA_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_DEFAULT
#define ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS ELPA_C_2STAGE_NUMBER_OF_COMPLEX_KERNELS
#endif
/* Numeric ids of the solvers (ELPA_C_ prefixed names) and their count */
#define ELPA_C_SOLVER_1STAGE 1
#define ELPA_C_SOLVER_2STAGE 2
#define ELPA_C_NUMBER_OF_SOLVERS 2
/* Unprefixed aliases, only defined when ELPA_H is defined */
#ifdef ELPA_H
#define ELPA_SOLVER_1STAGE ELPA_C_SOLVER_1STAGE
#define ELPA_SOLVER_2STAGE ELPA_C_SOLVER_2STAGE
#define ELPA_NUMBER_OF_SOLVERS ELPA_C_NUMBER_OF_SOLVERS
#endif
......@@ -28,7 +28,13 @@ elpa/elpa_generated_fortran_interfaces.h: $(wildcard $(top_srcdir)/src/elpa2/ker
$(call extract_interface,!f>)
$(call extract_interface,#!f>)
generated_headers= config-f90.h elpa/elpa_generated.h test/shared/generated.h elpa/elpa_generated_fortran_interfaces.h
# Generate src/fortran_constants.X90 from src/fortran_constants.h:
#  1. run the C preprocessor on the header, writing to the intermediate $@_
#  2. keep only the lines containing "!ELPA_C_DEFINE"; in those lines the
#     marker and every "NEWLINE" token are replaced by a line break
#  3. remove the intermediate file
# On failure both the target and the intermediate file are removed, so that
# no partial output is left behind.
src/fortran_constants.X90: $(top_srcdir)/src/fortran_constants.h
	$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -I$(top_srcdir)/ -I. $< -o $@_ || { rm -f $@ $@_; exit 1; }
	awk '/!ELPA_C_DEFINE/ {gsub(/!ELPA_C_DEFINE/, "\n"); gsub(/NEWLINE/, "\n"); print;}' < $@_ > $@ || { rm -f $@ $@_; exit 1; }
	rm -f $@_
generated_headers= config-f90.h elpa/elpa_generated.h test/shared/generated.h elpa/elpa_generated_fortran_interfaces.h src/fortran_constants.X90
generated-headers: $(generated_headers)
......
dnl macro for testing whether the user wanted to compile only with the GPU version
dnl usage: DEFINE_OPTION_GPU_SUPPORT_ONLY([gpu-support-only],[gpu-support],[with_gpu_support],[install_gpu])
dnl   first argument:  option name, registered as --with-<name>
dnl   second argument: text used in the help string
dnl   third argument:  shell variable that is set to yes when the option is given
dnl   a fourth argument is accepted but not read; install_gpu is set directly
dnl sets build_with_gpu_support_only to yes or no
AC_DEFUN([DEFINE_OPTION_GPU_SUPPORT_ONLY],[
  AC_ARG_WITH([$1],
              AS_HELP_STRING([--with-$1],
                             [only compile $2 ]),
              [with_option=yes],[with_option=no])
  dnl NOTE(review): the action-if-given branch ignores withval, so passing
  dnl the option with the value no still selects the GPU-only build; confirm
  dnl whether that is intended

  if test x"${with_option}" = x"yes" ; then
    dnl make sure that all the other kernels are unset
    dnl the real kernel flags mirror the list reset by
    dnl DEFINE_OPTION_SPECIFIC_REAL_KERNEL
    install_real_generic=no
    install_real_generic_simple=no
    install_real_sse=no
    install_real_sse_assembly=no
    install_real_bgp=no
    install_real_bgq=no
    install_real_sse_block2=no
    install_real_sse_block4=no
    install_real_sse_block6=no
    install_real_avx_block2=no
    install_real_avx_block4=no
    install_real_avx_block6=no
    install_real_avx2_block2=no
    install_real_avx2_block4=no
    install_real_avx2_block6=no
    install_real_avx512_block2=no
    install_real_avx512_block4=no
    install_real_avx512_block6=no

    dnl NOTE(review): the complex list presumably needs the same sync with
    dnl the macro for specific complex kernels; verify the flag names there
    install_complex_generic=no
    install_complex_generic_simple=no
    install_complex_sse=no
    install_complex_bgp=no
    install_complex_bgq=no
    install_complex_avx_block1=no
    install_complex_avx_block2=no

    install_gpu=yes

    want_sse=no
    want_avx=no
    want_avx2=no

    build_with_gpu_support_only=yes
    use_specific_complex_kernel=yes
    use_specific_real_kernel=yes
    dnl now set the specific kernel
    $3=yes

    AC_MSG_NOTICE([ELPA will be build only with $1])
  else
    build_with_gpu_support_only=no
  fi
])
dnl macro for testing whether the user wanted to compile only with one
dnl specific real kernel
dnl usage: DEFINE_OPTION([real-generic-kernel-only],[generic-kernel],[with_real_generic_kernel],[install_real_generic])
AC_DEFUN([DEFINE_OPTION_SPECIFIC_REAL_KERNEL],[
AC_ARG_WITH([$1],
AS_HELP_STRING([--with-$1],
[only compile $2 for real case]),
[with_option=yes],[with_option=no])
if test x"${with_option}" = x"yes" ; then
if test x"${use_specific_real_kernel}" = x"no" ; then
dnl make sure that all the other kernels are unset
install_real_generic=no
install_real_generic_simple=no
install_real_sse_assembly=no
install_real_bgp=no
install_real_bgq=no
install_real_sse_block2=no
install_real_sse_block4=no
install_real_sse_block6=no
install_real_avx_block2=no
install_real_avx_block4=no
install_real_avx_block6=no
install_real_avx2_block2=no
install_real_avx2_block4=no
install_real_avx2_block6=no
install_real_avx512_block2=no
install_real_avx512_block4=no
install_real_avx512_block6=no
want_sse=no
want_avx=no
want_avx2=no