Commit f91c0b4b authored by Lorenz Huedepohl
Browse files

Initial version of new ELPA API

This attempt at a new, more flexible API for ELPA should hopefully
result in fewer ABI/API-breaking changes in the future.

The new API features a generic key/value system for options that can be
extended without changing any exported symbols or function signatures,
so that new, optional features do not influence existing usage of ELPA.

We hope this makes life easier for users of ELPA - at least in the long
run, once they have migrated to this newest of ABI changes :)

Example usage (explicit documentation to be done in a future commit):

   if (elpa_init(20170403) /= ELPA_OK) then
     error stop "ELPA API version not supported"
   endif

   e = elpa_create(na, nev, na_rows, na_cols, nblk, mpi_comm_world, my_prow, my_pcol, success)

   call e%set("solver", ELPA_SOLVER_2STAGE)
   call e%set("real_kernel", ELPA_2STAGE_REAL_GENERIC)

   call e%solve(a, ev, z, success)

   call e%destroy()

   call elpa_uninit()
parent 5e313282
...@@ -17,7 +17,6 @@ libelpa@SUFFIX@_public_la_SOURCES = \ ...@@ -17,7 +17,6 @@ libelpa@SUFFIX@_public_la_SOURCES = \
src/elpa1_auxiliary.F90 \ src/elpa1_auxiliary.F90 \
src/elpa1_utilities.F90 \ src/elpa1_utilities.F90 \
src/elpa2_utilities.F90 \ src/elpa2_utilities.F90 \
src/elpa_init.F90 \
src/elpa_t.F90 \ src/elpa_t.F90 \
src/elpa_utilities.F90 src/elpa_utilities.F90
...@@ -45,7 +44,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \ ...@@ -45,7 +44,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa_qr/qr_utils.F90 \ src/elpa_qr/qr_utils.F90 \
src/elpa_qr/elpa_qrkernels.F90 \ src/elpa_qr/elpa_qrkernels.F90 \
src/elpa_qr/elpa_pdlarfb.F90 \ src/elpa_qr/elpa_pdlarfb.F90 \
src/elpa_qr/elpa_pdgeqrf.F90 src/elpa_qr/elpa_pdgeqrf.F90 \
src/elpa_options.c
EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa_reduce_add_vectors.X90 \ src/elpa_reduce_add_vectors.X90 \
...@@ -301,7 +301,7 @@ BUILT_SOURCES = $(generated_headers) ...@@ -301,7 +301,7 @@ BUILT_SOURCES = $(generated_headers)
# install public Fortran modules files in the include/ dir # install public Fortran modules files in the include/ dir
elpa_includedir = $(includedir)/elpa@SUFFIX@-@PACKAGE_VERSION@ elpa_includedir = $(includedir)/elpa@SUFFIX@-@PACKAGE_VERSION@
nobase_elpa_include_HEADERS = $(wildcard modules/*) nobase_elpa_include_HEADERS = $(wildcard modules/*)
nobase_elpa_include_HEADERS += elpa/elpa.h elpa/elpa_kernel_constants.h elpa/elpa_generated.h nobase_elpa_include_HEADERS += elpa/elpa.h elpa/elpa_kernel_constants.h elpa/elpa_solver_constants.h elpa/elpa_constants.h elpa/elpa_generated.h
dist_man_MANS = \ dist_man_MANS = \
man/solve_evp_real.3 \ man/solve_evp_real.3 \
......
#include <elpa/elpa_kernel_constants.h> #ifndef ELPA_H
#define ELPA_H
#include <limits.h>
#include <elpa/elpa_constants.h>
#include <elpa/elpa_generated.h> #include <elpa/elpa_generated.h>
#endif
/*
 * Generic ELPA constants.
 *
 * ELPA_INVALID_INT is INT_MIN, so <limits.h> must have been included by
 * the time the macro is expanded.
 * ELPA_C_ERROR / ELPA_C_OK are the numeric status codes.
 */
#define ELPA_C_OK    1
#define ELPA_C_ERROR 0
#define ELPA_INVALID_INT INT_MIN

/* Unprefixed aliases, only provided when ELPA_H is defined. */
#if defined(ELPA_H)
#define ELPA_OK    ELPA_C_OK
#define ELPA_ERROR ELPA_C_ERROR
#endif
#include <elpa/elpa_kernel_constants.h>
#include <elpa/elpa_solver_constants.h>
#define ELPA2_REAL_KERNEL_GENERIC 1 #define ELPA_C_2STAGE_REAL_GENERIC 1
#define ELPA2_REAL_KERNEL_GENERIC_SIMPLE 2 #define ELPA_C_2STAGE_REAL_GENERIC_SIMPLE 2
#define ELPA2_REAL_KERNEL_BGP 3 #define ELPA_C_2STAGE_REAL_BGP 3
#define ELPA2_REAL_KERNEL_BGQ 4 #define ELPA_C_2STAGE_REAL_BGQ 4
#define ELPA2_REAL_KERNEL_SSE 5 #define ELPA_C_2STAGE_REAL_SSE 5
#define ELPA2_REAL_KERNEL_SSE_BLOCK2 6 #define ELPA_C_2STAGE_REAL_SSE_BLOCK2 6
#define ELPA2_REAL_KERNEL_SSE_BLOCK4 7 #define ELPA_C_2STAGE_REAL_SSE_BLOCK4 7
#define ELPA2_REAL_KERNEL_SSE_BLOCK6 8 #define ELPA_C_2STAGE_REAL_SSE_BLOCK6 8
#define ELPA2_REAL_KERNEL_AVX_BLOCK2 9 #define ELPA_C_2STAGE_REAL_AVX_BLOCK2 9
#define ELPA2_REAL_KERNEL_AVX_BLOCK4 10 #define ELPA_C_2STAGE_REAL_AVX_BLOCK4 10
#define ELPA2_REAL_KERNEL_AVX_BLOCK6 11 #define ELPA_C_2STAGE_REAL_AVX_BLOCK6 11
#define ELPA2_REAL_KERNEL_AVX2_BLOCK2 12 #define ELPA_C_2STAGE_REAL_AVX2_BLOCK2 12
#define ELPA2_REAL_KERNEL_AVX2_BLOCK4 13 #define ELPA_C_2STAGE_REAL_AVX2_BLOCK4 13
#define ELPA2_REAL_KERNEL_AVX2_BLOCK6 14 #define ELPA_C_2STAGE_REAL_AVX2_BLOCK6 14
#define ELPA2_REAL_KERNEL_AVX512_BLOCK2 15 #define ELPA_C_2STAGE_REAL_AVX512_BLOCK2 15
#define ELPA2_REAL_KERNEL_AVX512_BLOCK4 16 #define ELPA_C_2STAGE_REAL_AVX512_BLOCK4 16
#define ELPA2_REAL_KERNEL_AVX512_BLOCK6 17 #define ELPA_C_2STAGE_REAL_AVX512_BLOCK6 17
#define ELPA2_REAL_KERNEL_GPU 18 #define ELPA_C_2STAGE_REAL_GPU 18
#define ELPA2_NUMBER_OF_REAL_KERNELS 18 #define ELPA_C_2STAGE_NUMBER_OF_REAL_KERNELS 18
#define ELPA2_COMPLEX_KERNEL_GENERIC 1 #define ELPA_C_2STAGE_COMPLEX_GENERIC 1
#define ELPA2_COMPLEX_KERNEL_GENERIC_SIMPLE 2 #define ELPA_C_2STAGE_COMPLEX_GENERIC_SIMPLE 2
#define ELPA2_COMPLEX_KERNEL_BGP 3 #define ELPA_C_2STAGE_COMPLEX_BGP 3
#define ELPA2_COMPLEX_KERNEL_BGQ 4 #define ELPA_C_2STAGE_COMPLEX_BGQ 4
#define ELPA2_COMPLEX_KERNEL_SSE 5 #define ELPA_C_2STAGE_COMPLEX_SSE 5
#define ELPA2_COMPLEX_KERNEL_SSE_BLOCK1 6 #define ELPA_C_2STAGE_COMPLEX_SSE_BLOCK1 6
#define ELPA2_COMPLEX_KERNEL_SSE_BLOCK2 7 #define ELPA_C_2STAGE_COMPLEX_SSE_BLOCK2 7
#define ELPA2_COMPLEX_KERNEL_AVX_BLOCK1 8 #define ELPA_C_2STAGE_COMPLEX_AVX_BLOCK1 8
#define ELPA2_COMPLEX_KERNEL_AVX_BLOCK2 9 #define ELPA_C_2STAGE_COMPLEX_AVX_BLOCK2 9
#define ELPA2_COMPLEX_KERNEL_AVX2_BLOCK1 10 #define ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK1 10
#define ELPA2_COMPLEX_KERNEL_AVX2_BLOCK2 11 #define ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK2 11
#define ELPA2_COMPLEX_KERNEL_AVX512_BLOCK1 12 #define ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK1 12
#define ELPA2_COMPLEX_KERNEL_AVX512_BLOCK2 13 #define ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK2 13
#define ELPA_C_2STAGE_COMPLEX_GPU 14
#define ELPA2_COMPLEX_KERNEL_GPU 14
#define ELPA_C_2STAGE_NUMBER_OF_COMPLEX_KERNELS 14
#define ELPA2_NUMBER_OF_COMPLEX_KERNELS 14
/*
 * Default real 2stage kernel: ELPA_C_2STAGE_REAL_DEFAULT
 *
 * - If WITH_ONE_SPECIFIC_REAL_KERNEL is not defined, the generic kernel is
 *   the default.
 * - If it is defined, the default follows whichever WITH_REAL_*_KERNEL (or
 *   WITH_GPU_VERSION) macro is defined. Within one instruction-set family
 *   the largest block size whose macro is defined is chosen.
 *
 * The selection does not depend on WITH_REAL_AVX_BLOCK2_KERNEL on its own:
 * an outer test on that macro would only duplicate the same chain in both
 * of its branches, so the chain is written once.
 *
 * Directives are kept unindented with "#" in the first column because this
 * header is also run through the preprocessor for Fortran sources.
 */
#ifndef WITH_ONE_SPECIFIC_REAL_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GENERIC
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */

#ifdef WITH_REAL_GENERIC_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GENERIC
#endif
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GENERIC_SIMPLE
#endif
#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE
#endif

/* SSE block kernels: BLOCK6 over BLOCK4 over BLOCK2 */
#if defined(WITH_REAL_SSE_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE_BLOCK6
#elif defined(WITH_REAL_SSE_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE_BLOCK4
#elif defined(WITH_REAL_SSE_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE_BLOCK2
#endif

/* AVX block kernels: BLOCK6 over BLOCK4 over BLOCK2 */
#if defined(WITH_REAL_AVX_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX_BLOCK6
#elif defined(WITH_REAL_AVX_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX_BLOCK4
#elif defined(WITH_REAL_AVX_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX_BLOCK2
#endif

/* AVX2 block kernels: BLOCK6 over BLOCK4 over BLOCK2 */
#if defined(WITH_REAL_AVX2_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX2_BLOCK6
#elif defined(WITH_REAL_AVX2_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX2_BLOCK4
#elif defined(WITH_REAL_AVX2_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX2_BLOCK2
#endif

/* AVX512 block kernels: BLOCK6 over BLOCK4 over BLOCK2 */
#if defined(WITH_REAL_AVX512_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX512_BLOCK6
#elif defined(WITH_REAL_AVX512_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX512_BLOCK4
#elif defined(WITH_REAL_AVX512_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX512_BLOCK2
#endif

/*
 * BlueGene kernels. The constants are named ELPA_C_2STAGE_REAL_BGP and
 * ELPA_C_2STAGE_REAL_BGQ; there is no "_AVX_" variant of these names.
 */
#ifdef WITH_REAL_BGP_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_BGP
#endif
#ifdef WITH_REAL_BGQ_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_BGQ
#endif

#ifdef WITH_GPU_VERSION
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GPU
#endif

#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
/*
 * Default complex 2stage kernel: ELPA_C_2STAGE_COMPLEX_DEFAULT
 *
 * - If WITH_ONE_SPECIFIC_COMPLEX_KERNEL is not defined, the generic kernel
 *   is the default.
 * - If it is defined, the default follows whichever WITH_COMPLEX_*_KERNEL
 *   (or WITH_GPU_VERSION) macro is defined. Within one instruction-set
 *   family BLOCK2 is chosen over BLOCK1.
 *
 * The selection does not depend on WITH_COMPLEX_AVX_BLOCK1_KERNEL on its
 * own: an outer test on that macro would only duplicate the same chain in
 * both of its branches, so the chain is written once.
 *
 * Directives are kept unindented with "#" in the first column because this
 * header is also run through the preprocessor for Fortran sources.
 */
#ifndef WITH_ONE_SPECIFIC_COMPLEX_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GENERIC
#else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */

#ifdef WITH_COMPLEX_GENERIC_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GENERIC
#endif
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GENERIC_SIMPLE
#endif
#ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_SSE
#endif

/* SSE block kernels: BLOCK2 over BLOCK1 */
#if defined(WITH_COMPLEX_SSE_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_SSE_BLOCK2
#elif defined(WITH_COMPLEX_SSE_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_SSE_BLOCK1
#endif

/* AVX block kernels: BLOCK2 over BLOCK1 */
#if defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX_BLOCK2
#elif defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX_BLOCK1
#endif

/* AVX2 block kernels: BLOCK2 over BLOCK1 */
#if defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK2
#elif defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK1
#endif

/* AVX512 block kernels: BLOCK2 over BLOCK1 */
#if defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK2
#elif defined(WITH_COMPLEX_AVX512_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK1
#endif

#ifdef WITH_GPU_VERSION
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GPU
#endif

#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
/*
 * Unprefixed kernel names.
 *
 * When ELPA_H is defined, every ELPA_C_2STAGE_* kernel constant is also
 * made available as ELPA_2STAGE_* (same name without the "_C"). Each alias
 * simply expands to its ELPA_C_ counterpart.
 */
#if defined(ELPA_H)

/* real kernels */
#define ELPA_2STAGE_REAL_GENERIC              ELPA_C_2STAGE_REAL_GENERIC
#define ELPA_2STAGE_REAL_GENERIC_SIMPLE       ELPA_C_2STAGE_REAL_GENERIC_SIMPLE
#define ELPA_2STAGE_REAL_BGP                  ELPA_C_2STAGE_REAL_BGP
#define ELPA_2STAGE_REAL_BGQ                  ELPA_C_2STAGE_REAL_BGQ
#define ELPA_2STAGE_REAL_SSE                  ELPA_C_2STAGE_REAL_SSE
#define ELPA_2STAGE_REAL_SSE_BLOCK2           ELPA_C_2STAGE_REAL_SSE_BLOCK2
#define ELPA_2STAGE_REAL_SSE_BLOCK4           ELPA_C_2STAGE_REAL_SSE_BLOCK4
#define ELPA_2STAGE_REAL_SSE_BLOCK6           ELPA_C_2STAGE_REAL_SSE_BLOCK6
#define ELPA_2STAGE_REAL_AVX_BLOCK2           ELPA_C_2STAGE_REAL_AVX_BLOCK2
#define ELPA_2STAGE_REAL_AVX_BLOCK4           ELPA_C_2STAGE_REAL_AVX_BLOCK4
#define ELPA_2STAGE_REAL_AVX_BLOCK6           ELPA_C_2STAGE_REAL_AVX_BLOCK6
#define ELPA_2STAGE_REAL_AVX2_BLOCK2          ELPA_C_2STAGE_REAL_AVX2_BLOCK2
#define ELPA_2STAGE_REAL_AVX2_BLOCK4          ELPA_C_2STAGE_REAL_AVX2_BLOCK4
#define ELPA_2STAGE_REAL_AVX2_BLOCK6          ELPA_C_2STAGE_REAL_AVX2_BLOCK6
#define ELPA_2STAGE_REAL_AVX512_BLOCK2        ELPA_C_2STAGE_REAL_AVX512_BLOCK2
#define ELPA_2STAGE_REAL_AVX512_BLOCK4        ELPA_C_2STAGE_REAL_AVX512_BLOCK4
#define ELPA_2STAGE_REAL_AVX512_BLOCK6        ELPA_C_2STAGE_REAL_AVX512_BLOCK6
#define ELPA_2STAGE_REAL_GPU                  ELPA_C_2STAGE_REAL_GPU
#define ELPA_2STAGE_REAL_DEFAULT              ELPA_C_2STAGE_REAL_DEFAULT
#define ELPA_2STAGE_NUMBER_OF_REAL_KERNELS    ELPA_C_2STAGE_NUMBER_OF_REAL_KERNELS

/* complex kernels */
#define ELPA_2STAGE_COMPLEX_GENERIC           ELPA_C_2STAGE_COMPLEX_GENERIC
#define ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE    ELPA_C_2STAGE_COMPLEX_GENERIC_SIMPLE
#define ELPA_2STAGE_COMPLEX_BGP               ELPA_C_2STAGE_COMPLEX_BGP
#define ELPA_2STAGE_COMPLEX_BGQ               ELPA_C_2STAGE_COMPLEX_BGQ
#define ELPA_2STAGE_COMPLEX_SSE               ELPA_C_2STAGE_COMPLEX_SSE
#define ELPA_2STAGE_COMPLEX_SSE_BLOCK1        ELPA_C_2STAGE_COMPLEX_SSE_BLOCK1
#define ELPA_2STAGE_COMPLEX_SSE_BLOCK2        ELPA_C_2STAGE_COMPLEX_SSE_BLOCK2
#define ELPA_2STAGE_COMPLEX_AVX_BLOCK1        ELPA_C_2STAGE_COMPLEX_AVX_BLOCK1
#define ELPA_2STAGE_COMPLEX_AVX_BLOCK2        ELPA_C_2STAGE_COMPLEX_AVX_BLOCK2
#define ELPA_2STAGE_COMPLEX_AVX2_BLOCK1       ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK1
#define ELPA_2STAGE_COMPLEX_AVX2_BLOCK2       ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK2
#define ELPA_2STAGE_COMPLEX_AVX512_BLOCK1     ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK1
#define ELPA_2STAGE_COMPLEX_AVX512_BLOCK2     ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK2
#define ELPA_2STAGE_COMPLEX_GPU               ELPA_C_2STAGE_COMPLEX_GPU
#define ELPA_2STAGE_COMPLEX_DEFAULT           ELPA_C_2STAGE_COMPLEX_DEFAULT
#define ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS ELPA_C_2STAGE_NUMBER_OF_COMPLEX_KERNELS

#endif /* ELPA_H */
/*
 * Solver selection constants.
 *
 * ELPA_C_NUMBER_OF_SOLVERS equals the highest solver id defined here.
 */
#define ELPA_C_SOLVER_1STAGE     1
#define ELPA_C_SOLVER_2STAGE     2
#define ELPA_C_NUMBER_OF_SOLVERS 2

/* Unprefixed aliases, only provided when ELPA_H is defined. */
#if defined(ELPA_H)
#define ELPA_SOLVER_1STAGE     ELPA_C_SOLVER_1STAGE
#define ELPA_SOLVER_2STAGE     ELPA_C_SOLVER_2STAGE
#define ELPA_NUMBER_OF_SOLVERS ELPA_C_NUMBER_OF_SOLVERS
#endif /* ELPA_H */
...@@ -110,202 +110,28 @@ module ELPA2_utilities ...@@ -110,202 +110,28 @@ module ELPA2_utilities
public :: qr_decomposition_via_environment_variable public :: qr_decomposition_via_environment_variable
integer, parameter :: number_of_real_kernels = ELPA2_NUMBER_OF_REAL_KERNELS integer(kind=ik), parameter :: number_of_real_kernels = ELPA_C_2STAGE_NUMBER_OF_REAL_KERNELS
integer, parameter :: REAL_ELPA_KERNEL_GENERIC = ELPA2_REAL_KERNEL_GENERIC integer(kind=ik), parameter :: REAL_ELPA_KERNEL_GENERIC = ELPA_C_2STAGE_REAL_GENERIC
integer, parameter :: REAL_ELPA_KERNEL_GENERIC_SIMPLE = ELPA2_REAL_KERNEL_GENERIC_SIMPLE integer(kind=ik), parameter :: REAL_ELPA_KERNEL_GENERIC_SIMPLE = ELPA_C_2STAGE_REAL_GENERIC_SIMPLE
integer, parameter :: REAL_ELPA_KERNEL_BGP = ELPA2_REAL_KERNEL_BGP integer(kind=ik), parameter :: REAL_ELPA_KERNEL_BGP = ELPA_C_2STAGE_REAL_BGP
integer, parameter :: REAL_ELPA_KERNEL_BGQ = ELPA2_REAL_KERNEL_BGQ integer(kind=ik), parameter :: REAL_ELPA_KERNEL_BGQ = ELPA_C_2STAGE_REAL_BGQ
integer, parameter :: REAL_ELPA_KERNEL_SSE = ELPA2_REAL_KERNEL_SSE integer(kind=ik), parameter :: REAL_ELPA_KERNEL_SSE = ELPA_C_2STAGE_REAL_SSE
integer, parameter :: REAL_ELPA_KERNEL_SSE_BLOCK2 = ELPA2_REAL_KERNEL_SSE_BLOCK2 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_SSE_BLOCK2 = ELPA_C_2STAGE_REAL_SSE_BLOCK2
integer, parameter :: REAL_ELPA_KERNEL_SSE_BLOCK4 = ELPA2_REAL_KERNEL_SSE_BLOCK4 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_SSE_BLOCK4 = ELPA_C_2STAGE_REAL_SSE_BLOCK4
integer, parameter :: REAL_ELPA_KERNEL_SSE_BLOCK6 = ELPA2_REAL_KERNEL_SSE_BLOCK6 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_SSE_BLOCK6 = ELPA_C_2STAGE_REAL_SSE_BLOCK6
integer, parameter :: REAL_ELPA_KERNEL_AVX_BLOCK2 = ELPA2_REAL_KERNEL_AVX_BLOCK2 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_AVX_BLOCK2 = ELPA_C_2STAGE_REAL_AVX_BLOCK2
integer, parameter :: REAL_ELPA_KERNEL_AVX_BLOCK4 = ELPA2_REAL_KERNEL_AVX_BLOCK4 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_AVX_BLOCK4 = ELPA_C_2STAGE_REAL_AVX_BLOCK4
integer, parameter :: REAL_ELPA_KERNEL_AVX_BLOCK6 = ELPA2_REAL_KERNEL_AVX_BLOCK6 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_AVX_BLOCK6 = ELPA_C_2STAGE_REAL_AVX_BLOCK6
integer, parameter :: REAL_ELPA_KERNEL_AVX2_BLOCK2 = ELPA2_REAL_KERNEL_AVX2_BLOCK2 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_AVX2_BLOCK2 = ELPA_C_2STAGE_REAL_AVX2_BLOCK2
integer, parameter :: REAL_ELPA_KERNEL_AVX2_BLOCK4 = ELPA2_REAL_KERNEL_AVX2_BLOCK4 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_AVX2_BLOCK4 = ELPA_C_2STAGE_REAL_AVX2_BLOCK4
integer, parameter :: REAL_ELPA_KERNEL_AVX2_BLOCK6 = ELPA2_REAL_KERNEL_AVX2_BLOCK6 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_AVX2_BLOCK6 = ELPA_C_2STAGE_REAL_AVX2_BLOCK6
integer, parameter :: REAL_ELPA_KERNEL_AVX512_BLOCK2 = ELPA2_REAL_KERNEL_AVX512_BLOCK2 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_AVX512_BLOCK2 = ELPA_C_2STAGE_REAL_AVX512_BLOCK2
integer, parameter :: REAL_ELPA_KERNEL_AVX512_BLOCK4 = ELPA2_REAL_KERNEL_AVX512_BLOCK4 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_AVX512_BLOCK4 = ELPA_C_2STAGE_REAL_AVX512_BLOCK4
integer, parameter :: REAL_ELPA_KERNEL_AVX512_BLOCK6 = ELPA2_REAL_KERNEL_AVX512_BLOCK6 integer(kind=ik), parameter :: REAL_ELPA_KERNEL_AVX512_BLOCK6 = ELPA_C_2STAGE_REAL_AVX512_BLOCK6
integer(kind=ik), parameter :: REAL_ELPA_KERNEL_GPU = ELPA_C_2STAGE_REAL_GPU
integer(kind=ik), parameter :: REAL_ELPA_KERNEL_GPU = ELPA2_REAL_KERNEL_GPU integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = ELPA_C_2STAGE_REAL_DEFAULT
#if defined(WITH_REAL_AVX_BLOCK2_KERNEL)
#ifndef WITH_ONE_SPECIFIC_REAL_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_GENERIC
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_GENERIC_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_GENERIC
#endif
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_SSE
#endif
#if defined(WITH_REAL_SSE_BLOCK2_KERNEL) || defined(WITH_REAL_SSE_BLOCK4_KERNEL) || defined(WITH_REAL_SSE_BLOCK6_KERNEL)
#ifdef WITH_REAL_SSE_BLOCK6_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_SSE_BLOCK6
#else
#ifdef WITH_REAL_SSE_BLOCK4_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_SSE_BLOCK4
#else
#ifdef WITH_REAL_SSE_BLOCK2_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_SSE_BLOCK2
#endif
#endif
#endif
#endif /* #if defined(WITH_REAL_SSE_BLOCK2_KERNEL) || defined(WITH_REAL_SSE_BLOCK4_KERNEL) || defined(WITH_REAL_SSE_BLOCK6_KERNEL) */
#if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined(WITH_REAL_AVX_BLOCK4_KERNEL) || defined(WITH_REAL_AVX_BLOCK6_KERNEL)
#ifdef WITH_REAL_AVX_BLOCK6_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BLOCK6
#else
#ifdef WITH_REAL_AVX_BLOCK4_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BLOCK4
#else
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BLOCK2
#endif
#endif
#endif
#endif /* #if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined(WITH_REAL_AVX_BLOCK4_KERNEL) || defined(WITH_REAL_AVX_BLOCK6_KERNEL) */
#if defined(WITH_REAL_AVX2_BLOCK2_KERNEL) || defined(WITH_REAL_AVX2_BLOCK4_KERNEL) || defined(WITH_REAL_AVX2_BLOCK6_KERNEL)
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
integer, parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX2_BLOCK6
#else
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
integer, parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX2_BLOCK4
#else
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
integer, parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX2_BLOCK2
#endif
#endif
#endif
#endif /* #if defined(WITH_REAL_AVX2_BLOCK2_KERNEL) || defined(WITH_REAL_AVX2_BLOCK4_KERNEL) || defined(WITH_REAL_AVX2_BLOCK6_KERNEL) */
#if defined(WITH_REAL_AVX512_BLOCK2_KERNEL) || defined(WITH_REAL_AVX512_BLOCK4_KERNEL) || defined(WITH_REAL_AVX512_BLOCK6_KERNEL)
#ifdef WITH_REAL_AVX512_BLOCK6_KERNEL
integer, parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX512_BLOCK6
#else
#ifdef WITH_REAL_AVX512_BLOCK4_KERNEL
integer, parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX512_BLOCK4
#else
#ifdef WITH_REAL_AVX512_BLOCK2_KERNEL
integer, parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX512_BLOCK2
#endif
#endif
#endif
#endif /* #if defined(WITH_REAL_AVX512_BLOCK2_KERNEL) || defined(WITH_REAL_AVX512_BLOCK4_KERNEL) || defined(WITH_REAL_AVX512_BLOCK6_KERNEL) */
#ifdef WITH_REAL_BGP_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BGP
#endif
#ifdef WITH_REAL_BGQ_KERNEL
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_AVX_BGQ
#endif
#ifdef WITH_GPU_VERSION
integer(kind=ik), parameter :: DEFAULT_REAL_ELPA_KERNEL = REAL_ELPA_KERNEL_GPU
#endif
#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#else /* WITH_REAL_AVX_BLOCK2_KERNEL */