Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
elpa
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
11
Issues
11
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Environments
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
elpa
elpa
Commits
460b754c
Commit
460b754c
authored
Apr 28, 2017
by
Lorenz Huedepohl
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Work-In-Progress commit for new ELPA interface
parent
dbe258d9
Changes
26
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
26 changed files
with
1201 additions
and
2288 deletions
+1201
-2288
Makefile.am
Makefile.am
+5
-5
configure.ac
configure.ac
+444
-798
elpa/elpa_constants.h
elpa/elpa_constants.h
+0
-15
elpa/elpa_constants.h.in
elpa/elpa_constants.h.in
+87
-0
elpa/elpa_kernel_constants.h
elpa/elpa_kernel_constants.h
+0
-386
elpa/elpa_solver_constants.h
elpa/elpa_solver_constants.h
+0
-11
generated_headers.am
generated_headers.am
+7
-1
m4/ax_elpa_gpu_version_only.m4
m4/ax_elpa_gpu_version_only.m4
+0
-48
m4/ax_elpa_specific_kernels.m4
m4/ax_elpa_specific_kernels.m4
+0
-354
m4/ax_prog_cxx_mpi.m4
m4/ax_prog_cxx_mpi.m4
+0
-173
src/elpa1/legacy_interface/elpa_cholesky_template_legacy.X90
src/elpa1/legacy_interface/elpa_cholesky_template_legacy.X90
+16
-8
src/elpa1/legacy_interface/elpa_invert_trm_legacy.X90
src/elpa1/legacy_interface/elpa_invert_trm_legacy.X90
+20
-12
src/elpa1/legacy_interface/elpa_multiply_a_b_legacy.X90
src/elpa1/legacy_interface/elpa_multiply_a_b_legacy.X90
+16
-10
src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_double_precision.c
.../kernels/elpa2_kernels_complex_sse_1hv_double_precision.c
+1
-1
src/elpa2/legacy_interface/elpa2_utilities_legacy.F90
src/elpa2/legacy_interface/elpa2_utilities_legacy.F90
+38
-40
src/elpa2/legacy_interface/elpa2_utilities_private_legacy.F90
...elpa2/legacy_interface/elpa2_utilities_private_legacy.F90
+0
-1
src/elpa_constants.F90
src/elpa_constants.F90
+6
-0
src/elpa_constants_private.h
src/elpa_constants_private.h
+5
-0
src/elpa_index.c
src/elpa_index.c
+174
-12
src/elpa_index.h
src/elpa_index.h
+110
-18
src/elpa_options.c
src/elpa_options.c
+0
-147
src/elpa_t.F90
src/elpa_t.F90
+226
-244
src/fortran_constants.h
src/fortran_constants.h
+25
-0
src/general/elpa_utilities.F90
src/general/elpa_utilities.F90
+0
-1
src/helpers/mod_precision.F90
src/helpers/mod_precision.F90
+1
-1
test/Fortran/test_new_interface_real_1stage.F90
test/Fortran/test_new_interface_real_1stage.F90
+20
-2
No files found.
Makefile.am
View file @
460b754c
...
@@ -20,6 +20,7 @@ libelpa@SUFFIX@_public_la_SOURCES = \
...
@@ -20,6 +20,7 @@ libelpa@SUFFIX@_public_la_SOURCES = \
src/elpa1/legacy_interface/elpa1_auxiliary_legacy.F90
\
src/elpa1/legacy_interface/elpa1_auxiliary_legacy.F90
\
src/elpa1/elpa1_auxiliary.F90
\
src/elpa1/elpa1_auxiliary.F90
\
src/elpa_t.F90
\
src/elpa_t.F90
\
src/elpa_constants.F90
\
src/general/elpa_utilities.F90
src/general/elpa_utilities.F90
# internal parts
# internal parts
...
@@ -53,8 +54,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
...
@@ -53,8 +54,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa2/qr/elpa_pdgeqrf.F90
\
src/elpa2/qr/elpa_pdgeqrf.F90
\
src/elpa1/elpa1.F90
\
src/elpa1/elpa1.F90
\
src/elpa2/elpa2.F90
\
src/elpa2/elpa2.F90
\
src/elpa_
index
.c
\
src/elpa_
constants
.c
\
src/elpa_
options
.c
src/elpa_
index
.c
EXTRA_libelpa@SUFFIX@
_private_la_DEPENDENCIES
=
\
EXTRA_libelpa@SUFFIX@
_private_la_DEPENDENCIES
=
\
src/elpa1/elpa_reduce_add_vectors.X90
\
src/elpa1/elpa_reduce_add_vectors.X90
\
...
@@ -322,10 +323,9 @@ endif
...
@@ -322,10 +323,9 @@ endif
include
generated_headers.am
include
generated_headers.am
BUILT_SOURCES
=
$(generated_headers)
BUILT_SOURCES
=
$(generated_headers)
# install public Fortran modules files in the include/ dir
# install public
headers and
Fortran modules files in the include/ dir
elpa_includedir
=
$(includedir)
/elpa@SUFFIX@-@PACKAGE_VERSION@
elpa_includedir
=
$(includedir)
/elpa@SUFFIX@-@PACKAGE_VERSION@
nobase_elpa_include_HEADERS
=
$(
wildcard
modules/
*
)
nobase_elpa_include_HEADERS
=
$(
wildcard
modules/
*
)
$(
wildcard
elpa/
*
)
nobase_elpa_include_HEADERS
+=
elpa/elpa.h elpa/elpa_kernel_constants.h elpa/elpa_solver_constants.h elpa/elpa_constants.h elpa/elpa_generated.h
dist_man_MANS
=
\
dist_man_MANS
=
\
man/solve_evp_real.3
\
man/solve_evp_real.3
\
...
...
configure.ac
View file @
460b754c
This diff is collapsed.
Click to expand it.
elpa/elpa_constants.h
deleted
100644 → 0
View file @
dbe258d9
/* Generic ELPA constants shared by the public headers. */
/* Sentinel for "no valid integer value".
 * NOTE(review): INT_MIN comes from <limits.h>, which is not included in the
 * lines visible here - presumably every includer provides it; confirm. */
#define ELPA_INVALID_INT INT_MIN
/* Status codes: 0 signals an error, 1 signals success. */
#define ELPA_C_ERROR 0
#define ELPA_C_OK 1
/* When ELPA_H is defined, the status codes are also available without the
 * ELPA_C_ prefix. */
#ifdef ELPA_H
#define ELPA_ERROR ELPA_C_ERROR
#define ELPA_OK ELPA_C_OK
#endif
/* Pull in the kernel and solver id constants. */
#include <elpa/elpa_kernel_constants.h>
#include <elpa/elpa_solver_constants.h>
elpa/elpa_constants.h.in
0 → 100644
View file @
460b754c
#pragma once
/* This might seem over-engineered, but helps to re-use this file also on the
 * Fortran side and thus to keep the definitions in this one place here.
 *
 * Every group of constants is declared once as an X-macro list NAME_MAP(X):
 * the list invokes X(name, value) for each entry, so the same list can be
 * expanded into enumerators, into a count, or into whatever else the caller
 * passes as X.
 */
/* Private helper macros */
/* Expands one list entry to an enumerator definition: "name = value,". */
#define ELPA_ENUM_ENTRY(name, value) \
name = value,
/* Expands one list entry to "+1"; applied to a whole list it counts the
 * entries. */
#define ELPA_ENUM_SUM(name, value) +1
/* Solver constants */
#define ELPA_SOLVER_MAP(X) \
X(ELPA_SOLVER_1STAGE, 1) \
X(ELPA_SOLVER_2STAGE, 2)
enum ELPA_SOLVERS {
ELPA_SOLVER_MAP(ELPA_ENUM_ENTRY)
};
/* Expands to (0 +1 +1), i.e. the number of entries in ELPA_SOLVER_MAP. */
#define ELPA_NUMBER_OF_SOLVERS (0 ELPA_SOLVER_MAP(ELPA_ENUM_SUM))
/* Kernel constants */
/* Real 2-stage kernels, ids 1..18. The final *_DEFAULT entry takes its value
 * from an @...@ placeholder; presumably configure substitutes one of the ids
 * above when generating the header from this .in file - TODO confirm. */
#define ELPA_REAL_KERNEL_MAP(X) \
X(ELPA_2STAGE_REAL_GENERIC, 1) \
X(ELPA_2STAGE_REAL_GENERIC_SIMPLE, 2) \
X(ELPA_2STAGE_REAL_BGP, 3) \
X(ELPA_2STAGE_REAL_BGQ, 4) \
X(ELPA_2STAGE_REAL_SSE, 5) \
X(ELPA_2STAGE_REAL_SSE_BLOCK2, 6) \
X(ELPA_2STAGE_REAL_SSE_BLOCK4, 7) \
X(ELPA_2STAGE_REAL_SSE_BLOCK6, 8) \
X(ELPA_2STAGE_REAL_AVX_BLOCK2, 9) \
X(ELPA_2STAGE_REAL_AVX_BLOCK4, 10) \
X(ELPA_2STAGE_REAL_AVX_BLOCK6, 11) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK2, 12) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK4, 13) \
X(ELPA_2STAGE_REAL_AVX2_BLOCK6, 14) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK2, 15) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK4, 16) \
X(ELPA_2STAGE_REAL_AVX512_BLOCK6, 17) \
X(ELPA_2STAGE_REAL_GPU, 18) \
X(ELPA_2STAGE_REAL_DEFAULT, @ELPA_2STAGE_REAL_DEFAULT@)
enum ELPA_REAL_KERNELS {
ELPA_REAL_KERNEL_MAP(ELPA_ENUM_ENTRY)
};
/* Complex 2-stage kernels, ids 1..14, followed by the *_DEFAULT entry whose
 * value again comes from an @...@ placeholder. */
#define ELPA_COMPLEX_KERNEL_MAP(X) \
X(ELPA_2STAGE_COMPLEX_GENERIC, 1) \
X(ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE, 2) \
X(ELPA_2STAGE_COMPLEX_BGP, 3) \
X(ELPA_2STAGE_COMPLEX_BGQ, 4) \
X(ELPA_2STAGE_COMPLEX_SSE, 5) \
X(ELPA_2STAGE_COMPLEX_SSE_BLOCK1, 6) \
X(ELPA_2STAGE_COMPLEX_SSE_BLOCK2, 7) \
X(ELPA_2STAGE_COMPLEX_AVX_BLOCK1, 8) \
X(ELPA_2STAGE_COMPLEX_AVX_BLOCK2, 9) \
X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK1, 10) \
X(ELPA_2STAGE_COMPLEX_AVX2_BLOCK2, 11) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK1, 12) \
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK2, 13) \
X(ELPA_2STAGE_COMPLEX_GPU, 14) \
X(ELPA_2STAGE_COMPLEX_DEFAULT, @ELPA_2STAGE_COMPLEX_DEFAULT@)
enum ELPA_COMPLEX_KERNELS {
ELPA_COMPLEX_KERNEL_MAP(ELPA_ENUM_ENTRY)
};
/* General constants */
#define ELPA_CONSTANTS_MAP(X) \
X(ELPA_ERROR, -1) \
X(ELPA_NO, 0) \
X(ELPA_OK, 1)
/* Status values plus the kernel counts. The counts are the number of list
 * entries minus the *_DEFAULT alias: 15 - 1 = 14 complex kernels and
 * 19 - 1 = 18 real kernels. */
enum ELPA_CONSTANTS {
ELPA_CONSTANTS_MAP(ELPA_ENUM_ENTRY)
/* -1 to take out the default kernel again */
ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS = (0 ELPA_COMPLEX_KERNEL_MAP(ELPA_ENUM_SUM) -1),
ELPA_2STAGE_NUMBER_OF_REAL_KERNELS = (0 ELPA_REAL_KERNEL_MAP(ELPA_ENUM_SUM) -1),
};
elpa/elpa_kernel_constants.h
deleted
100644 → 0
View file @
dbe258d9
/* Numeric ids of the 2-stage kernels. Ids are consecutive and start at 1, so
 * each *_NUMBER_OF_*_KERNELS constant equals the highest id of its group. */
/* Real kernels */
#define ELPA_C_2STAGE_REAL_GENERIC 1
#define ELPA_C_2STAGE_REAL_GENERIC_SIMPLE 2
#define ELPA_C_2STAGE_REAL_BGP 3
#define ELPA_C_2STAGE_REAL_BGQ 4
#define ELPA_C_2STAGE_REAL_SSE 5
#define ELPA_C_2STAGE_REAL_SSE_BLOCK2 6
#define ELPA_C_2STAGE_REAL_SSE_BLOCK4 7
#define ELPA_C_2STAGE_REAL_SSE_BLOCK6 8
#define ELPA_C_2STAGE_REAL_AVX_BLOCK2 9
#define ELPA_C_2STAGE_REAL_AVX_BLOCK4 10
#define ELPA_C_2STAGE_REAL_AVX_BLOCK6 11
#define ELPA_C_2STAGE_REAL_AVX2_BLOCK2 12
#define ELPA_C_2STAGE_REAL_AVX2_BLOCK4 13
#define ELPA_C_2STAGE_REAL_AVX2_BLOCK6 14
#define ELPA_C_2STAGE_REAL_AVX512_BLOCK2 15
#define ELPA_C_2STAGE_REAL_AVX512_BLOCK4 16
#define ELPA_C_2STAGE_REAL_AVX512_BLOCK6 17
#define ELPA_C_2STAGE_REAL_GPU 18
#define ELPA_C_2STAGE_NUMBER_OF_REAL_KERNELS 18
/* Complex kernels */
#define ELPA_C_2STAGE_COMPLEX_GENERIC 1
#define ELPA_C_2STAGE_COMPLEX_GENERIC_SIMPLE 2
#define ELPA_C_2STAGE_COMPLEX_BGP 3
#define ELPA_C_2STAGE_COMPLEX_BGQ 4
#define ELPA_C_2STAGE_COMPLEX_SSE 5
#define ELPA_C_2STAGE_COMPLEX_SSE_BLOCK1 6
#define ELPA_C_2STAGE_COMPLEX_SSE_BLOCK2 7
#define ELPA_C_2STAGE_COMPLEX_AVX_BLOCK1 8
#define ELPA_C_2STAGE_COMPLEX_AVX_BLOCK2 9
#define ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK1 10
#define ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK2 11
#define ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK1 12
#define ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK2 13
#define ELPA_C_2STAGE_COMPLEX_GPU 14
#define ELPA_C_2STAGE_NUMBER_OF_COMPLEX_KERNELS 14
/*
 * Select ELPA_C_2STAGE_REAL_DEFAULT, the id of the default real 2-stage
 * kernel.
 *
 * - Without WITH_ONE_SPECIFIC_REAL_KERNEL the generic kernel is the default.
 * - With WITH_ONE_SPECIFIC_REAL_KERNEL the default follows whichever
 *   WITH_REAL_*_KERNEL macro is defined. Within one SIMD family the largest
 *   block size that is defined wins (BLOCK6, then BLOCK4, then BLOCK2).
 *
 * The selection does not depend on WITH_REAL_AVX_BLOCK2_KERNEL as such: the
 * rules are the same whether or not that macro is defined, so they are
 * written only once here.
 */
#ifndef WITH_ONE_SPECIFIC_REAL_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GENERIC
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */

#ifdef WITH_REAL_GENERIC_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GENERIC
#endif
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GENERIC_SIMPLE
#endif
#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE
#endif

/* SSE block kernels */
#if defined(WITH_REAL_SSE_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE_BLOCK6
#elif defined(WITH_REAL_SSE_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE_BLOCK4
#elif defined(WITH_REAL_SSE_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_SSE_BLOCK2
#endif

/* AVX block kernels */
#if defined(WITH_REAL_AVX_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX_BLOCK6
#elif defined(WITH_REAL_AVX_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX_BLOCK4
#elif defined(WITH_REAL_AVX_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX_BLOCK2
#endif

/* AVX2 block kernels */
#if defined(WITH_REAL_AVX2_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX2_BLOCK6
#elif defined(WITH_REAL_AVX2_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX2_BLOCK4
#elif defined(WITH_REAL_AVX2_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX2_BLOCK2
#endif

/* AVX-512 block kernels */
#if defined(WITH_REAL_AVX512_BLOCK6_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX512_BLOCK6
#elif defined(WITH_REAL_AVX512_BLOCK4_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX512_BLOCK4
#elif defined(WITH_REAL_AVX512_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_AVX512_BLOCK2
#endif

/* The Blue Gene ids are ELPA_C_2STAGE_REAL_BGP and ELPA_C_2STAGE_REAL_BGQ;
 * no "_AVX_" variants of these ids are defined in this header. */
#ifdef WITH_REAL_BGP_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_BGP
#endif
#ifdef WITH_REAL_BGQ_KERNEL
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_BGQ
#endif

#ifdef WITH_GPU_VERSION
#define ELPA_C_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_GPU
#endif

#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
/*
 * Select ELPA_C_2STAGE_COMPLEX_DEFAULT, the id of the default complex
 * 2-stage kernel.
 *
 * - Without WITH_ONE_SPECIFIC_COMPLEX_KERNEL the generic kernel is the
 *   default.
 * - With WITH_ONE_SPECIFIC_COMPLEX_KERNEL the default follows whichever
 *   WITH_COMPLEX_*_KERNEL macro is defined. Within one SIMD family BLOCK2 is
 *   preferred over BLOCK1.
 *
 * The rules are the same whether or not WITH_COMPLEX_AVX_BLOCK1_KERNEL is
 * defined, so they are written only once.
 */
#ifndef WITH_ONE_SPECIFIC_COMPLEX_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GENERIC
#else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */

#ifdef WITH_COMPLEX_GENERIC_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GENERIC
#endif
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GENERIC_SIMPLE
#endif
#ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_SSE
#endif

/* SSE block kernels */
#if defined(WITH_COMPLEX_SSE_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_SSE_BLOCK2
#elif defined(WITH_COMPLEX_SSE_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_SSE_BLOCK1
#endif

/* AVX block kernels */
#if defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX_BLOCK2
#elif defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX_BLOCK1
#endif

/* AVX2 block kernels */
#if defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK2
#elif defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK1
#endif

/* AVX-512 block kernels */
#if defined(WITH_COMPLEX_AVX512_BLOCK2_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK2
#elif defined(WITH_COMPLEX_AVX512_BLOCK1_KERNEL)
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK1
#endif

#ifdef WITH_GPU_VERSION
#define ELPA_C_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_GPU
#endif

#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
/* When ELPA_H is defined, every kernel constant is also made available
 * without the "_C" infix, as a plain alias of its ELPA_C_* counterpart.
 * NOTE(review): where ELPA_H gets defined is not visible here - presumably
 * the public C header elpa.h; confirm. */
#ifdef ELPA_H
/* Real kernels */
#define ELPA_2STAGE_REAL_GENERIC ELPA_C_2STAGE_REAL_GENERIC
#define ELPA_2STAGE_REAL_GENERIC_SIMPLE ELPA_C_2STAGE_REAL_GENERIC_SIMPLE
#define ELPA_2STAGE_REAL_BGP ELPA_C_2STAGE_REAL_BGP
#define ELPA_2STAGE_REAL_BGQ ELPA_C_2STAGE_REAL_BGQ
#define ELPA_2STAGE_REAL_SSE ELPA_C_2STAGE_REAL_SSE
#define ELPA_2STAGE_REAL_SSE_BLOCK2 ELPA_C_2STAGE_REAL_SSE_BLOCK2
#define ELPA_2STAGE_REAL_SSE_BLOCK4 ELPA_C_2STAGE_REAL_SSE_BLOCK4
#define ELPA_2STAGE_REAL_SSE_BLOCK6 ELPA_C_2STAGE_REAL_SSE_BLOCK6
#define ELPA_2STAGE_REAL_AVX_BLOCK2 ELPA_C_2STAGE_REAL_AVX_BLOCK2
#define ELPA_2STAGE_REAL_AVX_BLOCK4 ELPA_C_2STAGE_REAL_AVX_BLOCK4
#define ELPA_2STAGE_REAL_AVX_BLOCK6 ELPA_C_2STAGE_REAL_AVX_BLOCK6
#define ELPA_2STAGE_REAL_AVX2_BLOCK2 ELPA_C_2STAGE_REAL_AVX2_BLOCK2
#define ELPA_2STAGE_REAL_AVX2_BLOCK4 ELPA_C_2STAGE_REAL_AVX2_BLOCK4
#define ELPA_2STAGE_REAL_AVX2_BLOCK6 ELPA_C_2STAGE_REAL_AVX2_BLOCK6
#define ELPA_2STAGE_REAL_AVX512_BLOCK2 ELPA_C_2STAGE_REAL_AVX512_BLOCK2
#define ELPA_2STAGE_REAL_AVX512_BLOCK4 ELPA_C_2STAGE_REAL_AVX512_BLOCK4
#define ELPA_2STAGE_REAL_AVX512_BLOCK6 ELPA_C_2STAGE_REAL_AVX512_BLOCK6
#define ELPA_2STAGE_REAL_GPU ELPA_C_2STAGE_REAL_GPU
#define ELPA_2STAGE_REAL_DEFAULT ELPA_C_2STAGE_REAL_DEFAULT
#define ELPA_2STAGE_NUMBER_OF_REAL_KERNELS ELPA_C_2STAGE_NUMBER_OF_REAL_KERNELS
/* Complex kernels */
#define ELPA_2STAGE_COMPLEX_GENERIC ELPA_C_2STAGE_COMPLEX_GENERIC
#define ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE ELPA_C_2STAGE_COMPLEX_GENERIC_SIMPLE
#define ELPA_2STAGE_COMPLEX_BGP ELPA_C_2STAGE_COMPLEX_BGP
#define ELPA_2STAGE_COMPLEX_BGQ ELPA_C_2STAGE_COMPLEX_BGQ
#define ELPA_2STAGE_COMPLEX_SSE ELPA_C_2STAGE_COMPLEX_SSE
#define ELPA_2STAGE_COMPLEX_SSE_BLOCK1 ELPA_C_2STAGE_COMPLEX_SSE_BLOCK1
#define ELPA_2STAGE_COMPLEX_SSE_BLOCK2 ELPA_C_2STAGE_COMPLEX_SSE_BLOCK2
#define ELPA_2STAGE_COMPLEX_AVX_BLOCK1 ELPA_C_2STAGE_COMPLEX_AVX_BLOCK1
#define ELPA_2STAGE_COMPLEX_AVX_BLOCK2 ELPA_C_2STAGE_COMPLEX_AVX_BLOCK2
#define ELPA_2STAGE_COMPLEX_AVX2_BLOCK1 ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK1
#define ELPA_2STAGE_COMPLEX_AVX2_BLOCK2 ELPA_C_2STAGE_COMPLEX_AVX2_BLOCK2
#define ELPA_2STAGE_COMPLEX_AVX512_BLOCK1 ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK1
#define ELPA_2STAGE_COMPLEX_AVX512_BLOCK2 ELPA_C_2STAGE_COMPLEX_AVX512_BLOCK2
#define ELPA_2STAGE_COMPLEX_GPU ELPA_C_2STAGE_COMPLEX_GPU
#define ELPA_2STAGE_COMPLEX_DEFAULT ELPA_C_2STAGE_COMPLEX_DEFAULT
#define ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS ELPA_C_2STAGE_NUMBER_OF_COMPLEX_KERNELS
#endif
elpa/elpa_solver_constants.h
deleted
100644 → 0
View file @
dbe258d9
/* Numeric ids of the ELPA solvers; the count equals the highest id. */
#define ELPA_C_SOLVER_1STAGE 1
#define ELPA_C_SOLVER_2STAGE 2
#define ELPA_C_NUMBER_OF_SOLVERS 2
/* When ELPA_H is defined, the solver constants are also available without
 * the "_C" infix, as aliases of the ELPA_C_* names above. */
#ifdef ELPA_H
#define ELPA_SOLVER_1STAGE ELPA_C_SOLVER_1STAGE
#define ELPA_SOLVER_2STAGE ELPA_C_SOLVER_2STAGE
#define ELPA_NUMBER_OF_SOLVERS ELPA_C_NUMBER_OF_SOLVERS
#endif
generated_headers.am
View file @
460b754c
...
@@ -28,7 +28,13 @@ elpa/elpa_generated_fortran_interfaces.h: $(wildcard $(top_srcdir)/src/elpa2/ker
...
@@ -28,7 +28,13 @@ elpa/elpa_generated_fortran_interfaces.h: $(wildcard $(top_srcdir)/src/elpa2/ker
$(
call
extract_interface,!f>
)
$(
call
extract_interface,!f>
)
$(
call
extract_interface,#!f>
)
$(
call
extract_interface,#!f>
)
generated_headers
=
config-f90.h elpa/elpa_generated.h
test
/shared/generated.h elpa/elpa_generated_fortran_interfaces.h
src/fortran_constants.X90
:
$(top_srcdir)/src/fortran_constants.h
$(CPP)
$(CPPFLAGS)
-I
$(top_builddir)
/
-I
$(top_srcdir)
/
-I
.
$<
-o
$@
_
||
{
rm
-f
$@
;
exit
1
;
}
awk
'/!ELPA_C_DEFINE/ {gsub(/!ELPA_C_DEFINE/, "\n"); gsub(/NEWLINE/, "\n"); print;}'
<
$@
_
>
$@
||
{
rm
-f
$@
;
exit
1
;
}
rm
$@
_
generated_headers
=
config-f90.h elpa/elpa_generated.h
test
/shared/generated.h elpa/elpa_generated_fortran_interfaces.h src/fortran_constants.X90
generated-headers
:
$(generated_headers)
generated-headers
:
$(generated_headers)
...
...
m4/ax_elpa_gpu_version_only.m4
deleted
100644 → 0
View file @
dbe258d9
dnl DEFINE_OPTION_GPU_SUPPORT_ONLY(OPTION, DESCRIPTION, KERNEL-VARIABLE)
dnl
dnl Adds a --with-OPTION switch to configure. When the switch is given, every
dnl CPU kernel selection variable is set to "no", GPU support is switched on
dnl and KERNEL-VARIABLE is set to "yes". The shell variable
dnl build_with_gpu_support_only records whether the switch was used.
dnl
dnl usage: DEFINE_OPTION_GPU_SUPPORT_ONLY([gpu-support-only],[gpu-support],[with_gpu_support],[install_gpu])
dnl A fourth argument may be passed as in the usage line above, but it is not
dnl read by the macro: install_gpu is always set directly.
AC_DEFUN([DEFINE_OPTION_GPU_SUPPORT_ONLY],[
  dnl The action-if-given branch also runs for an explicit --without-OPTION
  dnl or --with-OPTION=no, so withval has to be inspected before enabling.
  AC_ARG_WITH([$1],
    [AS_HELP_STRING([--with-$1], [only compile $2])],
    [AS_IF([test x"${withval}" = x"no"], [with_option=no], [with_option=yes])],
    [with_option=no])

  if test x"${with_option}" = x"yes" ; then
    dnl make sure that all the other kernels are unset
    install_real_generic=no
    install_real_generic_simple=no
    install_real_sse=no
    install_real_sse_assembly=no
    install_real_bgp=no
    install_real_bgq=no
    install_real_sse_block2=no
    install_real_sse_block4=no
    install_real_sse_block6=no
    install_real_avx_block2=no
    install_real_avx_block4=no
    install_real_avx_block6=no
    install_real_avx2_block2=no
    install_real_avx2_block4=no
    install_real_avx2_block6=no
    install_real_avx512_block2=no
    install_real_avx512_block4=no
    install_real_avx512_block6=no

    install_complex_generic=no
    install_complex_generic_simple=no
    install_complex_sse=no
    install_complex_bgp=no
    install_complex_bgq=no
    install_complex_avx_block1=no
    install_complex_avx_block2=no

    install_gpu=yes

    dnl no CPU SIMD instruction set is wanted in a GPU-only build
    want_sse=no
    want_avx=no
    want_avx2=no
    want_avx512=no

    build_with_gpu_support_only=yes
    use_specific_complex_kernel=yes
    use_specific_real_kernel=yes

    dnl now set the specific kernel
    $3=yes

    AC_MSG_NOTICE([ELPA will be built only with $1])
  else
    build_with_gpu_support_only=no
  fi
])
m4/ax_elpa_specific_kernels.m4
deleted
100644 → 0
View file @
dbe258d9
dnl macro for testing whether the user wanted to compile only with one
dnl specific real kernel
dnl usage: DEFINE_OPTION([real-generic-kernel-only],[generic-kernel],[with_real_generic_kernel],[install_real_generic])
AC_DEFUN([DEFINE_OPTION_SPECIFIC_REAL_KERNEL],[
AC_ARG_WITH([$1],
AS_HELP_STRING([--with-$1],
[only compile $2 for real case]),
[with_option=yes],[with_option=no])
if test x"${with_option}" = x"yes" ; then
if test x"${use_specific_real_kernel}" = x"no" ; then
dnl make sure that all the other kernels are unset
install_real_generic=no
install_real_generic_simple=no
install_real_sse_assembly=no
install_real_bgp=no
install_real_bgq=no
install_real_sse_block2=no
install_real_sse_block4=no
install_real_sse_block6=no
install_real_avx_block2=no
install_real_avx_block4=no
install_real_avx_block6=no
install_real_avx2_block2=no
install_real_avx2_block4=no
install_real_avx2_block6=no
install_real_avx512_block2=no
install_real_avx512_block4=no
install_real_avx512_block6=no
want_sse=no
want_avx=no
want_avx2=no
want_avx512=no
install_gpu=no
use_specific_real_kernel=yes
dnl now set the specific kernel
$3=yes
dnl take care of some dependencies
if test x"${install_real_sse_block4}" = x"yes" ; then
AC_MSG_NOTICE([$1 set. Also sse_block2 is needed])
install_real_sse_block2=yes
fi
if test x"${install_real_avx_block4}" = x"yes" ; then
AC_MSG_NOTICE([$1 set. Also avx_block2 is needed])