elpa_index.c 66.1 KB
Newer Older
Andreas Marek's avatar
Andreas Marek committed
1
2
//    Copyright 2017, L. Hüdepohl and A. Marek, MPCDF
//
Andreas Marek's avatar
Andreas Marek committed
3
//    This file is part of ELPA.
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
Pavel Kus's avatar
Pavel Kus committed
48
#include <assert.h>
Pavel Kus's avatar
Pavel Kus committed
49
50
#include <stdio.h>
#include <stdlib.h>
51
#include <elpa/elpa.h>
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
52
#include "elpa_index.h"
53

Andreas Marek's avatar
Andreas Marek committed
54
55
#include "config.h"

56
#ifdef WITH_OPENMP_TRADITIONAL
Andreas Marek's avatar
Andreas Marek committed
57
58
59
60
61
#include <omp.h>
#endif

/* File-scope thread bookkeeping with external linkage; neither variable is
 * referenced in the part of the file visible here.
 * NOTE(review): presumably these cache the OpenMP thread limit (value and
 * "already captured" flag) for the "omp_threads" option -- confirm against
 * the code that reads them. */
int max_threads_glob;
int set_max_threads_glob=0;
Soheil Soltani's avatar
Soheil Soltani committed
62
/* Default value of the "max_stored_rows" option; used as the default_value
 * of that option's entry in int_entries. */
int const default_max_stored_rows = 256;
Andreas Marek's avatar
Andreas Marek committed
63

64
65
/* Forward declarations of the callbacks referenced by the option tables
 * further down (cardinality / enumerate / valid / to_string hooks) and of
 * the string conversion helpers used by the generated accessors. */

/* Generic helpers for boolean-like options */
static int enumerate_identity(elpa_index_t index, int i);
static int cardinality_bool(elpa_index_t index);
static int valid_bool(elpa_index_t index, int n, int new_value);

/* "matrix_order" */
static int number_of_matrix_layouts(elpa_index_t index);
static int matrix_layout_enumerate(elpa_index_t index, int i);
static int matrix_layout_is_valid(elpa_index_t index, int n, int new_value);
static const char* elpa_matrix_layout_name(int layout);

/* "solver" */
static int number_of_solvers(elpa_index_t index);
static int solver_enumerate(elpa_index_t index, int i);
static int solver_is_valid(elpa_index_t index, int n, int new_value);
static const char* elpa_solver_name(int solver);

/* "real_kernel" */
static int number_of_real_kernels(elpa_index_t index);
static int real_kernel_enumerate(elpa_index_t index, int i);
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);

/* "complex_kernel" */
static int number_of_complex_kernels(elpa_index_t index);
static int complex_kernel_enumerate(elpa_index_t index, int i);
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);

/* "blocking_in_band_to_full" */
static int band_to_full_cardinality(elpa_index_t index);
static int band_to_full_enumerate(elpa_index_t index, int i);
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

/* "stripewidth_real" */
static int stripewidth_real_cardinality(elpa_index_t index);
static int stripewidth_real_enumerate(elpa_index_t index, int i);
static int stripewidth_real_is_valid(elpa_index_t index, int n, int new_value);

/* "internal_nblk" */
static int internal_nblk_cardinality(elpa_index_t index);
static int internal_nblk_enumerate(elpa_index_t index, int i);
static int internal_nblk_is_valid(elpa_index_t index, int n, int new_value);

/* "stripewidth_complex" */
static int stripewidth_complex_cardinality(elpa_index_t index);
static int stripewidth_complex_enumerate(elpa_index_t index, int i);
static int stripewidth_complex_is_valid(elpa_index_t index, int n, int new_value);

/* "omp_threads" */
static int omp_threads_cardinality(elpa_index_t index);
static int omp_threads_enumerate(elpa_index_t index, int i);
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value);

/* "max_stored_rows" */
static int max_stored_rows_cardinality(elpa_index_t index);
static int max_stored_rows_enumerate(elpa_index_t index, int i);
static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value);

/* "min_tile_size" */
static int min_tile_size_cardinality(elpa_index_t index);
static int min_tile_size_enumerate(elpa_index_t index, int i);
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value);

/* Device-count queries; declared only when the matching GPU backend is
 * enabled at configure time. They are not defined in the visible part of
 * this file. */
#ifdef WITH_NVIDIA_GPU_VERSION
int nvidia_gpu_count();
#endif
#ifdef WITH_AMD_GPU_VERSION
int amd_gpu_count();
#endif
#ifdef WITH_INTEL_GPU_VERSION
//missing function for GPU count
#endif

/* "use_gpu_id" */
static int use_gpu_id_cardinality(elpa_index_t index);
static int use_gpu_id_enumerate(elpa_index_t index, int i);
static int use_gpu_id_is_valid(elpa_index_t index, int n, int new_value);

/* Validators for the per-phase "gpu_*" switches */
static int valid_with_gpu(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value);

/* "intermediate_bandwidth" */
static int intermediate_bandwidth_cardinality(elpa_index_t index);
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i);
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value);

/* "cannon_buffer_size" */
static int cannon_buffer_size_cardinality(elpa_index_t index);
static int cannon_buffer_size_enumerate(elpa_index_t index, int i);
static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value);

/* Validators for individual options */
static int na_is_valid(elpa_index_t index, int n, int new_value);
static int nev_is_valid(elpa_index_t index, int n, int new_value);
static int bw_is_valid(elpa_index_t index, int n, int new_value);
static int output_build_config_is_valid(elpa_index_t index, int n, int new_value);
static int gpu_is_valid(elpa_index_t index, int n, int new_value);
static int verbose_is_valid(elpa_index_t index, int n, int new_value);

static int is_positive(elpa_index_t index, int n, int new_value);

/* String <-> value conversion for float options */
static int elpa_float_string_to_value(char *name, char *string, float *value);
static int elpa_float_value_to_string(char *name, float value, const char **string);

/* String <-> value conversion for double options */
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
156

Soheil Soltani's avatar
Soheil Soltani committed
157
         
Pavel Kus's avatar
Pavel Kus committed
158
/*
 * Expands to the designated initializer of the `.base` member shared by all
 * option entry types.
 *
 *   option_name        - option name; MUST be a string literal, because it is
 *                        pasted onto "ELPA_DEFAULT_" / "ELPA_FORCE_" by
 *                        adjacent-literal concatenation to form the names of
 *                        the two environment variables
 *   option_description - human-readable description
 *   once_value         - stored in .once
 *   readonly_value     - stored in .readonly
 *   print_flag_value   - stored in .print_flag
 *
 * The expansion has no surrounding braces and no trailing comma; the caller
 * supplies both.
 */
#define BASE_ENTRY(option_name, option_description, once_value, readonly_value, print_flag_value) \
                .base = { \
                        .name = option_name, \
                        .description = option_description, \
                        .once = once_value, \
                        .readonly = readonly_value, \
                        .env_default = "ELPA_DEFAULT_" option_name, \
                        .env_force = "ELPA_FORCE_" option_name, \
                        .print_flag = print_flag_value, \
                }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
168

Pavel Kus's avatar
Pavel Kus committed
169
/*
 * Initializer for an integer option whose base is created with once = 1 and
 * readonly = 0. valid_func may be NULL; it is stored in .valid.
 */
#define INT_PARAMETER_ENTRY(option_name, option_description, valid_func, print_flag) \
        { \
                BASE_ENTRY(option_name, option_description, 1, 0, print_flag), \
                .valid = valid_func, \
        }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
174

Pavel Kus's avatar
Pavel Kus committed
175
/*
 * Initializer for a boolean-like integer option (once = 0, readonly = 0).
 * The cardinality / enumerate / valid hooks are fixed to cardinality_bool,
 * enumerate_identity and valid_bool; no to_string hook is set.
 */
#define BOOL_ENTRY(option_name, option_description, default, tune_level, tune_domain, print_flag) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
                .default_value = default, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
        }

Pavel Kus's avatar
Pavel Kus committed
186
/*
 * Initializer for a general integer option (once = 0, readonly = 0) with a
 * default value, autotune level/domain, and caller-supplied hooks:
 *   card_func      -> .cardinality
 *   enumerate_func -> .enumerate
 *   valid_func     -> .valid
 *   to_string_func -> .to_string (NULL is used by many entries)
 */
#define INT_ENTRY(option_name, option_description, default, tune_level, tune_domain, card_func, enumerate_func, valid_func, to_string_func, print_flag) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
                .default_value = default, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = card_func, \
                .enumerate = enumerate_func, \
                .valid = valid_func, \
                .to_string = to_string_func, \
        }

Pavel Kus's avatar
Pavel Kus committed
198
/*
 * Initializer for an integer option with no hooks at all (once = 0,
 * readonly = 0); every member other than .base is left zero-initialized,
 * so .valid is NULL for these entries.
 */
#define INT_ANY_ENTRY(option_name, option_description, print_flag) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
        }

203
204
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
205
static const elpa_index_int_entry_t int_entries[] = {
Pavel Kus's avatar
Pavel Kus committed
206
207
208
209
210
211
212
213
214
215
216
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_id", "Process rank", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("num_process_rows", "Number of process row number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_process_cols", "Number of process column number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_processes", "Total number of processes", NULL, PRINT_STRUCTURE),
217
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk and at least 2", bw_is_valid, PRINT_YES),
Pavel Kus's avatar
Pavel Kus committed
218
219
220
221
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator", PRINT_NO),
        INT_ANY_ENTRY("blacs_context", "BLACS context", PRINT_NO),
Andreas Marek's avatar
Andreas Marek committed
222
223
        INT_ENTRY("verbose", "ELPA API prints verbose messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
                        cardinality_bool, enumerate_identity, verbose_is_valid, NULL, PRINT_YES),
224
//#ifdef REDISTRIBUTE_MATRIX
225
226
        INT_ENTRY("internal_nblk", "Internally used block size of scalapack block-cyclic distribution", 0, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
                   internal_nblk_cardinality, internal_nblk_enumerate, internal_nblk_is_valid, NULL, PRINT_YES),
227
//#endif
228
229
230
231
#ifdef STORE_BUILD_CONFIG
        INT_ENTRY("output_build_config", "Output the build config", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
                        cardinality_bool, enumerate_identity, output_build_config_is_valid, NULL, PRINT_NO),
#endif
232
233
	INT_ENTRY("matrix_order","Order of the matrix layout", COLUMN_MAJOR_ORDER, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
                         number_of_matrix_layouts, matrix_layout_enumerate, matrix_layout_is_valid, elpa_matrix_layout_name, PRINT_YES), \
234
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
235
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES),
236
237
        INT_ENTRY("gpu", "Use Nvidia GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
238
239
        INT_ENTRY("nvidia-gpu", "Use Nvidia GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
240
241
        INT_ENTRY("intel-gpu", "Use INTEL GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
242
        INT_ENTRY("amd-gpu", "Use AMD GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
243
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
244
245
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
246
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
247
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
248
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
249
                        cardinality_bool, enumerate_identity, valid_with_gpu, NULL, PRINT_YES),
250
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
251
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
252
        INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
253
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
254
255
256
	//not yet ported to GPU
        //INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
        //                cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
257
258
        INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
259
        INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
260
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
261
262
	INT_ENTRY("use_gpu_id", "Calling MPI task will use this gpu id", -99, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
		  use_gpu_id_cardinality, use_gpu_id_enumerate, use_gpu_id_is_valid, NULL, PRINT_YES), 
263
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Pavel Kus's avatar
Pavel Kus committed
264
                        number_of_real_kernels, real_kernel_enumerate, real_kernel_is_valid, real_kernel_name, PRINT_YES),
265
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Pavel Kus's avatar
Pavel Kus committed
266
                        number_of_complex_kernels, complex_kernel_enumerate, complex_kernel_is_valid, complex_kernel_name, PRINT_YES),
267

268
        INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
269
                        min_tile_size_cardinality, min_tile_size_enumerate, min_tile_size_is_valid, NULL, PRINT_YES),
270
        INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
271
                        intermediate_bandwidth_cardinality, intermediate_bandwidth_enumerate, intermediate_bandwidth_is_valid, NULL, PRINT_YES),
272

273
        INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
274
                        band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL, PRINT_YES),
275
        INT_ENTRY("stripewidth_real", "Stripewidth_real, default 48. Must be a multiple of 4", 48, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_REAL, \
276
                        stripewidth_real_cardinality, stripewidth_real_enumerate, stripewidth_real_is_valid, NULL, PRINT_YES),
277
        INT_ENTRY("stripewidth_complex", "Stripewidth_complex, default 96. Must be a multiple of 8", 96, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
278
279
                        stripewidth_complex_cardinality, stripewidth_complex_enumerate, stripewidth_complex_is_valid, NULL, PRINT_YES),

Soheil Soltani's avatar
Soheil Soltani committed
280
        INT_ENTRY("max_stored_rows", "Maximum number of stored rows used in ELPA 1 backtransformation", default_max_stored_rows, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
281
                        max_stored_rows_cardinality, max_stored_rows_enumerate, max_stored_rows_is_valid, NULL, PRINT_YES),
282
#ifdef WITH_OPENMP_TRADITIONAL
283
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
284
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
285
#else
286
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
287
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
288
#endif
289
        INT_ENTRY("cannon_buffer_size", "Increasing the buffer size might make it faster, but costs memory", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
290
                        cannon_buffer_size_cardinality, cannon_buffer_size_enumerate, cannon_buffer_size_is_valid, NULL, PRINT_YES),
291
#if defined(THREADING_SUPPORT_CHECK) && defined(ALLOW_THREAD_LIMITING) && !defined(HAVE_SUFFICIENT_MPI_THREADING_SUPPORT)
292
        BOOL_ENTRY("limit_openmp_threads", "Limit the number if openmp threads to 1", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_NO),
293
#endif
Pavel Kus's avatar
Pavel Kus committed
294
295
296
297
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL, PRINT_YES),
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
298
        BOOL_ENTRY("measure_performance", "Also measure with flops (via papi) with the timings", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
Pavel Kus's avatar
Pavel Kus committed
299
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
300
        BOOL_ENTRY("output_pinning_information", "Print the pinning information", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
Pavel Kus's avatar
Pavel Kus committed
301
        BOOL_ENTRY("cannon_for_generalized", "Whether to use Cannons algorithm for the generalized EVP", 1, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
302
303
};

Wenzhe Yu's avatar
Wenzhe Yu committed
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
/* Initializer for a float option that cannot be set by the user:
 * once = 0, readonly = 1, print_flag = 0; only .base is initialized. */
#define READONLY_FLOAT_ENTRY(opt_name, opt_desc) \
        { BASE_ENTRY(opt_name, opt_desc, 0, 1, 0) }

/* Initializer for a settable float option (once = 0, readonly = 0) with the
 * given default value and print flag. */
#define FLOAT_ENTRY(opt_name, opt_desc, default_val, flag) \
        { \
                BASE_ENTRY(opt_name, opt_desc, 0, 0, flag), \
                .default_value = default_val, \
        }

/* Table of all float-valued options. */
static const elpa_index_float_entry_t float_entries[] = {
        FLOAT_ENTRY(
                "thres_pd_single",
                "Threshold to define ill-conditioning, default 0.00001",
                0.00001,
                PRINT_YES),
};

319
/* Initializer for a double option that cannot be set by the user:
 * once = 0, readonly = 1, print_flag = 0; only .base is initialized. */
#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 1, 0) \
        }

324
325
326
327
328
329
/* Initializer for a settable double option (once = 0, readonly = 0) with the
 * given default value and print flag. */
#define DOUBLE_ENTRY(opt_name, opt_desc, default_val, flag) \
        { \
                BASE_ENTRY(opt_name, opt_desc, 0, 0, flag), \
                .default_value = default_val, \
        }

330
static const elpa_index_double_entry_t double_entries[] = {
Wenzhe Yu's avatar
Wenzhe Yu committed
331
        DOUBLE_ENTRY("thres_pd_double", "Threshold to define ill-conditioning, default 0.00001", 0.00001, PRINT_YES),
332
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
333

334
/*
 * Releases an index: for every option type enumerated by FOR_ALL_TYPES the
 * values, is_set and notified arrays are freed, then the index object itself.
 *
 * A NULL index is accepted and ignored, mirroring free(NULL).
 */
void elpa_index_free(elpa_index_t index) {
        if (index == NULL) {
                return;
        }

#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

        free(index);
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
345
346
347
static int compar(const void *a, const void *b) {
        return strcmp(((elpa_index_int_entry_t *) a)->base.name,
                      ((elpa_index_int_entry_t *) b)->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
348
349
}

350
351
352
/*
 * Generates find_<TYPE>_entry(name) for every option type.
 *
 * The generated function does a linear search (lfind + compar) over
 * <TYPE>_entries for an entry whose base.name equals `name`.
 * Returns the index of the entry in the table, or -1 if there is none.
 */
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                elpa_index_##TYPE##_entry_t key = { .base = {.name = name} } ; \
                size_t nmembers = nelements(TYPE##_entries); \
                /* the table is const, so only ever look at the hit through a const pointer */ \
                const elpa_index_##TYPE##_entry_t *entry = \
                        lfind((const void*) &key, (const void *) TYPE##_entries, &nmembers, sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (entry == NULL) { \
                        return -1; \
                } \
                return (int) (entry - &TYPE##_entries[0]); \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


/*
 * Generates getenv_<TYPE>() for every option type.
 *
 * The generated function reads environment variable `env_variable` and tries
 * to convert it to a value for option number `n` of <TYPE>_entries.
 *
 *   index        - index whose notified[] flags are consulted and updated
 *   env_variable - name of the environment variable to read
 *   notify_flag  - bit recorded in notified[n] so the "is set to" message is
 *                  emitted at most once per option and flag
 *   n            - position of the option in <TYPE>_entries
 *   value        - out: receives the converted value
 *   error_string - prefix used in the informational message
 *
 * Returns 1 if the variable is set and was converted (*value is valid),
 * 0 if it is unset or could not be converted. A conversion failure is
 * reported on stderr.
 *
 * When the value can be rendered as a string, the informational message is
 * printed only on the printing MPI rank, only if the "verbose" option is set,
 * and only the first time; otherwise the raw value is printed with
 * PRINTF_SPEC on the printing MPI rank on every call.
 */
#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                char *env_value = getenv(env_variable); \
                if (env_value == NULL) { \
                        return 0; \
                } \
                int err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
                if (err != ELPA_OK) { \
                        /* report the environment variable itself, not the option name */ \
                        fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                        env_variable, env_value, elpa_strerr(err)); \
                        return 0; \
                } \
                const char *value_string = NULL; \
                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
                        if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
                                if (elpa_index_is_printing_mpi_rank(index)) { \
                                        if (elpa_index_int_value_is_set(index, "verbose")) { \
                                                fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                              error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                        } \
                                } \
                                index->TYPE##_options.notified[n] |= notify_flag; \
                        } \
                } else { \
                        if (elpa_index_is_printing_mpi_rank(index)) { \
                                fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
                                        error_string, TYPE##_entries[n].base.name, *value, env_variable);\
                        } \
                } \
                return 1; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


400
/*
 * Generates elpa_index_get_<TYPE>_value() for every option type.
 *
 *   index - index to read from
 *   name  - option name
 *   error - optional out parameter (may be NULL): ELPA_OK on success,
 *           ELPA_ERROR_ENTRY_NOT_FOUND if no such option exists
 *
 * Returns the option's value, or ERROR_VALUE if the option does not exist.
 * For options that are neither set-once nor read-only, the option's
 * env_force environment variable, if set and parseable, takes precedence
 * over the value stored in the index.
 */
#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, SCANF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                TYPE ret; \
                /* an empty table is reported like any other unknown option */ \
                int n = (sizeof(TYPE##_entries) == 0) ? -1 : find_##TYPE##_entry(name); \
                if (n < 0) { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
                int from_env = 0; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                } \
                if (!from_env) { \
                        ret = index->TYPE##_options.values[n]; \
                } \
                if (error != NULL) { \
                        *error = ELPA_OK; \
                } \
                return ret; \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


/*
 * Generates elpa_index_get_<TYPE>_loc() for every option type.
 *
 * Returns a pointer to the storage slot of option `name` inside
 * index-><TYPE>_options.values, or NULL if the option table is empty or no
 * option of that name exists.
 */
#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return NULL; \
                } \
                return &index->TYPE##_options.values[n]; \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


444
/*
 * Generates elpa_index_set_<TYPE>_value(index, name, value).
 *
 * Checks, in this order:
 *   1. the option exists            -> else ELPA_ERROR_ENTRY_NOT_FOUND
 *   2. the entry's `valid` callback (if any) accepts `value`
 *                                   -> else ELPA_ERROR_ENTRY_INVALID_VALUE
 *   3. a `once` option is not already set
 *                                   -> else ELPA_ERROR_ENTRY_ALREADY_SET
 *   4. the option is not `readonly` -> else ELPA_ERROR_ENTRY_READONLY
 * On success the value is stored, the option is marked as set and ELPA_OK is
 * returned.
 *
 * The `once`/`is_set` test uses logical && (it used to be a bitwise &, which
 * only works while both flags are exactly 0 or 1).
 */
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                if (TYPE##_entries[n].valid != NULL) { \
                        if (!TYPE##_entries[n].valid(index, n, value)) { \
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
                        } \
                } \
                if (TYPE##_entries[n].base.once && index->TYPE##_options.is_set[n]) { \
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
                if (TYPE##_entries[n].base.readonly) { \
                        return ELPA_ERROR_ENTRY_READONLY; \
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)

Pavel Kus's avatar
Pavel Kus committed
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
/*
 * Generates elpa_index_set_from_load_<TYPE>_value(index, name, value, explicit).
 *
 * Stores `value` for option `name` without running the validity, once or
 * readonly checks. The option is marked as set only when `explicit` is
 * non-zero. Returns ELPA_OK, or ELPA_ERROR_ENTRY_NOT_FOUND when no such
 * option exists for this type.
 */
#define IMPLEMENT_SET_FROM_LOAD_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_from_load_##TYPE##_value(elpa_index_t index, char *name, TYPE value, int explicit) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int slot = find_##TYPE##_entry(name); \
                if (slot < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                index->TYPE##_options.values[slot] = value; \
                if (explicit) { \
                        index->TYPE##_options.is_set[slot] = 1; \
                } \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FROM_LOAD_FUNCTION)

486
487
488

/*
 * Generates elpa_index_<TYPE>_value_is_set(index, name).
 *
 * Returns 1 when the option has been set in `index`, 0 when it has not, and
 * ELPA_ERROR_ENTRY_NOT_FOUND when this type has no option called `name`.
 */
#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int pos = find_##TYPE##_entry(name); \
                if (pos < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                return index->TYPE##_options.is_set[pos] ? 1 : 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


/*
 * Type-agnostic "is set" query: tries the per-type *_value_is_set function of
 * every option type in turn and returns the first non-negative answer
 * (1 = set, 0 = not set). If every type reports an error, a message is
 * printed to stderr and the last error code is returned.
 */
int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int status = ELPA_ERROR;

#define RET_IF_SET(TYPE, ...) \
        status = elpa_index_##TYPE##_value_is_set(index, name); \
        if (!(status < 0)) { \
                return status; \
        }

        FOR_ALL_TYPES(RET_IF_SET)

        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return status;
}

521
522
523
524
525
/*
 * Checks whether `new_value` would be accepted for the int option `name`.
 *
 * Returns ELPA_ERROR_ENTRY_NOT_FOUND for an unknown option, ELPA_OK when the
 * option has no `valid` callback or the callback accepts the value, and
 * ELPA_ERROR when the callback rejects it.
 *
 * (Stray line-continuation backslashes left over from a macro version of this
 * code have been removed.)
 */
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        if (int_entries[n].valid == NULL) {
                return ELPA_OK;
        }
        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
}

533
int elpa_int_value_to_string(char *name, int value, const char **string) {
534
535
536
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
537
        }
538
        if (int_entries[n].to_string == NULL) {
539
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
540
541
542
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
543
544
}

545
546

/*
 * Length of the textual name of `value` for the int option `name`, or 0 when
 * elpa_int_value_to_string() does not produce a string.
 */
int elpa_int_value_to_strlen(char *name, int value) {
        const char *text = NULL;
        elpa_int_value_to_string(name, value, &text);
        if (text != NULL) {
                return strlen(text);
        }
        return 0;
}
555

556
557
558
559
560
561

/*
 * Length of the textual name of the value currently stored in `index` for the
 * int option `name`; 0 when the option is unknown.
 */
int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int entry = find_int_entry(name);
        if (entry >= 0) {
                return elpa_int_value_to_strlen(name, index->int_options.values[entry]);
        }
        return 0;
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
567
568
569
570
571
572
573
574
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
575
                if (ret == 1) {
576
                        *value = val;
577
578
                        return ELPA_OK;
                } else {
579
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
580
581
582
                }
        }

583
584
        for (int i = 0; i < int_entries[n].cardinality(NULL); i++) {
                int candidate = int_entries[n].enumerate(NULL, i);
585
586
587
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
588
                }
589
        }
590
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
591
592
}

Wenzhe Yu's avatar
Wenzhe Yu committed
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
/*
 * Parses `string` as a float for option `name`.
 *
 * Returns ELPA_OK and stores the result in *value on success; otherwise
 * prints a debug message to stderr and returns
 * ELPA_ERROR_ENTRY_INVALID_VALUE.
 *
 * The conversion uses "%f": the previous "%lf" tells sscanf to store a
 * double, which overruns the float it was handed (undefined behaviour).
 */
int elpa_float_string_to_value(char *name, char *string, float *value) {
        float val;
        int ret = sscanf(string, "%f", &val);
        if (ret == 1) {
                *value = val;
                return ELPA_OK;
        } else {
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse float value '%s' for option '%s'\n", string, name);
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
        }
}

/* Float options have no textual names; always reports that and leaves *string untouched. */
int elpa_float_value_to_string(char *name, float value, const char **string) {
        (void) name;
        (void) value;
        (void) string;
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
}

610
int elpa_double_string_to_value(char *name, char *string, double *value) {
611
612
        double val;
        int ret = sscanf(string, "%lf", &val);
613
        if (ret == 1) {
614
615
                *value = val;
                return ELPA_OK;
616
        } else {
617
618
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
619
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
620
621
622
        }
}

623
int elpa_double_value_to_string(char *name, double value, const char **string) {
624
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
625
}
626

627
/*
 * Number of values the int option `name` can enumerate, obtained from the
 * entry's cardinality callback (called with a NULL index). Returns
 * ELPA_ERROR_ENTRY_NOT_FOUND when the option is unknown or has no such
 * callback.
 */
int elpa_option_cardinality(char *name) {
        int entry = find_int_entry(name);
        if (entry >= 0 && int_entries[entry].cardinality) {
                return int_entries[entry].cardinality(NULL);
        }
        return ELPA_ERROR_ENTRY_NOT_FOUND;
}
634

635
/*
 * The i-th value of the int option `name`, obtained from the entry's
 * enumerate callback (called with a NULL index). Returns 0 when the option is
 * unknown or has no such callback.
 */
int elpa_option_enumerate(char *name, int i) {
        int entry = find_int_entry(name);
        if (entry >= 0 && int_entries[entry].enumerate) {
                return int_entries[entry].enumerate(NULL, i);
        }
        return 0;
}

643

644
/* Helper functions for simple int entries */
645
/* A boolean option has two possible values; `index` is not consulted. */
static int cardinality_bool(elpa_index_t index) {
        (void) index;
        return 2;
}
648

649
650
/* Accepts exactly 0 and 1. */
static int valid_bool(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return new_value == 0 || new_value == 1;
}

653
/* Enumeration in which the i-th value is i itself. */
static int enumerate_identity(elpa_index_t index, int i) {
        (void) index;
        return i;
}

657
658
659
660
661
662
663
664
665
666
/* Helper functions for specific options */

/*
 * Building blocks for switch statements generated from the ELPA_FOR_ALL_*
 * option lists.
 *
 * NAME_CASE    - `case value:` returning the option's identifier as a string.
 * VALID_CASE   - `case value:` accepting the value unconditionally.
 * VALID_CASE_3 - `case value:` accepting the value only if `available` is
 *                true and `other_checks(value)` holds. `other_checks` is the
 *                name of a function-like macro; `available` is parenthesized
 *                so that an expression argument cannot change the precedence
 *                of the &&.
 */
#define NAME_CASE(name, value, ...) \
        case value: \
                return #name;

#define VALID_CASE(name, value) \
        case value: \
                return 1;

#define VALID_CASE_3(name, value, available, other_checks) \
        case value: \
                return (available) && (other_checks(value));
670

671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
/* Identifier of a matrix-layout constant as a string, or a placeholder for unknown values. */
static const char* elpa_matrix_layout_name(int layout) {
        switch(layout) {
                ELPA_FOR_ALL_MATRIX_LAYOUTS(NAME_CASE)
                default:
                        break;
        }
        return "(Invalid matrix layout)";
}

/* Cardinality callback for the matrix-layout option; `index` is not consulted. */
static int number_of_matrix_layouts(elpa_index_t index) {
        (void) index;
        return ELPA_NUMBER_OF_MATRIX_LAYOUTS;
}

/*
 * Enumerate callback for the matrix-layout option: returns the layout
 * constant whose rank among all layout constants equals i, or 0 when i
 * matches no rank.
 *
 * The switch is generated entirely by the preprocessor. For every layout,
 * ENUMERATE_CASE emits a `case` whose label is 0 plus one OPTION_RANK term
 * per layout in the list; each term is 1 exactly when that layout's value is
 * smaller than the current one. The label is therefore the number of layouts
 * with a strictly smaller value.
 */
static int matrix_layout_enumerate(elpa_index_t index, int i) {
/* One summand of a case label: 1 if `value` is below the length of the
 * array_of_size_value declared by the enclosing ENUMERATE_CASE, else 0. */
#define OPTION_RANK(name, value, ...) \
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)

/* DEFER1 keeps its argument from being expanded during the current scan;
 * EVAL supplies the additional scan in which it finally expands. This lets
 * the option list macro be applied again inside its own expansion. */
#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

/* array_of_size_value exists only so that sizeof can recover `value` as a
 * constant inside OPTION_RANK; it is never read. INNER_ITERATOR must be
 * defined by the caller to name the option list being enumerated. */
#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_MATRIX_LAYOUTS
                EVAL(ELPA_FOR_ALL_MATRIX_LAYOUTS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

/* Accepts exactly the values listed by ELPA_FOR_ALL_MATRIX_LAYOUTS. */
static int matrix_layout_is_valid(elpa_index_t index, int n, int new_value) {
        switch(new_value) {
                ELPA_FOR_ALL_MATRIX_LAYOUTS(VALID_CASE)
                default:
                        break;
        }
        return 0;
}

713
714
715
716
717
/* Identifier of a solver constant as a string, or a placeholder for unknown values. */
static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        break;
        }
        return "(Invalid solver)";
}

721
/* Cardinality callback for the solver option; `index` is not consulted. */
static int number_of_solvers(elpa_index_t index) {
        (void) index;
        return ELPA_NUMBER_OF_SOLVERS;
}

725
/*
 * Enumerate callback for the solver option: returns the solver constant whose
 * rank among all solver constants equals i, or 0 when i matches no rank.
 *
 * Each generated case label is 0 plus one OPTION_RANK term per solver, i.e.
 * the number of solvers with a strictly smaller value than the current one.
 * The helper macros below are kept token-for-token as they were, because the
 * preprocessor only tolerates repeated definitions that are identical.
 */
static int solver_enumerate(elpa_index_t index, int i) {
#define OPTION_RANK(name, value, ...) \
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)

#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        break;
        }
        return 0;
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
748
/* Accepts exactly the values listed by ELPA_FOR_ALL_SOLVERS. */
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        break;
        }
        return 0;
}

756
/* Cardinality callback for the real 2-stage kernel option; `index` is not consulted. */
static int number_of_real_kernels(elpa_index_t index) {
        (void) index;
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
759

760
/*
 * Enumerate callback for the real 2-stage kernel option: returns the kernel
 * constant whose generated case label equals i, or 0 when none matches.
 * Relies on the ENUMERATE_CASE / EVAL helper macros defined earlier in the
 * file.
 */
static int real_kernel_enumerate(elpa_index_t index, int i) {
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        break;
        }
        return 0;
}
769

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
770
static const char *real_kernel_name(int kernel) {
771
772
773
774
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
775
        }
776
}
777

778
779
780
781
782
783
784
785
/*
 * Extra validity checks for real 2-stage kernels, one per GPU vendor.
 * Each expands to a conditional expression: the vendor's GPU kernel is
 * accepted only when `gpu_is_active` is true, every other kernel always
 * passes. `gpu_is_active` is not a parameter; it must be a variable visible
 * where the macro is expanded. The expansion is not parenthesized, so the
 * user has to wrap it (VALID_CASE_3 does).
 */
#define REAL_NVIDIA_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_REAL_NVIDIA_GPU ? gpu_is_active : 1

#define REAL_AMD_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_REAL_AMD_GPU ? gpu_is_active : 1

#define REAL_INTEL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_REAL_INTEL_GPU ? gpu_is_active : 1
786

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
787
/*
 * Validity callback for the real 2-stage kernel option.
 *
 * With the 1-stage solver selected only ELPA_2STAGE_REAL_DEFAULT is accepted.
 * Otherwise a kernel is accepted when its `available` flag in the kernel list
 * is true and, for the GPU kernel of the vendor this build targets, one of
 * the options "nvidia-gpu", "gpu", "amd-gpu" or "intel-gpu" is non-zero.
 *
 * NOTE(review): every #ifdef branch below emits the complete list of case
 * labels, so a build defining more than one WITH_*_GPU_VERSION macro would
 * produce duplicate labels - presumably the build system never enables two
 * vendors at once; confirm.
 */
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_get_int_value(index, "solver", NULL) == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }

        /* Name is fixed: the *_ONLY_WHEN_GPU_IS_ACTIVE macros refer to it. */
        int gpu_is_active = (elpa_index_get_int_value(index, "nvidia-gpu", NULL)
                          || elpa_index_get_int_value(index, "gpu", NULL)
                          || elpa_index_get_int_value(index, "amd-gpu", NULL)
                          || elpa_index_get_int_value(index, "intel-gpu", NULL));

        switch(new_value) {
#ifdef WITH_NVIDIA_GPU_VERSION
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_NVIDIA_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
#endif
#ifdef WITH_AMD_GPU_VERSION
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_AMD_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
#endif
#ifdef WITH_INTEL_GPU_VERSION
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_INTEL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
#endif
#if !defined(WITH_NVIDIA_GPU_VERSION) && !defined(WITH_AMD_GPU_VERSION) && !defined(WITH_INTEL_GPU_VERSION)
                /* Build without GPU support: the NVIDIA variant serves as the generic check. */
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_NVIDIA_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
#endif
                default:
                        break;
        }
        return 0;
}
811

812
/* Cardinality callback for the complex 2-stage kernel option; `index` is not consulted. */
static int number_of_complex_kernels(elpa_index_t index) {
        (void) index;
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
815

816

817
/*
 * Enumerate callback for the complex 2-stage kernel option: returns the
 * kernel constant whose generated case label equals i, or 0 when none
 * matches. Relies on the ENUMERATE_CASE / EVAL helper macros defined earlier
 * in the file.
 */
static int complex_kernel_enumerate(elpa_index_t index, int i) {
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        break;
        }
        return 0;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
827
static const char *complex_kernel_name(int kernel) {
828
829
830
831
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
832
        }
833
}
834

835
836
837
838
839
840
841
842
/*
 * Extra validity checks for complex 2-stage kernels, one per GPU vendor.
 * Each expands to a conditional expression: the vendor's GPU kernel is
 * accepted only when `gpu_is_active` is true, every other kernel always
 * passes. `gpu_is_active` is not a parameter; it must be a variable visible
 * where the macro is expanded. The expansion is not parenthesized, so the
 * user has to wrap it (VALID_CASE_3 does).
 */
#define COMPLEX_NVIDIA_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_COMPLEX_NVIDIA_GPU ? gpu_is_active : 1

#define COMPLEX_AMD_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_COMPLEX_AMD_GPU ? gpu_is_active : 1

#define COMPLEX_INTEL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_COMPLEX_INTEL_GPU ? gpu_is_active : 1
843

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
844
/*
 * Validity callback for the complex 2-stage kernel option.
 *
 * With the 1-stage solver selected only ELPA_2STAGE_COMPLEX_DEFAULT is
 * accepted. Otherwise a kernel is accepted when its `available` flag in the
 * kernel list is true and, for the GPU kernel of the vendor this build
 * targets, one of the options "nvidia-gpu", "gpu", "amd-gpu" or "intel-gpu"
 * is non-zero.
 *
 * The "gpu" option is now part of the GPU test, matching
 * real_kernel_is_valid() and the valid_with_gpu*() helpers; it was missing
 * here, so a GPU enabled only through "gpu" was honoured for real kernels
 * but not for complex ones.
 *
 * NOTE(review): every #ifdef branch below emits the complete list of case
 * labels, so a build defining more than one WITH_*_GPU_VERSION macro would
 * produce duplicate labels - presumably the build system never enables two
 * vendors at once; confirm.
 */
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }

        /* Name is fixed: the *_ONLY_WHEN_GPU_IS_ACTIVE macros refer to it. */
        int gpu_is_active = (elpa_index_get_int_value(index, "nvidia-gpu", NULL)
                          || elpa_index_get_int_value(index, "gpu", NULL)
                          || elpa_index_get_int_value(index, "amd-gpu", NULL)
                          || elpa_index_get_int_value(index, "intel-gpu", NULL));

        switch(new_value) {
#ifdef WITH_NVIDIA_GPU_VERSION
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_NVIDIA_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
#endif
#ifdef WITH_AMD_GPU_VERSION
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_AMD_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
#endif
#ifdef WITH_INTEL_GPU_VERSION
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_INTEL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
#endif
#if !defined(WITH_NVIDIA_GPU_VERSION) && !defined(WITH_AMD_GPU_VERSION) && !defined(WITH_INTEL_GPU_VERSION)
                /* Build without GPU support: the NVIDIA variant serves as the generic check. */
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_NVIDIA_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
#endif
                default:
                        return 0;
        }
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
868

869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
/* Identifier of an autotune-level constant as a string, or a placeholder for unknown values. */
static const char* elpa_autotune_level_name(int level) {
        switch(level) {
                ELPA_FOR_ALL_AUTOTUNE_LEVELS(NAME_CASE)
                default:
                        break;
        }
        return "(Invalid autotune level)";
}

/* Identifier of an autotune-domain constant as a string, or a placeholder for unknown values. */
static const char* elpa_autotune_domain_name(int domain) {
        switch(domain) {
                ELPA_FOR_ALL_AUTOTUNE_DOMAINS(NAME_CASE)
                default:
                        break;
        }
        return "(Invalid autotune domain)";
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
885
886
887
888
/* "na" must be strictly positive. */
static int na_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return 0 < new_value;
}

889
890
891
892
/*
 * Validity callback for "nev": requires that "na" has been set and that
 * 0 <= new_value <= na.
 *
 * elpa_index_int_value_is_set() returns 1, 0 or a negative error code, so the
 * result is compared with 1 (as bw_is_valid() does). The previous `!result`
 * test let an error code through as if "na" were set.
 */
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
}

/* Generic validity callback: accepts strictly positive values. */
static int is_positive(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return 0 < new_value;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
900
901
902
903
904
905
906
/*
 * Validity callback for the bandwidth option: requires that "na" has been
 * set and that 2 <= new_value < na.
 */
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") == 1) {
                int matrix_size = elpa_index_get_int_value(index, "na", NULL);
                return (new_value >= 2) && (new_value < matrix_size);
        }
        return 0;
}
909

910
911
912
913
/* Boolean switch: accepts exactly 0 and 1. */
static int output_build_config_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return (0 <= new_value) && (new_value <= 1);
}

914
915
916
917
/* Boolean switch: accepts exactly 0 and 1. */
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return (0 <= new_value) && (new_value <= 1);
}

Andreas Marek's avatar
Andreas Marek committed
918
919
920
921
/* Boolean switch: accepts exactly 0 and 1. */
static int verbose_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return (0 <= new_value) && (new_value <= 1);
}

922
/* Cardinality callback paired with band_to_full_enumerate(): ten candidates. */
static int band_to_full_cardinality(elpa_index_t index) {
        (void) index;
        return 10;
}
925
/* Enumerate callback: candidate number i (counted from 0) is the value i + 1. */
static int band_to_full_enumerate(elpa_index_t index, int i) {
        (void) index;
        return 1 + i;
}

929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
/* Validity callback: any non-negative value is accepted. */
static int internal_nblk_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return new_value >= 0;
}
/* Cardinality callback: number of candidates offered by internal_nblk_enumerate(). */
static int internal_nblk_cardinality(elpa_index_t index) {
        (void) index;
        return 9;
}

/*
 * Enumerate callback: returns the i-th candidate block size for
 * 0 <= i <= 8 and 0 for any other i.
 *
 * The original switch had no default, so an out-of-range i fell off the end
 * of a non-void function; 0 is returned instead, like the default branch of
 * the other enumerate callbacks in this file.
 *
 * NOTE(review): the list jumps from 256 straight to 1024 - 512 is absent.
 * Kept as found; confirm whether that is intentional.
 */
static int internal_nblk_enumerate(elpa_index_t index, int i) {
        static const int nblk_choices[] = {2, 4, 8, 16, 32, 64, 128, 256, 1024};
        const int count = (int) (sizeof(nblk_choices) / sizeof(nblk_choices[0]));

        if (i < 0 || i >= count) {
                return 0;
        }
        return nblk_choices[i];
}

Pavel Kus's avatar
Pavel Kus committed
959
// TODO shouldnt it be only for ELPA2??
960
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
961
962
	int max_block=10;
        return (1 <= new_value) && (new_value <= max_block);
963
964
}

965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
/* Cardinality callback: number of candidates offered by stripewidth_real_enumerate(). */
static int stripewidth_real_cardinality(elpa_index_t index) {
        (void) index;
        return 17;
}

/* Cardinality callback: number of candidates offered by stripewidth_complex_enumerate(). */
static int stripewidth_complex_cardinality(elpa_index_t index) {
        (void) index;
        return 17;
}

/*
 * Enumerate callback for the real stripe width: candidate i (0 <= i <= 16)
 * is 32 + 4*i, i.e. 32, 36, ..., 96. Any other i yields 0.
 *
 * The original 17-case switch had no default, so an out-of-range i fell off
 * the end of a non-void function; the explicit range check replaces that.
 */
static int stripewidth_real_enumerate(elpa_index_t index, int i) {
        enum { FIRST = 32, STEP = 4, LAST_INDEX = 16 };

        if (i < 0 || i > LAST_INDEX) {
                return 0;
        }
        return FIRST + STEP * i;
}

/*
 * Enumerate callback for the complex stripe width: candidate i
 * (0 <= i <= 16) is 48 + 8*i, i.e. 48, 56, ..., 176. Any other i yields 0.
 *
 * The original 17-case switch had no default, so an out-of-range i fell off
 * the end of a non-void function; the explicit range check replaces that.
 */
static int stripewidth_complex_enumerate(elpa_index_t index, int i) {
        enum { FIRST = 48, STEP = 8, LAST_INDEX = 16 };

        if (i < 0 || i > LAST_INDEX) {
                return 0;
        }
        return FIRST + STEP * i;
}

/* Validity callback: accepts any value from 32 to 96 inclusive. */
static int stripewidth_real_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return (new_value >= 32) && (new_value <= 96);
}

/* Validity callback: accepts any value from 48 to 176 inclusive. */
static int stripewidth_complex_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return (new_value >= 48) && (new_value <= 176);
}

Pavel Kus's avatar
Pavel Kus committed
1059
/*
 * Cardinality callback for the OpenMP thread count: the maximum number of
 * threads.
 *
 * With WITH_OPENMP_TRADITIONAL the maximum is read from
 * omp_get_max_threads() the first time it is needed and cached in
 * max_threads_glob (set_max_threads_glob marks the cache as filled).
 * Without it the maximum is fixed at 1.
 */
static int omp_threads_cardinality(elpa_index_t index) {
#ifdef WITH_OPENMP_TRADITIONAL
        if (!set_max_threads_glob) {
                max_threads_glob = omp_get_max_threads();
                set_max_threads_glob = 1;
        }
#else
        max_threads_glob = 1;
        set_max_threads_glob = 1;
#endif
        int thread_limit = max_threads_glob;
        return thread_limit;
}

Pavel Kus's avatar
Pavel Kus committed
1074
/* Enumerate callback: candidate number i (counted from 0) is i + 1 threads. */
static int omp_threads_enumerate(elpa_index_t index, int i) {
        (void) index;
        return 1 + i;
}

/*
 * Validity callback for the OpenMP thread count: accepts
 * 1 <= new_value <= maximum number of threads.
 *
 * With WITH_OPENMP_TRADITIONAL the maximum is read from
 * omp_get_max_threads() the first time it is needed and cached in
 * max_threads_glob; without it the maximum is fixed at 1.
 */
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
#ifdef WITH_OPENMP_TRADITIONAL
        if (!set_max_threads_glob) {
                max_threads_glob = omp_get_max_threads();
                set_max_threads_glob = 1;
        }
#else
        max_threads_glob = 1;
        set_max_threads_glob = 1;
#endif
        int thread_limit = max_threads_glob;
        if (new_value < 1) {
                return 0;
        }
        return new_value <= thread_limit;
}

1093

1094
/*
 * Validity callback for boolean options that may only be switched on when a
 * GPU is in use: 0 is always accepted, 1 only if one of "nvidia-gpu", "gpu",
 * "amd-gpu" or "intel-gpu" is non-zero.
 */
static int valid_with_gpu(elpa_index_t index, int n, int new_value) {
        int gpu_is_active = (elpa_index_get_int_value(index, "nvidia-gpu", NULL)
                          || elpa_index_get_int_value(index, "gpu", NULL)
                          || elpa_index_get_int_value(index, "amd-gpu", NULL)
                          || elpa_index_get_int_value(index, "intel-gpu", NULL));

        if (!gpu_is_active) {
                return new_value == 0;
        }
        return (new_value == 0) || (new_value == 1);
}

/*
 * Validity callback for boolean options tied to the 1-stage solver on GPU:
 * 0 is always accepted, 1 only if the solver is ELPA_SOLVER_1STAGE and one of
 * "nvidia-gpu", "gpu", "amd-gpu" or "intel-gpu" is non-zero.
 */
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        int gpu_is_active = (elpa_index_get_int_value(index, "nvidia-gpu", NULL)
                          || elpa_index_get_int_value(index, "gpu", NULL)
                          || elpa_index_get_int_value(index, "amd-gpu", NULL)
                          || elpa_index_get_int_value(index, "intel-gpu", NULL));

        if (solver != ELPA_SOLVER_1STAGE || !gpu_is_active) {
                return new_value == 0;
        }
        return (new_value == 0) || (new_value == 1);
}

/*
 * Validity callback for boolean options tied to the 2-stage solver on GPU:
 * 0 is always accepted, 1 only if the solver is ELPA_SOLVER_2STAGE and one of
 * "nvidia-gpu", "gpu", "amd-gpu" or "intel-gpu" is non-zero.
 */
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        int gpu_is_active = (elpa_index_get_int_value(index, "nvidia-gpu", NULL)
                          || elpa_index_get_int_value(index, "gpu", NULL)
                          || elpa_index_get_int_value(index, "amd-gpu", NULL)
                          || elpa_index_get_int_value(index, "intel-gpu", NULL));

        if (solver != ELPA_SOLVER_2STAGE || !gpu_is_active) {
                return new_value == 0;
        }
        return (new_value == 0) || (new_value == 1);
}

1126
/* Cardinality callback: number of candidates offered by max_stored_rows_enumerate(). */
static int max_stored_rows_cardinality(elpa_index_t index) {
        (void) index;
        return 4;
}

/*
 * Enumerate callback: candidate i (0 <= i <= 3) is 64, 128, 256 or 512.
 * Any other i yields 0.
 *
 * The original switch had no default, so an out-of-range i fell off the end
 * of a non-void function; the explicit range check replaces that.
 */
static int max_stored_rows_enumerate(elpa_index_t index, int i) {
        static const int row_choices[] = {64, 128, 256, 512};
        const int count = (int) (sizeof(row_choices) / sizeof(row_choices[0]));

        if (i < 0 || i >= count) {
                return 0;
        }
        return row_choices[i];
}

static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value) {