elpa_index.c 60.8 KB
Newer Older
Andreas Marek's avatar
Andreas Marek committed
1
2
//    Copyright 2017, L. Hüdepohl and A. Marek, MPCDF
//
Andreas Marek's avatar
Andreas Marek committed
3
//    This file is part of ELPA.
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
Pavel Kus's avatar
Pavel Kus committed
48
#include <assert.h>
Pavel Kus's avatar
Pavel Kus committed
49
50
#include <stdio.h>
#include <stdlib.h>
51
#include <elpa/elpa.h>
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
52
#include "elpa_index.h"
53

Andreas Marek's avatar
Andreas Marek committed
54
55
#include "config.h"

56
#ifdef WITH_OPENMP_TRADITIONAL
Andreas Marek's avatar
Andreas Marek committed
57
58
59
60
61
62
#include <omp.h>
#endif

/* File-scope, externally visible thread-count state. Neither variable is
 * referenced in the part of the file reviewed here.
 * NOTE(review): presumably consumed by the omp_threads option helpers — confirm. */
int max_threads_glob;
/* Starts at 0. NOTE(review): looks like a "max_threads_glob has been
 * initialised" flag — confirm against its users. */
int set_max_threads_glob=0;

63
64
/*
 * Forward declarations of the file-local helpers that are referenced by the
 * option tables further down. Helpers come in three shapes:
 *   - cardinality: (index)               -> int
 *   - enumerate:   (index, i)            -> int
 *   - validation:  (index, n, new_value) -> int
 * plus "name" helpers mapping an int value to a string.
 */

/* Generic helpers used by boolean-like options */
static int cardinality_bool(elpa_index_t index);
static int enumerate_identity(elpa_index_t index, int i);
static int valid_bool(elpa_index_t index, int n, int new_value);

/* "matrix_order" */
static int number_of_matrix_layouts(elpa_index_t index);
static int matrix_layout_enumerate(elpa_index_t index, int i);
static int matrix_layout_is_valid(elpa_index_t index, int n, int new_value);
static const char *elpa_matrix_layout_name(int layout);

/* "solver" */
static int number_of_solvers(elpa_index_t index);
static int solver_enumerate(elpa_index_t index, int i);
static int solver_is_valid(elpa_index_t index, int n, int new_value);
static const char *elpa_solver_name(int solver);

/* "real_kernel" */
static int number_of_real_kernels(elpa_index_t index);
static int real_kernel_enumerate(elpa_index_t index, int i);
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);

/* "complex_kernel" */
static int number_of_complex_kernels(elpa_index_t index);
static int complex_kernel_enumerate(elpa_index_t index, int i);
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);

/* "blocking_in_band_to_full" */
static int band_to_full_cardinality(elpa_index_t index);
static int band_to_full_enumerate(elpa_index_t index, int i);
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

/* "stripewidth_real" */
static int stripewidth_real_cardinality(elpa_index_t index);
static int stripewidth_real_enumerate(elpa_index_t index, int i);
static int stripewidth_real_is_valid(elpa_index_t index, int n, int new_value);

/* "internal_nblk" */
static int internal_nblk_cardinality(elpa_index_t index);
static int internal_nblk_enumerate(elpa_index_t index, int i);
static int internal_nblk_is_valid(elpa_index_t index, int n, int new_value);

/* "stripewidth_complex" */
static int stripewidth_complex_cardinality(elpa_index_t index);
static int stripewidth_complex_enumerate(elpa_index_t index, int i);
static int stripewidth_complex_is_valid(elpa_index_t index, int n, int new_value);

/* "omp_threads" */
static int omp_threads_cardinality(elpa_index_t index);
static int omp_threads_enumerate(elpa_index_t index, int i);
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value);

/* "max_stored_rows" */
static int max_stored_rows_cardinality(elpa_index_t index);
static int max_stored_rows_enumerate(elpa_index_t index, int i);
static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value);

/* "min_tile_size" */
static int min_tile_size_cardinality(elpa_index_t index);
static int min_tile_size_enumerate(elpa_index_t index, int i);
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value);

/* Validators of the per-phase "gpu_*" switches */
static int valid_with_gpu(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value);

/* "intermediate_bandwidth" */
static int intermediate_bandwidth_cardinality(elpa_index_t index);
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i);
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value);

/* "cannon_buffer_size" */
static int cannon_buffer_size_cardinality(elpa_index_t index);
static int cannon_buffer_size_enumerate(elpa_index_t index, int i);
static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value);

/* Stand-alone validators */
static int na_is_valid(elpa_index_t index, int n, int new_value);
static int nev_is_valid(elpa_index_t index, int n, int new_value);
static int bw_is_valid(elpa_index_t index, int n, int new_value);
static int output_build_config_is_valid(elpa_index_t index, int n, int new_value);
static int gpu_is_valid(elpa_index_t index, int n, int new_value);
static int skewsymmetric_is_valid(elpa_index_t index, int n, int new_value);
static int is_positive(elpa_index_t index, int n, int new_value);

/* String <-> value conversion for the floating-point option types */
static int elpa_float_string_to_value(char *name, char *string, float *value);
static int elpa_float_value_to_string(char *name, float value, const char **string);
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
141

Pavel Kus's avatar
Pavel Kus committed
142
/*
 * Expands to the designated initializer of the common `.base` member of an
 * option table entry.
 *
 *   opt_name      string literal; it is also pasted behind "ELPA_DEFAULT_" and
 *                 "ELPA_FORCE_" to form the two environment variable names,
 *                 so it must be a literal
 *   opt_desc      human readable description
 *   once_flag     stored in .once
 *   readonly_flag stored in .readonly
 *   print_mode    stored in .print_flag
 */
#define BASE_ENTRY(opt_name, opt_desc, once_flag, readonly_flag, print_mode) \
                .base = { \
                        .name = opt_name, \
                        .description = opt_desc, \
                        .once = once_flag, \
                        .readonly = readonly_flag, \
                        .env_default = "ELPA_DEFAULT_" opt_name, \
                        .env_force = "ELPA_FORCE_" opt_name, \
                        .print_flag = print_mode, \
                }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
152

Pavel Kus's avatar
Pavel Kus committed
153
/*
 * Table row for an integer parameter: the base part is created with
 * once = 1 and readonly = 0, and `validator` (may be NULL) is stored in
 * .valid.
 */
#define INT_PARAMETER_ENTRY(opt_name, opt_desc, validator, print_mode) \
        { \
                BASE_ENTRY(opt_name, opt_desc, 1, 0, print_mode), \
                .valid = validator, \
        }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
158

Pavel Kus's avatar
Pavel Kus committed
159
/*
 * Table row for a boolean-like integer option (once = 0, readonly = 0).
 * `initial` becomes the default value, `level` / `domain` the autotune
 * level and domain. Cardinality, enumeration and validation are always
 * wired to cardinality_bool, enumerate_identity and valid_bool.
 */
#define BOOL_ENTRY(opt_name, opt_desc, initial, level, domain, print_mode) \
        { \
                BASE_ENTRY(opt_name, opt_desc, 0, 0, print_mode), \
                .default_value = initial, \
                .autotune_level = level, \
                .autotune_domain = domain, \
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
        }

Pavel Kus's avatar
Pavel Kus committed
170
/*
 * Table row for a general integer option (once = 0, readonly = 0).
 *
 *   initial           default value
 *   level / domain    autotune level and autotune domain
 *   count_fn          stored in .cardinality
 *   enum_fn           stored in .enumerate
 *   check_fn          stored in .valid
 *   str_fn            stored in .to_string (NULL: no string representation)
 */
#define INT_ENTRY(opt_name, opt_desc, initial, level, domain, count_fn, enum_fn, check_fn, str_fn, print_mode) \
        { \
                BASE_ENTRY(opt_name, opt_desc, 0, 0, print_mode), \
                .default_value = initial, \
                .autotune_level = level, \
                .autotune_domain = domain, \
                .cardinality = count_fn, \
                .enumerate = enum_fn, \
                .valid = check_fn, \
                .to_string = str_fn, \
        }

Pavel Kus's avatar
Pavel Kus committed
182
/*
 * Table row for an integer option that only has a base part (once = 0,
 * readonly = 0). All other members, including .valid, are left
 * zero-initialized.
 */
#define INT_ANY_ENTRY(opt_name, opt_desc, print_mode) \
        { \
                BASE_ENTRY(opt_name, opt_desc, 0, 0, print_mode), \
        }

187
188
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
189
static const elpa_index_int_entry_t int_entries[] = {
Pavel Kus's avatar
Pavel Kus committed
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_id", "Process rank", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("num_process_rows", "Number of process row number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_process_cols", "Number of process column number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_processes", "Total number of processes", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid, PRINT_YES),
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator", PRINT_NO),
        INT_ANY_ENTRY("blacs_context", "BLACS context", PRINT_NO),
206
//#ifdef REDISTRIBUTE_MATRIX
207
208
        INT_ENTRY("internal_nblk", "Internally used block size of scalapack block-cyclic distribution", 0, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
                   internal_nblk_cardinality, internal_nblk_enumerate, internal_nblk_is_valid, NULL, PRINT_YES),
209
//#endif
210
211
212
213
#ifdef STORE_BUILD_CONFIG
        INT_ENTRY("output_build_config", "Output the build config", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
                        cardinality_bool, enumerate_identity, output_build_config_is_valid, NULL, PRINT_NO),
#endif
214
215
	INT_ENTRY("matrix_order","Order of the matrix layout", COLUMN_MAJOR_ORDER, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
                         number_of_matrix_layouts, matrix_layout_enumerate, matrix_layout_is_valid, elpa_matrix_layout_name, PRINT_YES), \
216
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
217
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES),
218
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
219
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
220
221
        INT_ENTRY("is_skewsymmetric", "Matrix is skewsymmetic", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0,
                        cardinality_bool, enumerate_identity, skewsymmetric_is_valid, NULL, PRINT_YES),
222
223
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
224
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
225
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
226
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
227
                        cardinality_bool, enumerate_identity, valid_with_gpu, NULL, PRINT_YES),
228
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
229
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
230
        INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
231
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
232
233
234
	//not yet ported to GPU
        //INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
        //                cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
235
236
        INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
237
        INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
238
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
239
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Pavel Kus's avatar
Pavel Kus committed
240
                        number_of_real_kernels, real_kernel_enumerate, real_kernel_is_valid, real_kernel_name, PRINT_YES),
241
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Pavel Kus's avatar
Pavel Kus committed
242
                        number_of_complex_kernels, complex_kernel_enumerate, complex_kernel_is_valid, complex_kernel_name, PRINT_YES),
243

244
        INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
245
                        min_tile_size_cardinality, min_tile_size_enumerate, min_tile_size_is_valid, NULL, PRINT_YES),
246
        INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
247
                        intermediate_bandwidth_cardinality, intermediate_bandwidth_enumerate, intermediate_bandwidth_is_valid, NULL, PRINT_YES),
248

249
        INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
250
                        band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL, PRINT_YES),
251
        INT_ENTRY("stripewidth_real", "Stripewidth_real, default 48. Must be a multiple of 4", 48, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_REAL, \
252
                        stripewidth_real_cardinality, stripewidth_real_enumerate, stripewidth_real_is_valid, NULL, PRINT_YES),
253
        INT_ENTRY("stripewidth_complex", "Stripewidth_complex, default 96. Must be a multiple of 8", 96, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
254
255
                        stripewidth_complex_cardinality, stripewidth_complex_enumerate, stripewidth_complex_is_valid, NULL, PRINT_YES),

256
        INT_ENTRY("max_stored_rows", "Maximum number of stored rows used in ELPA 1 backtransformation, default 63", 63, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
257
                        max_stored_rows_cardinality, max_stored_rows_enumerate, max_stored_rows_is_valid, NULL, PRINT_YES),
258
#ifdef WITH_OPENMP_TRADITIONAL
259
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
260
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
261
#else
262
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
263
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
264
#endif
265
        INT_ENTRY("cannon_buffer_size", "Increasing the buffer size might make it faster, but costs memory", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
266
                        cannon_buffer_size_cardinality, cannon_buffer_size_enumerate, cannon_buffer_size_is_valid, NULL, PRINT_YES),
267
        //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
Pavel Kus's avatar
Pavel Kus committed
268
269
270
271
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL, PRINT_YES),
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
272
        BOOL_ENTRY("measure_performance", "Also measure with flops (via papi) with the timings", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
Pavel Kus's avatar
Pavel Kus committed
273
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
274
        BOOL_ENTRY("output_pinning_information", "Print the pinning information", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
Pavel Kus's avatar
Pavel Kus committed
275
        BOOL_ENTRY("cannon_for_generalized", "Whether to use Cannons algorithm for the generalized EVP", 1, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
276
277
};

Wenzhe Yu's avatar
Wenzhe Yu committed
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
/* Table row for a float option whose base part is created with
 * once = 0, readonly = 1 and print flag 0. */
#define READONLY_FLOAT_ENTRY(opt_name, opt_desc) \
        { \
                BASE_ENTRY(opt_name, opt_desc, 0, 1, 0) \
        }

/* Table row for a writable float option (once = 0, readonly = 0);
 * `initial` is stored as the default value. */
#define FLOAT_ENTRY(opt_name, opt_desc, initial, print_mode) \
        { \
                BASE_ENTRY(opt_name, opt_desc, 0, 0, print_mode), \
                .default_value = initial, \
        }

/* All options of type float; the array position is the option index n. */
static const elpa_index_float_entry_t float_entries[] = {
        FLOAT_ENTRY("thres_pd_single",
                    "Threshold to define ill-conditioning, default 0.00001",
                    0.00001,
                    PRINT_YES),
};

293
/* Table row for a double option whose base part is created with
 * once = 0, readonly = 1 and print flag 0. */
#define READONLY_DOUBLE_ENTRY(opt_name, opt_desc) \
        { \
                BASE_ENTRY(opt_name, opt_desc, 0, 1, 0) \
        }

298
299
300
301
302
303
/* Table row for a writable double option (once = 0, readonly = 0);
 * `initial` is stored as the default value. */
#define DOUBLE_ENTRY(opt_name, opt_desc, initial, print_mode) \
        { \
                BASE_ENTRY(opt_name, opt_desc, 0, 0, print_mode), \
                .default_value = initial, \
        }

304
static const elpa_index_double_entry_t double_entries[] = {
Wenzhe Yu's avatar
Wenzhe Yu committed
305
        DOUBLE_ENTRY("thres_pd_double", "Threshold to define ill-conditioning, default 0.00001", 0.00001, PRINT_YES),
306
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
307

308
/*
 * Release an option index.
 *
 * For every option type (as enumerated by FOR_ALL_TYPES) the three per-type
 * arrays `values`, `is_set` and `notified` are freed, followed by the index
 * object itself. The handle must not be used afterwards.
 *
 * index: handle to release; NULL is accepted and ignored, like free(NULL).
 */
void elpa_index_free(elpa_index_t index) {
        /* Without this guard a NULL handle would be dereferenced below */
        if (index == NULL) {
                return;
        }

#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

        free(index);
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
319
320
321
static int compar(const void *a, const void *b) {
        return strcmp(((elpa_index_int_entry_t *) a)->base.name,
                      ((elpa_index_int_entry_t *) b)->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
322
323
}

324
325
326
/*
 * Generates, for each option type, the lookup helper
 *
 *     static int find_<TYPE>_entry(char *name);
 *
 * which searches <TYPE>_entries linearly (lfind + compar) for an entry whose
 * base.name equals `name`. Returns the array index of the match, or -1 when
 * there is none.
 */
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                /* Search key: only base.name is inspected by compar() */ \
                elpa_index_##TYPE##_entry_t needle = { .base = {.name = name} } ; \
                size_t count = nelements(TYPE##_entries); \
                elpa_index_##TYPE##_entry_t *hit = \
                        lfind((const void*) &needle, (const void *) TYPE##_entries, &count, \
                              sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (hit == NULL) { \
                        return -1; \
                } \
                return (hit - &TYPE##_entries[0]); \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


/*
 * Generates, for each option type, the helper
 *
 *     static int getenv_<TYPE>(index, env_variable, notify_flag, n, value, error_string);
 *
 * which tries to take the value of option number `n` from the environment
 * variable `env_variable`.
 *
 *   index         option index; its `notified` array records which
 *                 notifications were already printed
 *   env_variable  name of the environment variable to read
 *   notify_flag   bit that is tested / set in notified[n]
 *   n             position of the option in <TYPE>_entries
 *   value         out: receives the parsed value
 *   error_string  leading word of the informational message (callers in this
 *                 file pass "Option")
 *
 * Returns 1 if the variable is set and could be parsed (then *value is
 * valid), 0 if it is unset or could not be parsed. A parse failure is
 * reported on stderr.
 *
 * When the value has a string representation, the "is set to" message is
 * printed only once per notify_flag and only on the printing MPI rank.
 * NOTE(review): values without a string representation are reported on every
 * call, because that branch does not consult `notified` — confirm this is
 * intended.
 */
#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                int err; \
                char *env_value = getenv(env_variable); \
                if (env_value) { \
                        err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
                        if (err != ELPA_OK) { \
                                /* Name the variable that was read, not the option it maps to */ \
                                fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                                env_variable, env_value, elpa_strerr(err)); \
                        } else {\
                                const char *value_string = NULL; \
                                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
                                        if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
                                                if (elpa_index_is_printing_mpi_rank(index)) { \
                                                        fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                                      error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                                } \
                                                index->TYPE##_options.notified[n] |= notify_flag; \
                                        } \
                                } else { \
                                        if (elpa_index_is_printing_mpi_rank(index)) { \
                                                fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
                                                        error_string, TYPE##_entries[n].base.name, *value, env_variable);\
                                        } \
                                } \
                                return 1; \
                        } \
                } \
                return 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


372
/*
 * Generates, for each option type, the getter
 *
 *     TYPE elpa_index_get_<TYPE>_value(elpa_index_t index, char *name, int *error);
 *
 * Looks up option `name` and returns its current value. Unless the entry is
 * marked `once` or `readonly`, the entry's env_force environment variable is
 * consulted first and, if it is set and parses, overrides the stored value.
 *
 *   error  optional (may be NULL); receives ELPA_OK on success and
 *          ELPA_ERROR_ENTRY_NOT_FOUND when no such option exists
 *
 * Returns the option value, or ERROR_VALUE when the option is unknown. An
 * empty entry table is treated like any other failed lookup, so the error
 * code is delivered through `error` and never disguised as an option value.
 */
#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, SCANF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                TYPE ret; \
                int n = (sizeof(TYPE##_entries) == 0) ? -1 : find_##TYPE##_entry(name); \
                if (n < 0) { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
                int from_env = 0; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                } \
                if (!from_env) { \
                        ret = index->TYPE##_options.values[n]; \
                } \
                if (error != NULL) { \
                        *error = ELPA_OK; \
                } \
                return ret; \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


/*
 * Generates, for each option type,
 *
 *     TYPE *elpa_index_get_<TYPE>_loc(elpa_index_t index, char *name);
 *
 * which returns the address of the stored value of option `name` inside
 * `index`, or NULL if the option does not exist (or the entry table of that
 * type is empty).
 */
#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
                int n = find_##TYPE##_entry(name); \
                return (n >= 0) ? &index->TYPE##_options.values[n] : NULL; \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


416
/*
 * Generates, for each option type, the setter
 *
 *     int elpa_index_set_<TYPE>_value(elpa_index_t index, char *name, TYPE value);
 *
 * Stores `value` for option `name` and marks the option as set. The checks
 * are performed in this order, and the first failing one determines the
 * return code:
 *
 *   ELPA_ERROR_ENTRY_NOT_FOUND      no option with that name
 *   ELPA_ERROR_ENTRY_INVALID_VALUE  the entry has a validator and it rejects `value`
 *   ELPA_ERROR_ENTRY_ALREADY_SET    the entry is marked `once` and was set before
 *   ELPA_ERROR_ENTRY_READONLY       the entry is marked `readonly`
 *
 * Returns ELPA_OK on success.
 */
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                if (TYPE##_entries[n].valid != NULL) { \
                        if(!TYPE##_entries[n].valid(index, n, value)) { \
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
                        } \
                } \
                /* Logical AND: both operands are truth values, not bit masks */ \
                if (TYPE##_entries[n].base.once && index->TYPE##_options.is_set[n]) { \
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
                if (TYPE##_entries[n].base.readonly) { \
                        return ELPA_ERROR_ENTRY_READONLY; \
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)

Pavel Kus's avatar
Pavel Kus committed
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
/*
 * Generates, for each option type,
 *
 *     int elpa_index_set_from_load_<TYPE>_value(elpa_index_t index, char *name, TYPE value, int explicit);
 *
 * Stores `value` for option `name` without running the validator and without
 * the once / readonly checks of the regular setter. The option is marked as
 * set only when `explicit` is non-zero.
 *
 * Returns ELPA_OK, or ELPA_ERROR_ENTRY_NOT_FOUND for an unknown option.
 */
#define IMPLEMENT_SET_FROM_LOAD_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_from_load_##TYPE##_value(elpa_index_t index, char *name, TYPE value, int explicit) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                index->TYPE##_options.values[n] = value; \
                if (explicit) { \
                        index->TYPE##_options.is_set[n] = 1; \
                } \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FROM_LOAD_FUNCTION)

458
459
460

/*
 * Generates, for each option type,
 *
 *     int elpa_index_<TYPE>_value_is_set(elpa_index_t index, char *name);
 *
 * Returns 1 if option `name` has been marked as set in `index`, 0 if it has
 * not, and ELPA_ERROR_ENTRY_NOT_FOUND if there is no option of this type
 * with that name.
 */
#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                return index->TYPE##_options.is_set[n] ? 1 : 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


/*
 * Type-agnostic "is set" query.
 *
 * Asks each typed elpa_index_<TYPE>_value_is_set() in turn and returns the
 * first non-negative answer.  When every typed query answers with a negative
 * value, an error is printed to stderr and the last answer is returned.
 */
int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int status = ELPA_ERROR;

#define RET_IF_SET(TYPE, ...) \
        status = elpa_index_##TYPE##_value_is_set(index, name); \
        if (!(status < 0)) { \
                return status; \
        }

        FOR_ALL_TYPES(RET_IF_SET)

        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return status;
}

493
494
495
496
497
/*
 * Check whether `new_value` would be acceptable for the int option `name`.
 *
 * Returns ELPA_ERROR_ENTRY_NOT_FOUND for an unknown name, ELPA_OK when the
 * entry has no validator or the validator accepts the value, and ELPA_ERROR
 * when the validator rejects it.
 */
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int pos = find_int_entry(name);
        if (pos < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        if (int_entries[pos].valid == NULL) {
                return ELPA_OK;
        }
        if (int_entries[pos].valid(index, pos, new_value)) {
                return ELPA_OK;
        }
        return ELPA_ERROR;
}

505
int elpa_int_value_to_string(char *name, int value, const char **string) {
506
507
508
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
509
        }
510
        if (int_entries[n].to_string == NULL) {
511
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
512
513
514
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
515
516
}

517
518

/*
 * Length of the string form of `value` for the int option `name`.
 *
 * Returns 0 when elpa_int_value_to_string() leaves the string pointer NULL
 * (its status code is deliberately not inspected; the NULL pre-set suffices).
 */
int elpa_int_value_to_strlen(char *name, int value) {
        const char *text = NULL;
        elpa_int_value_to_string(name, value, &text);
        if (text != NULL) {
                return strlen(text);
        }
        return 0;
}
527

528
529
530
531
532
533

/*
 * Length of the string form of the value currently stored in `index` for the
 * int option `name`; 0 when the option name is unknown.
 */
int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int pos = find_int_entry(name);
        if (pos >= 0) {
                int current = index->int_options.values[pos];
                return elpa_int_value_to_strlen(name, current);
        }
        return 0;
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
539
540
541
542
543
544
545
546
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
547
                if (ret == 1) {
548
                        *value = val;
549
550
                        return ELPA_OK;
                } else {
551
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
552
553
554
                }
        }

555
556
        for (int i = 0; i < int_entries[n].cardinality(NULL); i++) {
                int candidate = int_entries[n].enumerate(NULL, i);
557
558
559
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
560
                }
561
        }
562
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
563
564
}

Wenzhe Yu's avatar
Wenzhe Yu committed
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
/*
 * Parse the textual form `string` of the float option `name`.
 *
 * Returns ELPA_OK and stores the parsed number in *value on success;
 * otherwise prints a debug message to stderr and returns
 * ELPA_ERROR_ENTRY_INVALID_VALUE.
 */
int elpa_float_string_to_value(char *name, char *string, float *value) {
        float val;
        /* "%f" is the scanf conversion for a float*; "%lf" expects a double*
         * and storing through it into a float object is undefined behavior. */
        int ret = sscanf(string, "%f", &val);
        if (ret == 1) {
                *value = val;
                return ELPA_OK;
        } else {
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse float value '%s' for option '%s'\n", string, name);
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
        }
}

/*
 * Float options have no string representation: all arguments are ignored and
 * ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION is always returned.
 */
int elpa_float_value_to_string(char *name, float value, const char **string) {
        (void) name;
        (void) value;
        (void) string;
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
}

582
int elpa_double_string_to_value(char *name, char *string, double *value) {
583
584
        double val;
        int ret = sscanf(string, "%lf", &val);
585
        if (ret == 1) {
586
587
                *value = val;
                return ELPA_OK;
588
        } else {
589
590
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
591
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
592
593
594
        }
}

595
int elpa_double_value_to_string(char *name, double value, const char **string) {
596
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
597
}
598

599
/*
 * Number of values the int option `name` can take, obtained from the entry's
 * cardinality callback (called with a NULL index).
 *
 * Returns ELPA_ERROR_ENTRY_NOT_FOUND when the name is unknown or the entry
 * has no cardinality callback.
 */
int elpa_option_cardinality(char *name) {
        int pos = find_int_entry(name);
        if (pos >= 0 && int_entries[pos].cardinality) {
                return int_entries[pos].cardinality(NULL);
        }
        return ELPA_ERROR_ENTRY_NOT_FOUND;
}
606

607
/*
 * The i-th value of the int option `name`, obtained from the entry's
 * enumerate callback (called with a NULL index).
 *
 * Returns 0 when the name is unknown or the entry has no enumerate callback.
 */
int elpa_option_enumerate(char *name, int i) {
        int pos = find_int_entry(name);
        if (pos >= 0 && int_entries[pos].enumerate) {
                return int_entries[pos].enumerate(NULL, i);
        }
        return 0;
}

615

616
/* Helper functions for simple int entries */
617
/* A boolean option has exactly two possible values; `index` is not consulted. */
static int cardinality_bool(elpa_index_t index) {
        (void) index;
        return 2;
}
620

621
622
/* Accept only 0 and 1; `index` and `n` are not consulted. */
static int valid_bool(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return new_value == 0 || new_value == 1;
}

625
/* Enumeration in which the i-th value is i itself; `index` is not consulted. */
static int enumerate_identity(elpa_index_t index, int i) {
        (void) index;
        return i;
}

629
630
631
632
633
634
635
636
637
638
/* Helper functions for specific options */

/*
 * Callback for the ELPA_FOR_ALL_* list macros, to be expanded inside a
 * switch: emits a case for `value` that returns the option's identifier
 * spelled out as a string literal.  Extra list arguments are ignored.
 */
#define NAME_CASE(name, value, ...) \
        case value: \
                return #name;

/*
 * Callback for the ELPA_FOR_ALL_* list macros, to be expanded inside a
 * switch: emits a case for `value` that returns 1 (value is valid).
 */
#define VALID_CASE(name, value) \
        case value: \
                return 1;

639
/*
 * Callback for list macros whose items carry an availability expression.
 * To be expanded inside a switch: emits a case for `value` that returns
 * non-zero only when `available` is true and `other_checks(value)` holds.
 * `available` is parenthesized so that an expression argument cannot change
 * how the && binds.
 */
#define VALID_CASE_3(name, value, available, other_checks) \
        case value: \
                return (available) && (other_checks(value));
642

643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
/*
 * Identifier of the matrix layout constant `layout` as a string, or a
 * placeholder text when `layout` is not one of the listed layouts.
 */
static const char* elpa_matrix_layout_name(int layout) {
	switch(layout) {
		ELPA_FOR_ALL_MATRIX_LAYOUTS(NAME_CASE)
		default:
			break;
	}
	return "(Invalid matrix layout)";
}

/* Cardinality callback for the matrix layout option; `index` is not consulted. */
static int number_of_matrix_layouts(elpa_index_t index) {
        (void) index;
        return ELPA_NUMBER_OF_MATRIX_LAYOUTS;
}

/*
 * Enumerate callback for the matrix layout option: maps position `i` to a
 * layout value, or 0 when `i` matches no generated case.
 *
 * The case labels are computed by the preprocessor: for every listed layout a
 * case is emitted whose label is 0 plus one for each listed layout with a
 * smaller value, i.e. the layout's rank when ordered by value.
 */
static int matrix_layout_enumerate(elpa_index_t index, int i) {
/* Contributes +1 when `value` is smaller than the element count of the
 * array_of_size_value in scope (which equals the value of the layout whose
 * case label is being built), +0 otherwise. */
#define OPTION_RANK(name, value, ...) \
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)

/* Deferred-expansion helpers: DEFER1 postpones the expansion of its argument
 * by one scan and EVAL supplies that extra scan, so the list macro can be
 * applied again inside its own expansion. */
#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

/* One block per layout: declares an array with `value` elements (only its
 * size is used) and a case whose label is that layout's rank. */
#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_MATRIX_LAYOUTS
                EVAL(ELPA_FOR_ALL_MATRIX_LAYOUTS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

/* Returns 1 when `new_value` is one of the listed matrix layouts, else 0. */
static int matrix_layout_is_valid(elpa_index_t index, int n, int new_value) {
        switch(new_value) {
                ELPA_FOR_ALL_MATRIX_LAYOUTS(VALID_CASE)
                default:
                        break;
        }
        return 0;
}

685
686
687
688
689
/*
 * Identifier of the solver constant `solver` as a string, or a placeholder
 * text when `solver` is not one of the listed solvers.
 */
static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        break;
        }
        return "(Invalid solver)";
}

693
/* Cardinality callback for the solver option; `index` is not consulted. */
static int number_of_solvers(elpa_index_t index) {
        (void) index;
        return ELPA_NUMBER_OF_SOLVERS;
}

697
/*
 * Enumerate callback for the solver option: maps position `i` to a solver
 * value, or 0 when `i` matches no generated case.
 *
 * Each generated case label is 0 plus one for every listed solver with a
 * smaller value, i.e. the solver's rank when ordered by value.
 */
static int solver_enumerate(elpa_index_t index, int i) {
/* +1 when `value` is below the element count of the array_of_size_value in
 * scope (the value of the solver whose label is being built), +0 otherwise. */
#define OPTION_RANK(name, value, ...) \
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)

/* Deferred-expansion helpers that let the list macro be applied again inside
 * its own expansion. */
#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        break;
        }
        return 0;
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
720
/* Returns 1 when `new_value` is one of the listed solvers, else 0. */
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        break;
        }
        return 0;
}

728
/* Cardinality callback for the real 2-stage kernel option; `index` is not consulted. */
static int number_of_real_kernels(elpa_index_t index) {
        (void) index;
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
731

732
/*
 * Enumerate callback for the real 2-stage kernel option: maps position `i`
 * to a kernel value via the ENUMERATE_CASE machinery defined earlier in this
 * file, or returns 0 when `i` matches no generated case.
 */
static int real_kernel_enumerate(elpa_index_t index,int i) {
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        break;
        }
        return 0;
}
741

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
742
static const char *real_kernel_name(int kernel) {
743
744
745
746
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
747
        }
748
}
749

750
/*
 * Extra check for VALID_CASE_3: the real GPU kernel is acceptable only when
 * the local variable `gpu_is_active` (which must be in scope at the point of
 * use) is non-zero; every other kernel passes.  The expansion and its
 * argument are parenthesized so the conditional cannot be re-associated by
 * surrounding operators.
 */
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        ((kernel_number) == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1)
752

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
753
/*
 * Validator for the real 2-stage kernel option.
 *
 * With the 1-stage solver selected only ELPA_2STAGE_REAL_DEFAULT is accepted.
 * Otherwise the value must be a listed kernel that is marked available and,
 * for the GPU kernel, the "gpu" option must be non-zero.
 */
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_get_int_value(index, "solver", NULL) == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
        /* Referenced by name from REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE. */
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        switch(new_value) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
                default:
                        break;
        }
        return 0;
}
765

766
/* Cardinality callback for the complex 2-stage kernel option; `index` is not consulted. */
static int number_of_complex_kernels(elpa_index_t index) {
        (void) index;
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
769

770

771
/*
 * Enumerate callback for the complex 2-stage kernel option: maps position
 * `i` to a kernel value via the ENUMERATE_CASE machinery defined earlier in
 * this file, or returns 0 when `i` matches no generated case.
 */
static int complex_kernel_enumerate(elpa_index_t index,int i) {
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        break;
        }
        return 0;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
781
static const char *complex_kernel_name(int kernel) {
782
783
784
785
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
786
        }
787
}
788

789
/*
 * Extra check for VALID_CASE_3: the complex GPU kernel is acceptable only
 * when the local variable `gpu_is_active` (which must be in scope at the
 * point of use) is non-zero; every other kernel passes.  The expansion and
 * its argument are parenthesized so the conditional cannot be re-associated
 * by surrounding operators.
 */
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        ((kernel_number) == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1)
791

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
792
/*
 * Validator for the complex 2-stage kernel option.
 *
 * With the 1-stage solver selected only ELPA_2STAGE_COMPLEX_DEFAULT is
 * accepted.  Otherwise the value must be a listed kernel that is marked
 * available and, for the GPU kernel, the "gpu" option must be non-zero.
 */
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_get_int_value(index, "solver", NULL) == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }
        /* Referenced by name from COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE. */
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        switch(new_value) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
                default:
                        break;
        }
        return 0;
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
804

805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
/*
 * Identifier of the autotune level constant `level` as a string, or a
 * placeholder text when `level` is not one of the listed levels.
 */
static const char* elpa_autotune_level_name(int level) {
        switch(level) {
                ELPA_FOR_ALL_AUTOTUNE_LEVELS(NAME_CASE)
                default:
                        break;
        }
        return "(Invalid autotune level)";
}

/*
 * Identifier of the autotune domain constant `domain` as a string, or a
 * placeholder text when `domain` is not one of the listed domains.
 */
static const char* elpa_autotune_domain_name(int domain) {
        switch(domain) {
                ELPA_FOR_ALL_AUTOTUNE_DOMAINS(NAME_CASE)
                default:
                        break;
        }
        return "(Invalid autotune domain)";
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
821
822
823
824
/* Validator for "na": only strictly positive values are accepted. */
static int na_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return 0 < new_value;
}

825
826
827
828
/*
 * Validator for "nev": requires "na" to be set and accepts values in the
 * closed range [0, na].
 *
 * The is-set query returns 1, 0 or a negative error code, so it is compared
 * against 1 (as the other validators in this file do); a plain logical
 * negation would let an error code pass as "set".
 */
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
}

/* Generic validator: only strictly positive values are accepted. */
static int is_positive(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return 0 < new_value;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
836
837
838
839
840
841
842
843
844
/*
 * Bandwidth validator: requires "na" to be set and accepts values in the
 * half-open range [0, na).
 */
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") == 1) {
                int matrix_size = elpa_index_get_int_value(index, "na", NULL);
                return (new_value >= 0) && (new_value < matrix_size);
        }
        return 0;
}
845

846
847
848
849
/* Validator for a 0/1 switch: accepts exactly 0 and 1. */
static int output_build_config_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return (0 <= new_value) && (new_value <= 1);
}

850
851
852
853
/* Validator for the "gpu" switch: accepts exactly 0 and 1. */
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return (0 <= new_value) && (new_value <= 1);
}

854
855
856
857
/* Validator for a 0/1 switch: accepts exactly 0 and 1. */
static int skewsymmetric_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return (0 <= new_value) && (new_value <= 1);
}

858
/* Cardinality callback: ten candidate values; `index` is not consulted. */
static int band_to_full_cardinality(elpa_index_t index) {
	(void) index;
	return 10;
}
861
/* Enumerate callback: position i maps to the value i + 1; `index` is not consulted. */
static int band_to_full_enumerate(elpa_index_t index, int i) {
	(void) index;
	return 1 + i;
}

865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
/* Validator: any non-negative value is accepted. */
static int internal_nblk_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;
        return new_value >= 0;
}
/* Cardinality callback: nine candidate values; `index` is not consulted. */
static int internal_nblk_cardinality(elpa_index_t index) {
	(void) index;
	return 9;
}

/*
 * Enumerate callback: maps position i (0..8) to a candidate block size.
 *
 * Positions outside the table return 0, like the other enumerate callbacks
 * in this file; previously control fell off the end of the function without
 * returning a value.
 */
static int internal_nblk_enumerate(elpa_index_t index, int i) {
	/* NOTE(review): the sequence jumps from 256 to 1024 (no 512) — kept
	 * as found; confirm that this is intentional. */
	static const int candidates[] = { 2, 4, 8, 16, 32, 64, 128, 256, 1024 };
	const int count = (int) (sizeof(candidates) / sizeof(candidates[0]));

	(void) index;
	if (i < 0 || i >= count) {
		return 0;
	}
	return candidates[i];
}

Pavel Kus's avatar
Pavel Kus committed
895
// TODO shouldnt it be only for ELPA2??
896
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
897
898
	int max_block=10;
        return (1 <= new_value) && (new_value <= max_block);
899
900
}

901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
/* Cardinality callback: seventeen candidate values; `index` is not consulted. */
static int stripewidth_real_cardinality(elpa_index_t index) {
	(void) index;
	return 17;
}

/* Cardinality callback: seventeen candidate values; `index` is not consulted. */
static int stripewidth_complex_cardinality(elpa_index_t index) {
	(void) index;
	return 17;
}

/*
 * Enumerate callback: maps position i (0..16) to 32, 36, ..., 96.
 *
 * Positions outside that range return 0, like the other enumerate callbacks
 * in this file; previously control fell off the end of the function without
 * returning a value.
 */
static int stripewidth_real_enumerate(elpa_index_t index, int i) {
	(void) index;
	if (i < 0 || i > 16) {
		return 0;
	}
	/* Candidates start at 32 and advance in steps of 4. */
	return 32 + 4 * i;
}

/*
 * Enumerate callback: maps position i (0..16) to 48, 56, ..., 176.
 *
 * Positions outside that range return 0, like the other enumerate callbacks
 * in this file; previously control fell off the end of the function without
 * returning a value.
 */
static int stripewidth_complex_enumerate(elpa_index_t index, int i) {
	(void) index;
	if (i < 0 || i > 16) {
		return 0;
	}
	/* Candidates start at 48 and advance in steps of 8. */
	return 48 + 8 * i;
}

/* Validator: accepts values in the closed range [32, 96]. */
static int stripewidth_real_is_valid(elpa_index_t index, int n, int new_value) {
	if (new_value < 32) {
		return 0;
	}
	return new_value <= 96;
}

/* Validator: accepts values in the closed range [48, 176]. */
static int stripewidth_complex_is_valid(elpa_index_t index, int n, int new_value) {
	if (new_value < 48) {
		return 0;
	}
	return new_value <= 176;
}

Pavel Kus's avatar
Pavel Kus committed
995
/*
 * Cardinality callback for the thread-count option.
 *
 * With WITH_OPENMP_TRADITIONAL the file-level max_threads_glob is filled from
 * omp_get_max_threads() the first time it is needed (tracked through
 * set_max_threads_glob); without it the value is forced to 1.  The cached
 * value is returned.
 */
static int omp_threads_cardinality(elpa_index_t index) {
#ifdef WITH_OPENMP_TRADITIONAL
	if (!set_max_threads_glob) {
		max_threads_glob = omp_get_max_threads();
		set_max_threads_glob = 1;
	}
#else
	max_threads_glob = 1;
	set_max_threads_glob = 1;
#endif
	int limit = max_threads_glob;
	return limit;
}

Pavel Kus's avatar
Pavel Kus committed
1010
/* Enumerate callback: position i maps to i + 1 threads; `index` is not consulted. */
static int omp_threads_enumerate(elpa_index_t index, int i) {
        (void) index;
        return 1 + i;
}

/*
 * Validator for the thread-count option: accepts values in the closed range
 * [1, max_threads_glob].
 *
 * max_threads_glob is initialised here exactly as in
 * omp_threads_cardinality(): from omp_get_max_threads() on first use when
 * WITH_OPENMP_TRADITIONAL is defined, otherwise forced to 1.
 */
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
#ifdef WITH_OPENMP_TRADITIONAL
	if (!set_max_threads_glob) {
		max_threads_glob = omp_get_max_threads();
		set_max_threads_glob = 1;
	}
#else
	max_threads_glob = 1;
	set_max_threads_glob = 1;
#endif
	int limit = max_threads_glob;
	if (new_value < 1) {
		return 0;
	}
	return new_value <= limit;
}

1029

1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
/*
 * Validator for 0/1 switches that depend on the GPU: 0 is always accepted,
 * 1 only when the "gpu" option equals 1.
 */
static int valid_with_gpu(elpa_index_t index, int n, int new_value) {
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        if (new_value == 0) {
                return 1;
        }
        return (gpu_is_active == 1) && (new_value == 1);
}

/*
 * Validator for 0/1 switches tied to the GPU path of the 1-stage solver:
 * 0 is always accepted, 1 only when "solver" is ELPA_SOLVER_1STAGE and the
 * "gpu" option equals 1.
 */
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        int switchable = (solver == ELPA_SOLVER_1STAGE) && (gpu_is_active == 1);
        if (new_value == 0) {
                return 1;
        }
        return switchable && (new_value == 1);
}

/*
 * Validator for 0/1 switches tied to the GPU path of the 2-stage solver:
 * 0 is always accepted, 1 only when "solver" is ELPA_SOLVER_2STAGE and the
 * "gpu" option equals 1.
 */
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        int switchable = (solver == ELPA_SOLVER_2STAGE) && (gpu_is_active == 1);
        if (new_value == 0) {
                return 1;
        }
        return switchable && (new_value == 1);
}

1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
/* Cardinality callback: eight candidate values; `index` is not consulted. */
static int max_stored_rows_cardinality(elpa_index_t index) {
	(void) index;
	return 8;
}

/*
 * Enumerate callback: maps position i (0..7) to 15, 31, ..., 127.
 *
 * Positions outside that range return 0, like the other enumerate callbacks
 * in this file; previously control fell off the end of the function without
 * returning a value.
 */
static int max_stored_rows_enumerate(elpa_index_t index, int i) {
	(void) index;
	if (i < 0 || i > 7) {
		return 0;
	}
	/* Candidates start at 15 and advance in steps of 16. */
	return 15 + 16 * i;
}

/*
 * Validator: with the 2-stage solver selected only 15 is accepted; otherwise
 * any value in the closed range [15, 127].
 */
static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver != ELPA_SOLVER_2STAGE) {
                return (new_value >= 15) && (new_value <= 127);
        }
        return new_value == 15;
}


1097
1098
1099
// TODO: this should definitely be improved (too many options to test in autotuning)
static const int TILE_SIZE_STEP = 128;

1100
/*
 * Cardinality callback: na / TILE_SIZE_STEP candidates.  Returns 0 when
 * `index` is NULL or "na" has not been set.
 */
static int min_tile_size_cardinality(elpa_index_t index) {
        if (index == NULL || elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        return elpa_index_get_int_value(index, "na", NULL) / TILE_SIZE_STEP;
}

/* Enumerate callback: position i maps to the (i+1)-th multiple of TILE_SIZE_STEP. */
static int min_tile_size_enumerate(elpa_index_t index, int i) {
        (void) index;
        int multiple = i + 1;
        return multiple * TILE_SIZE_STEP;
}

/* Validator: accepts any value that is a multiple of TILE_SIZE_STEP. */
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value) {
        int remainder = new_value % TILE_SIZE_STEP;
        return remainder == 0;
}
1118

1119
/*
 * Cardinality callback: na / nblk candidates.  Returns 0 when `index` is
 * NULL or when either "na" or "nblk" has not been set.
 */
static int intermediate_bandwidth_cardinality(elpa_index_t index) {
        if (index == NULL) {
                return 0;
        }

        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        int matrix_size = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        int block_size = elpa_index_get_int_value(index, "nblk", NULL);

        return matrix_size / block_size;
}

/*
 * Enumerate callback: position i maps to the (i+1)-th multiple of "nblk".
 * Returns 0 when `index` is NULL or "nblk" has not been set.
 */
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i) {
        if (index == NULL || elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        int block_size = elpa_index_get_int_value(index, "nblk", NULL);
        return block_size * (i + 1);
}

/*
 * Validator for the intermediate bandwidth.
 *
 * Requires both "na" and "nblk" to be set.  With the 1-stage solver only the
 * value nblk is accepted.  Otherwise the value must lie in (1, na] and be a
 * multiple of nblk; a non-multiple is reported on stderr.
 *
 * Returns 1 for an acceptable value and 0 otherwise.  The accepting path of
 * the non-1-stage branch previously ended without a return statement.
 */
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value) {
        int na, nblk;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == nblk;
        }

        if ((new_value <= 1) || (new_value > na)) {
                return 0;
        }
        if (new_value % nblk != 0) {
                fprintf(stderr, "intermediate bandwidth has to be multiple of nblk\n");
                return 0;
        }
        return 1;
}

1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
/* Cardinality callback: two candidate values; `index` is not consulted. */
static int cannon_buffer_size_cardinality(elpa_index_t index) {
        (void) index;
        return 2;
}

/*
 * Enumerate callback: position 0 maps to 0, every other position to
 * num_process_rows - 1.  Returns 0 when `index` is NULL or
 * "num_process_rows" has not been set.
 */
static int cannon_buffer_size_enumerate(elpa_index_t index, int i) {
        if (index == NULL || elpa_index_int_value_is_set(index, "num_process_rows") != 1) {
                return 0;
        }
        int process_rows = elpa_index_get_int_value(index, "num_process_rows", NULL);

        // TODO: 0 is both error code and legal value?
        return (i == 0) ? 0 : process_rows - 1;
}

/*
 * Validator: accepts values in the half-open range [0, num_process_rows).
 * Returns 0 when `index` is NULL or "num_process_rows" has not been set.
 */
static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value) {
        if (index == NULL || elpa_index_int_value_is_set(index, "num_process_rows") != 1) {
                return 0;
        }
        int process_rows = elpa_index_get_int_value(index, "num_process_rows", NULL);
        if (new_value < 0) {
                return 0;
        }
        return new_value < process_rows;
}

1205
elpa_index_t elpa_index_instance() {
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
1223
}
1224

Pavel Kus's avatar
Pavel Kus committed
1225
1226
1227
1228
1229
1230
1231
/*
 * Returns 1 when int entry `i` takes part in autotuning at the given level
 * and domain (non-zero autotune_level not above `autotune_level`, domain bits
 * overlapping `autotune_domain`) but its is_set flag is raised; 0 otherwise.
 */
static int is_tunable_but_overriden(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        if (int_entries[i].autotune_level == 0) {
                return 0;
        }
        if (int_entries[i].autotune_level > autotune_level) {
                return 0;
        }
        if (!(int_entries[i].autotune_domain & autotune_domain)) {
                return 0;
        }
        return index->int_options.is_set[i] ? 1 : 0;
}

1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
/*
 * Returns 1 when int entry `i` takes part in autotuning at the given level
 * and domain (non-zero autotune_level not above `autotune_level`, domain bits
 * overlapping `autotune_domain`) and its is_set flag is not raised;
 * 0 otherwise.
 */
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        if (int_entries[i].autotune_level == 0) {
                return 0;
        }
        if (int_entries[i].autotune_level > autotune_level) {
                return 0;
        }
        if (!(int_entries[i].autotune_domain & autotune_domain)) {
                return 0;
        }
        return !index->int_options.is_set[i];
}

/*
 * Size of the autotuning search space: the product of the cardinalities of
 * all int entries for which is_tunable() holds at the given level and
 * domain.  Returns 1 when no entry qualifies.
 */
int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int combinations = 1;

        for (int i = 0; i < nelements(int_entries); i++) {
                if (!is_tunable(index, i, autotune_level, autotune_domain)) {
                        continue;
                }
                combinations *= int_entries[i].cardinality(index);
        }
        return combinations;
}

1250
1251
void elpa_index_print_int_parameter(elpa_index_t index, char* buff, int i)
{
Pavel Kus's avatar
Pavel Kus committed
1252
        int value = index->int_options.values[i];