elpa_index.c 55.5 KB
Newer Older
Andreas Marek's avatar
Andreas Marek committed
1
//    This file is part of ELPA.
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
Pavel Kus's avatar
Pavel Kus committed
46
#include <assert.h>
Pavel Kus's avatar
Pavel Kus committed
47
48
#include <stdio.h>
#include <stdlib.h>
49
#include <elpa/elpa.h>
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
50
#include "elpa_index.h"
51

Andreas Marek's avatar
Andreas Marek committed
52
53
54
55
56
57
58
59
60
#include "config.h"

#ifdef WITH_OPENMP
#include <omp.h>
#endif

int max_threads_glob;
int set_max_threads_glob=0;

61
62
static int enumerate_identity(elpa_index_t index, int i);
static int cardinality_bool(elpa_index_t index);
63
64
static int valid_bool(elpa_index_t index, int n, int new_value);

65
66
static int number_of_solvers(elpa_index_t index);
static int solver_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
67
static int solver_is_valid(elpa_index_t index, int n, int new_value);
68
69
static const char* elpa_solver_name(int solver);

70
71
static int number_of_real_kernels(elpa_index_t index);
static int real_kernel_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
72
73
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);
74

75
76
static int number_of_complex_kernels(elpa_index_t index);
static int complex_kernel_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
77
78
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);
79

80
81
static int band_to_full_cardinality(elpa_index_t index);
static int band_to_full_enumerate(elpa_index_t index, int i);
82
83
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

84
85
86
87
88
89
90
91
static int stripewidth_real_cardinality(elpa_index_t index);
static int stripewidth_real_enumerate(elpa_index_t index, int i);
static int stripewidth_real_is_valid(elpa_index_t index, int n, int new_value);

static int stripewidth_complex_cardinality(elpa_index_t index);
static int stripewidth_complex_enumerate(elpa_index_t index, int i);
static int stripewidth_complex_is_valid(elpa_index_t index, int n, int new_value);

Pavel Kus's avatar
Pavel Kus committed
92
93
94
static int omp_threads_cardinality(elpa_index_t index);
static int omp_threads_enumerate(elpa_index_t index, int i);
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value);
Andreas Marek's avatar
Andreas Marek committed
95

96
97
98
99
static int max_stored_rows_cardinality(elpa_index_t index);
static int max_stored_rows_enumerate(elpa_index_t index, int i);
static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value);

100
static int min_tile_size_cardinality(elpa_index_t index);
101
102
103
104
105
106
static int min_tile_size_enumerate(elpa_index_t index, int i);
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value);

static int valid_with_gpu(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value);
107

108
static int intermediate_bandwidth_cardinality(elpa_index_t index);
109
110
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i);
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value);
111

112
113
114
115
static int cannon_buffer_size_cardinality(elpa_index_t index);
static int cannon_buffer_size_enumerate(elpa_index_t index, int i);
static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
116
static int na_is_valid(elpa_index_t index, int n, int new_value);
117
static int nev_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
118
static int bw_is_valid(elpa_index_t index, int n, int new_value);
119
static int gpu_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
120

121
122
static int is_positive(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
123
124
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
125

Pavel Kus's avatar
Pavel Kus committed
126
#define BASE_ENTRY(option_name, option_description, once_value, readonly_value, print_flag_value) \
127
128
129
130
131
132
133
                .base = { \
                        .name = option_name, \
                        .description = option_description, \
                        .once = once_value, \
                        .readonly = readonly_value, \
                        .env_default = "ELPA_DEFAULT_" option_name, \
                        .env_force = "ELPA_FORCE_" option_name, \
Pavel Kus's avatar
Pavel Kus committed
134
                        .print_flag = print_flag_value, \
135
                }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
136

Pavel Kus's avatar
Pavel Kus committed
137
#define INT_PARAMETER_ENTRY(option_name, option_description, valid_func, print_flag) \
138
        { \
Pavel Kus's avatar
Pavel Kus committed
139
                BASE_ENTRY(option_name, option_description, 1, 0, print_flag), \
140
                .valid = valid_func, \
141
        }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
142

Pavel Kus's avatar
Pavel Kus committed
143
#define BOOL_ENTRY(option_name, option_description, default, tune_level, tune_domain, print_flag) \
144
        { \
Pavel Kus's avatar
Pavel Kus committed
145
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
146
                .default_value = default, \
147
148
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
149
150
151
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
152
153
        }

Pavel Kus's avatar
Pavel Kus committed
154
#define INT_ENTRY(option_name, option_description, default, tune_level, tune_domain, card_func, enumerate_func, valid_func, to_string_func, print_flag) \
155
        { \
Pavel Kus's avatar
Pavel Kus committed
156
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
157
                .default_value = default, \
158
159
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
160
161
162
163
                .cardinality = card_func, \
                .enumerate = enumerate_func, \
                .valid = valid_func, \
                .to_string = to_string_func, \
164
165
        }

Pavel Kus's avatar
Pavel Kus committed
166
#define INT_ANY_ENTRY(option_name, option_description, print_flag) \
167
        { \
Pavel Kus's avatar
Pavel Kus committed
168
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
169
170
        }

171
172
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
173
static const elpa_index_int_entry_t int_entries[] = {
Pavel Kus's avatar
Pavel Kus committed
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_id", "Process rank", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("num_process_rows", "Number of process row number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_process_cols", "Number of process column number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_processes", "Total number of processes", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid, PRINT_YES),
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator", PRINT_NO),
        INT_ANY_ENTRY("blacs_context", "BLACS context", PRINT_NO),
190
        INT_ANY_ENTRY("legacy_api", "This object has been created through the legacy api. Parameter for internal use only", PRINT_NO),
191
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
192
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES),
193
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
194
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
195
196
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
197
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
198
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
199
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
200
                        cardinality_bool, enumerate_identity, valid_with_gpu, NULL, PRINT_YES),
201
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
202
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
203
        INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
204
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
205
        INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
206
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
207
        INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
208
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
209
        INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
210
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
211
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Pavel Kus's avatar
Pavel Kus committed
212
                        number_of_real_kernels, real_kernel_enumerate, real_kernel_is_valid, real_kernel_name, PRINT_YES),
213
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Pavel Kus's avatar
Pavel Kus committed
214
                        number_of_complex_kernels, complex_kernel_enumerate, complex_kernel_is_valid, complex_kernel_name, PRINT_YES),
215

216
        INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
217
                        min_tile_size_cardinality, min_tile_size_enumerate, min_tile_size_is_valid, NULL, PRINT_YES),
218
        INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
219
                        intermediate_bandwidth_cardinality, intermediate_bandwidth_enumerate, intermediate_bandwidth_is_valid, NULL, PRINT_YES),
220

221
        INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
222
                        band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL, PRINT_YES),
223
        INT_ENTRY("stripewidth_real", "Stripewidth_real, default 48. Must be a multiple of 4", 48, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_REAL,
224
                        stripewidth_real_cardinality, stripewidth_real_enumerate, stripewidth_real_is_valid, NULL, PRINT_YES),
225
        INT_ENTRY("stripewidth_complex", "Stripewidth_complex, default 96. Must be a multiple of 8", 96, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_COMPLEX,
226
227
                        stripewidth_complex_cardinality, stripewidth_complex_enumerate, stripewidth_complex_is_valid, NULL, PRINT_YES),

228
        INT_ENTRY("max_stored_rows", "Maximum number of stored rows used in ELPA 1 backtransformation, default 63", 63, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
229
                        max_stored_rows_cardinality, max_stored_rows_enumerate, max_stored_rows_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
230
#ifdef WITH_OPENMP
231
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
232
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
233
#else
234
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
235
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
236
#endif
237
        INT_ENTRY("cannon_buffer_size", "Increasing the buffer size might make it faster, but costs memory", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
238
                        cannon_buffer_size_cardinality, cannon_buffer_size_enumerate, cannon_buffer_size_is_valid, NULL, PRINT_YES),
239
        //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
Pavel Kus's avatar
Pavel Kus committed
240
241
242
243
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL, PRINT_YES),
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
244
        BOOL_ENTRY("measure_performance", "Also measure with flops (via papi) with the timings", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
Pavel Kus's avatar
Pavel Kus committed
245
246
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("cannon_for_generalized", "Whether to use Cannons algorithm for the generalized EVP", 1, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
247
248
249
};

#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
250
        { \
251
                BASE_ENTRY(option_name, option_description, 0, 1, 0) \
252
253
254
        }

static const elpa_index_double_entry_t double_entries[] = {
255
        /* Empty for now */
256
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
257

258
void elpa_index_free(elpa_index_t index) {
259
260
261
262
263
264
265
#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
266
267
268
        free(index);
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
269
270
271
static int compar(const void *a, const void *b) {
        return strcmp(((elpa_index_int_entry_t *) a)->base.name,
                      ((elpa_index_int_entry_t *) b)->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
272
273
}

274
275
276
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                elpa_index_##TYPE##_entry_t *entry; \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
277
                elpa_index_##TYPE##_entry_t key = { .base = {.name = name} } ; \
278
                size_t nmembers = nelements(TYPE##_entries); \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
279
                entry = lfind((const void*) &key, (const void *) TYPE##_entries, &nmembers, sizeof(elpa_index_##TYPE##_entry_t), compar); \
280
281
282
283
284
285
286
287
288
289
290
291
292
293
                if (entry) { \
                        return (entry - &TYPE##_entries[0]); \
                } else { \
                        return -1; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                int err; \
                char *env_value = getenv(env_variable); \
                if (env_value) { \
294
                        err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
295
296
297
298
299
                        if (err != ELPA_OK) { \
                                fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                                TYPE##_entries[n].base.name, env_value, elpa_strerr(err)); \
                        } else {\
                                const char *value_string = NULL; \
300
                                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
301
                                        if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
Pavel Kus's avatar
Pavel Kus committed
302
                                                if (elpa_index_is_printing_mpi_rank(index)) { \
303
304
305
                                                        fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                                      error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                                } \
306
307
308
                                                index->TYPE##_options.notified[n] |= notify_flag; \
                                        } \
                                } else { \
Pavel Kus's avatar
Pavel Kus committed
309
                                        if (elpa_index_is_printing_mpi_rank(index)) { \
310
                                                fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
311
                                                        error_string, TYPE##_entries[n].base.name, *value, env_variable);\
312
                                        } \
313
314
315
316
317
318
319
320
321
                                } \
                                return 1; \
                        } \
                } \
                return 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


322
#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, SCANF_SPEC, ERROR_VALUE) \
323
324
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                TYPE ret; \
325
326
327
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        int from_env = 0; \
                        if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                                from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                        } \
                        if (!from_env) { \
                                ret = index->TYPE##_options.values[n]; \
                        } \
                        if (error != NULL) { \
                                *error = ELPA_OK; \
                        } \
                        return ret; \
                } else { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
353
354
355
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
356
357
358
359
360
361
362
363
364
365
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        return &index->TYPE##_options.values[n]; \
                } else { \
                        return NULL; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


366
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
367
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
368
369
370
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
371
372
373
374
375
376
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                }; \
                if (TYPE##_entries[n].valid != NULL) { \
                        if(!TYPE##_entries[n].valid(index, n, value)) { \
377
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
378
379
380
                        }; \
                } \
                if (TYPE##_entries[n].base.once & index->TYPE##_options.is_set[n]) { \
381
382
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
383
                if (TYPE##_entries[n].base.readonly) { \
384
                        return ELPA_ERROR_ENTRY_READONLY; \
385
386
387
388
389
390
391
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)

Pavel Kus's avatar
Pavel Kus committed
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
#define IMPLEMENT_SET_FROM_LOAD_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_from_load_##TYPE##_value(elpa_index_t index, char *name, TYPE value, int explicit) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                }; \
                index->TYPE##_options.values[n] = value; \
                if(explicit) \
                        index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FROM_LOAD_FUNCTION)

408
409
410

#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
411
412
413
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        if (index->TYPE##_options.is_set[n]) { \
                                return 1; \
                        } else { \
                                return 0; \
                        } \
                } else { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int res = ELPA_ERROR;

#define RET_IF_SET(TYPE, ...) \
        res = elpa_index_##TYPE##_value_is_set(index, name); \
        if (res >= 0) { \
                return res; \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
435
        }
436
437
438

        FOR_ALL_TYPES(RET_IF_SET)

439
440
441
442
        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return res;
}

443
444
445
446
447
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name); \
        if (n >= 0) { \
                if (int_entries[n].valid == NULL) {
                        return ELPA_OK;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
448
                } else {
449
                        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
450
451
                }
        }
452
        return ELPA_ERROR_ENTRY_NOT_FOUND;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
453
454
}

455
int elpa_int_value_to_string(char *name, int value, const char **string) {
456
457
458
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
459
        }
460
        if (int_entries[n].to_string == NULL) {
461
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
462
463
464
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
465
466
}

467
468

int elpa_int_value_to_strlen(char *name, int value) {
469
        const char *string = NULL;
470
        elpa_int_value_to_string(name, value, &string);
471
        if (string == NULL) {
472
473
474
                return 0;
        } else {
                return strlen(string);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
475
476
        }
}
477

478
479
480
481
482
483

int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int n = find_int_entry(name);
        if (n < 0) {
                return 0;
        }
484
        return elpa_int_value_to_strlen(name, index->int_options.values[n]);
485
486
487
488
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
489
490
491
492
493
494
495
496
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
497
                if (ret == 1) {
498
                        *value = val;
499
500
                        return ELPA_OK;
                } else {
501
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
502
503
504
                }
        }

505
506
        for (int i = 0; i < int_entries[n].cardinality(NULL); i++) {
                int candidate = int_entries[n].enumerate(NULL, i);
507
508
509
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
510
                }
511
        }
512
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
513
514
}

515
int elpa_double_string_to_value(char *name, char *string, double *value) {
516
517
        double val;
        int ret = sscanf(string, "%lf", &val);
518
        if (ret == 1) {
519
520
                *value = val;
                return ELPA_OK;
521
        } else {
522
523
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
524
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
525
526
527
        }
}

528
int elpa_double_value_to_string(char *name, double value, const char **string) {
529
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
530
}
531

532
int elpa_option_cardinality(char *name) {
533
534
535
536
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].cardinality) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
537
        return int_entries[n].cardinality(NULL);
538
}
539

540
int elpa_option_enumerate(char *name, int i) {
541
542
543
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].enumerate) {
                return 0;
544
        }
545
        return int_entries[n].enumerate(NULL, i);
546
547
}

548

549
/* Helper functions for simple int entries */
550
static int cardinality_bool(elpa_index_t index) {
551
552
        return 2;
}
553

554
555
static int valid_bool(elpa_index_t index, int n, int new_value) {
        return (0 <= new_value) && (new_value < 2);
556
557
}

558
static int enumerate_identity(elpa_index_t index, int i) {
559
560
561
        return i;
}

562
563
564
565
566
567
568
569
570
571
/* Helper functions for specific options */

#define NAME_CASE(name, value, ...) \
        case value: \
                return #name;

#define VALID_CASE(name, value) \
        case value: \
                return 1;

572
#define VALID_CASE_3(name, value, available, other_checks) \
573
        case value: \
574
                return available && (other_checks(value));
575
576
577
578
579
580

static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        return "(Invalid solver)";
581
582
583
        }
}

584
static int number_of_solvers(elpa_index_t index) {
585
        return ELPA_NUMBER_OF_SOLVERS;
586
587
}

588
static int solver_enumerate(elpa_index_t index, int i) {
589
#define OPTION_RANK(name, value, ...) \
590
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607

#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
608
609
610
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
611
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
612
613
614
615
616
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        return 0;
        }
617
618
}

619
static int number_of_real_kernels(elpa_index_t index) {
620
621
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
622

623
static int real_kernel_enumerate(elpa_index_t index,int i) {
624
625
626
627
628
629
630
631
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}
632

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
633
static const char *real_kernel_name(int kernel) {
634
635
636
637
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
638
        }
639
}
640

641
642
643
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
644
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
645
646
647
648
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
649
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
650
        switch(new_value) {
651
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
652
653
                default:
                        return 0;
654
        }
655
}
656

657
static int number_of_complex_kernels(elpa_index_t index) {
658
659
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
660

661

662
static int complex_kernel_enumerate(elpa_index_t index,int i) {
663
664
665
666
667
668
669
670
671
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
672
static const char *complex_kernel_name(int kernel) {
673
674
675
676
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
677
        }
678
}
679

680
681
682
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
683
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
684
685
686
687
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }
688
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
689
        switch(new_value) {
690
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
691
692
693
694
                default:
                        return 0;
        }
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
695

696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
static const char* elpa_autotune_level_name(int level) {
        switch(level) {
                ELPA_FOR_ALL_AUTOTUNE_LEVELS(NAME_CASE)
                default:
                        return "(Invalid autotune level)";
        }
}

static const char* elpa_autotune_domain_name(int domain) {
        switch(domain) {
                ELPA_FOR_ALL_AUTOTUNE_DOMAINS(NAME_CASE)
                default:
                        return "(Invalid autotune domain)";
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
712
713
714
715
static int na_is_valid(elpa_index_t index, int n, int new_value) {
        return new_value > 0;
}

716
717
718
719
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (!elpa_index_int_value_is_set(index, "na")) {
                return 0;
        }
720
        return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
721
722
723
724
725
726
}

static int is_positive(elpa_index_t index, int n, int new_value) {
        return new_value > 0;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
727
728
729
730
731
732
733
734
735
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        int na;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }

        na = elpa_index_get_int_value(index, "na", NULL);
        return (0 <= new_value) && (new_value < na);
}
736

737
738
739
740
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        return new_value == 0 || new_value == 1;
}

741
static int band_to_full_cardinality(elpa_index_t index) {
742
	return 10;
743
}
744
static int band_to_full_enumerate(elpa_index_t index, int i) {
745
	return i+1;
746
747
}

Pavel Kus's avatar
Pavel Kus committed
748
// TODO shouldnt it be only for ELPA2??
749
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
750
751
	int max_block=10;
        return (1 <= new_value) && (new_value <= max_block);
752
753
}

754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
static int stripewidth_real_cardinality(elpa_index_t index) {
	return 17;
}

static int stripewidth_complex_cardinality(elpa_index_t index) {
	return 17;
}

static int stripewidth_real_enumerate(elpa_index_t index, int i) {
	switch(i) {
	  case 0:
	    return 32;
	  case 1:
	    return 36;
	  case 2:
	    return 40;
	  case 3:
	    return 44;
	  case 4:
	    return 48;
	  case 5:
	    return 52;
	  case 6:
	    return 56;
	  case 7:
	    return 60;
	  case 8:
	    return 64;
	  case 9:
	    return 68;
	  case 10:
	    return 72;
	  case 11:
	    return 76;
	  case 12:
	    return 80;
	  case 13:
	    return 84;
	  case 14:
	    return 88;
	  case 15:
	    return 92;
	  case 16:
	    return 96;
	}
}

static int stripewidth_complex_enumerate(elpa_index_t index, int i) {
	switch(i) {
	  case 0:
	    return 48;
	  case 1:
	    return 56;
	  case 2:
	    return 64;
	  case 3:
	    return 72;
	  case 4:
	    return 80;
	  case 5:
	    return 88;
	  case 6:
	    return 96;
	  case 7:
	    return 104;
	  case 8:
	    return 112;
	  case 9:
	    return 120;
	  case 10:
	    return 128;
	  case 11:
	    return 136;
	  case 12:
	    return 144;
	  case 13:
	    return 152;
	  case 14:
	    return 160;
	  case 15:
	    return 168;
	  case 16:
	    return 176;
	}
}

static int stripewidth_real_is_valid(elpa_index_t index, int n, int new_value) {
	return (32 <= new_value) && (new_value <= 96);
}

static int stripewidth_complex_is_valid(elpa_index_t index, int n, int new_value) {
	return (48 <= new_value) && (new_value <= 176);
}

Pavel Kus's avatar
Pavel Kus committed
848
static int omp_threads_cardinality(elpa_index_t index) {
Andreas Marek's avatar
Andreas Marek committed
849
850
851
852
853
854
855
856
857
858
859
860
861
862
	int max_threads;
#ifdef WITH_OPENMP
	if (set_max_threads_glob == 0) {
		max_threads_glob = omp_get_max_threads();
		set_max_threads_glob = 1;
	}
#else
	max_threads_glob = 1;
	set_max_threads_glob = 1;
#endif
	max_threads = max_threads_glob;
	return max_threads;
}

Pavel Kus's avatar
Pavel Kus committed
863
static int omp_threads_enumerate(elpa_index_t index, int i) {
Andreas Marek's avatar
Andreas Marek committed
864
865
866
867
868
869
        return i + 1;
}

static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
        int max_threads;
#ifdef WITH_OPENMP
870
871
872
873
	if (set_max_threads_glob == 0) {
		max_threads_glob = omp_get_max_threads();
		set_max_threads_glob = 1;
	}
Andreas Marek's avatar
Andreas Marek committed
874
#else
875
876
	max_threads_glob = 1;
	set_max_threads_glob = 1;
Andreas Marek's avatar
Andreas Marek committed
877
#endif
878
	max_threads = max_threads_glob;
Andreas Marek's avatar
Andreas Marek committed
879
        return (1 <= new_value) && (new_value <= max_threads);
Andreas Marek's avatar
Andreas Marek committed
880
881
}

882

883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
static int valid_with_gpu(elpa_index_t index, int n, int new_value) {
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        if (gpu_is_active == 1) {
                return ((new_value == 0 ) || (new_value == 1));
        }
        else {
                return new_value == 0;
        }
}

static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        if ((solver == ELPA_SOLVER_1STAGE) && (gpu_is_active == 1)) {
                return ((new_value == 0 ) || (new_value == 1));
        }
        else {
                return new_value == 0;
        }
}

static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        if ((solver == ELPA_SOLVER_2STAGE) && (gpu_is_active == 1)) {
                return ((new_value == 0 ) || (new_value == 1));
        }
        else {
                return new_value == 0;
        }
}

915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
static int max_stored_rows_cardinality(elpa_index_t index) {
	return 8;
}

static int max_stored_rows_enumerate(elpa_index_t index, int i) {
	switch(i) {
	  case 0:
	    return 15;
	  case 1:
	    return 31;
	  case 2:
	    return 47;
	  case 3:
	    return 63;
	  case 4:
	    return 79;
	  case 5:
	    return 95;
	  case 6:
	    return 111;
	  case 7:
	    return 127;
	}
}

static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value) {
941
942
943
944
945
946
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_2STAGE) {
                return new_value == 15;
        } else {
                return (15 <= new_value) && (new_value <= 127);
        }
947
948
949
}


950
951
952
// TODO: this shoudl definitely be improved (too many options to test in autotuning)
static const int TILE_SIZE_STEP = 128;

953
static int min_tile_size_cardinality(elpa_index_t index) {
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
        int na;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);
        return na/TILE_SIZE_STEP;
}

static int min_tile_size_enumerate(elpa_index_t index, int i) {
        return (i+1) * TILE_SIZE_STEP;
}

static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value) {
       return new_value % TILE_SIZE_STEP == 0;
970
}
971

972
static int intermediate_bandwidth_cardinality(elpa_index_t index) {
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
        int na, nblk;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

        return na/nblk;
}

static int intermediate_bandwidth_enumerate(elpa_index_t index, int i) {
        int nblk;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

        return (i+1) * nblk;
}

static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value) {
        int na, nblk;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == nblk;
        } else {
                if((new_value <= 1 ) || (new_value > na ))
                  return 0;
                if(new_value % nblk != 0) {
                  fprintf(stderr, "intermediate bandwidth has to be multiple of nblk\n");
                  return 0;
                }
1023
        }
1024
1025
}

1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
static int cannon_buffer_size_cardinality(elpa_index_t index) {
        return 2;
}

static int cannon_buffer_size_enumerate(elpa_index_t index, int i) {
        int np_rows;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "num_process_rows") != 1) {
                return 0;
        }
        np_rows = elpa_index_get_int_value(index, "num_process_rows", NULL);

        // TODO: 0 is both error code and legal value?
        if(i == 0)
          return 0;
        else
          return np_rows - 1;
}

static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value) {
        int np_rows;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "num_process_rows") != 1) {
                return 0;
        }
        np_rows = elpa_index_get_int_value(index, "num_process_rows", NULL);

        return ((new_value >= 0) && (new_value < np_rows));
}

1058
elpa_index_t elpa_index_instance() {
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
1076
}
1077

Pavel Kus's avatar
Pavel Kus committed
1078
1079
1080
1081
1082
1083
1084
static int is_tunable_but_overriden(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        return (int_entries[i].autotune_level != 0) &&
               (int_entries[i].autotune_level <= autotune_level) &&
               (int_entries[i].autotune_domain & autotune_domain) &&
               (index->int_options.is_set[i]);
}

1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        return (int_entries[i].autotune_level != 0) &&
               (int_entries[i].autotune_level <= autotune_level) &&
               (int_entries[i].autotune_domain & autotune_domain) &&
               (!index->int_options.is_set[i]);
}

int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int N = 1;

        for (int i = 0; i < nelements(int_entries); i++) { \
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
1097
                        N *= int_entries[i].cardinality(index);
1098
1099
1100
1101
1102
                }
        }
        return N;
}

1103
1104
void elpa_index_print_int_parameter(elpa_index_t index, char* buff, int i)
{
Pavel Kus's avatar
Pavel Kus committed
1105
        int value = index->int_options.values[i];
1106
1107
        sprintf(buff, "%s = ", int_entries[i].base.name);
        if (int_entries[i].to_string) {
Pavel Kus's avatar
Pavel Kus committed
1108
                sprintf(buff, "%s%d -> %s\n", buff, value, int_entries[i].to_string(value));
1109
        } else {
Pavel Kus's avatar
Pavel Kus committed
1110
                sprintf(buff, "%s%d\n", buff, value);
1111
1112
1113
        }
}

Pavel Kus's avatar
Pavel Kus committed
1114
1115
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int current) {
        int current_cpy = current;
1116
        char buff[100];
1117
        int debug = elpa_index_get_int_value(index, "debug", NULL);
Pavel Kus's avatar
Pavel Kus committed
1118

Pavel Kus's avatar
Pavel Kus committed
1119
        //if(elpa_index_is_printing_mpi_rank(index)) fprintf(stderr, "***Trying a new autotuning index %d\n", current);
1120
1121
        for (int i = 0; i < nelements(int_entries); i++) {
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
Pavel Kus's avatar
Pavel Kus committed
1122
                        int value = int_entries[i].enumerate(index, current_cpy % int_entries[i].cardinality(index));
Pavel Kus's avatar
Pavel Kus committed
1123
                        //if(elpa_index_is_printing_mpi_rank(index)) fprintf(stderr, "  * val[%d] = %d -> %d\n", i, current_cpy % int_entries[i].cardinality(index), value);
1124
1125
1126
1127
                        /* Try to set option i to that value */
                        if (int_entries[i].valid(index, i, value)) {
                                index->int_options.values[i] = value;
                        } else {
Pavel Kus's avatar
Pavel Kus committed
1128
                                //if(elpa_index_is_printing_mpi_rank(index)) fprintf(stderr, "  *NOT VALID becaluse of i %d (%s) and value %d translated to %d\n", i, int_entries[i].base.name, current_cpy % int_entries[i].cardinality(index), value);
1129
1130
                                return 0;
                        }
Pavel Kus's avatar
Pavel Kus committed
1131
                        current_cpy /= int_entries[i].cardinality(index);
1132
1133
                }
        }
Pavel Kus's avatar
Pavel Kus committed
1134
        if (debug == 1 && elpa_index_is_printing_mpi_rank(index)) {
Pavel Kus's avatar
Pavel Kus committed
1135
                fprintf(stderr, "\n*** AUTOTUNING: setting a new combination of parameters, idx %d ***\n", current);
1136
                elpa_index_print_autotune_parameters(index, autotune_level, autotune_domain);
1137
                fprintf(stderr, "***\n\n");
1138
1139
1140
1141
1142
        }

        /* Could set all values */
        return 1;
}
1143

1144
int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain) {
1145
        char buff[100];
Pavel Kus's avatar
Pavel Kus committed
1146
        if (elpa_index_is_printing_mpi_rank(index)) {