elpa_index.c 55.7 KB
Newer Older
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
//    This file is part of ELPA.
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
Pavel Kus's avatar
Pavel Kus committed
46
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <elpa/elpa.h>
#include "elpa_index.h"

#include <execinfo.h>

#include "config.h"

#ifdef WITH_OPENMP
#include <omp.h>
#endif

/* Externally visible file-scope state.
 * NOTE(review): presumably a cached OpenMP thread limit plus a "has been set"
 * flag -- neither variable is read or written in the part of the file
 * reviewed here; confirm against their users. */
int max_threads_glob;
int set_max_threads_glob=0;

63
64
/* Forward declarations of the callbacks referenced by the option tables
 * below. Grouped per option: cardinality / enumerate / validity (/ name). */

/* Generic helpers for boolean-like options */
static int enumerate_identity(elpa_index_t index, int i);
static int cardinality_bool(elpa_index_t index);
static int valid_bool(elpa_index_t index, int n, int new_value);

/* "solver" */
static int number_of_solvers(elpa_index_t index);
static int solver_enumerate(elpa_index_t index, int i);
static int solver_is_valid(elpa_index_t index, int n, int new_value);
static const char* elpa_solver_name(int solver);

/* "real_kernel" */
static int number_of_real_kernels(elpa_index_t index);
static int real_kernel_enumerate(elpa_index_t index, int i);
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);

/* "complex_kernel" */
static int number_of_complex_kernels(elpa_index_t index);
static int complex_kernel_enumerate(elpa_index_t index, int i);
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);

/* "blocking_in_band_to_full" */
static int band_to_full_cardinality(elpa_index_t index);
static int band_to_full_enumerate(elpa_index_t index, int i);
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

/* "stripewidth_real" */
static int stripewidth_real_cardinality(elpa_index_t index);
static int stripewidth_real_enumerate(elpa_index_t index, int i);
static int stripewidth_real_is_valid(elpa_index_t index, int n, int new_value);

/* "stripewidth_complex" */
static int stripewidth_complex_cardinality(elpa_index_t index);
static int stripewidth_complex_enumerate(elpa_index_t index, int i);
static int stripewidth_complex_is_valid(elpa_index_t index, int n, int new_value);

/* "omp_threads" */
static int omp_threads_cardinality(elpa_index_t index);
static int omp_threads_enumerate(elpa_index_t index, int i);
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value);

/* "max_stored_rows" */
static int max_stored_rows_cardinality(elpa_index_t index);
static int max_stored_rows_enumerate(elpa_index_t index, int i);
static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value);

/* "min_tile_size" */
static int min_tile_size_cardinality(elpa_index_t index);
static int min_tile_size_enumerate(elpa_index_t index, int i);
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value);

/* Validity checks for the per-phase GPU switches */
static int valid_with_gpu(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value);

/* "intermediate_bandwidth" */
static int intermediate_bandwidth_cardinality(elpa_index_t index);
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i);
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value);

/* "cannon_buffer_size" */
static int cannon_buffer_size_cardinality(elpa_index_t index);
static int cannon_buffer_size_enumerate(elpa_index_t index, int i);
static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value);

/* Validity checks for the structural parameters */
static int na_is_valid(elpa_index_t index, int n, int new_value);
static int nev_is_valid(elpa_index_t index, int n, int new_value);
static int bw_is_valid(elpa_index_t index, int n, int new_value);
static int gpu_is_valid(elpa_index_t index, int n, int new_value);

static int is_positive(elpa_index_t index, int n, int new_value);

/* String conversion for double-valued options */
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
127

Pavel Kus's avatar
Pavel Kus committed
128
/* Designated initializer for the `.base` member shared by all option entries.
 * The names of the two environment variables are built by string-literal
 * concatenation ("ELPA_DEFAULT_" / "ELPA_FORCE_" followed by the option
 * name), so opt_name has to be a string literal. */
#define BASE_ENTRY(opt_name, opt_description, set_once, is_readonly, print_mode) \
                .base = { \
                        .name = opt_name, \
                        .description = opt_description, \
                        .once = set_once, \
                        .readonly = is_readonly, \
                        .env_default = "ELPA_DEFAULT_" opt_name, \
                        .env_force = "ELPA_FORCE_" opt_name, \
                        .print_flag = print_mode, \
                }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
138

Pavel Kus's avatar
Pavel Kus committed
139
/* Table entry for an integer parameter that may be set only once
 * (once = 1, readonly = 0). `validator` may be NULL, in which case the
 * setter accepts any value. */
#define INT_PARAMETER_ENTRY(opt_name, opt_description, validator, print_mode) \
        { \
                BASE_ENTRY(opt_name, opt_description, 1, 0, print_mode), \
                .valid = validator, \
        }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
144

Pavel Kus's avatar
Pavel Kus committed
145
/* Table entry for a 0/1 option: re-settable, with a default value and
 * autotuning metadata; uses the generic bool callbacks (two values,
 * enumerated as themselves, valid iff 0 or 1). */
#define BOOL_ENTRY(opt_name, opt_description, initial_value, tune_level, tune_domain, print_mode) \
        { \
                BASE_ENTRY(opt_name, opt_description, 0, 0, print_mode), \
                .default_value = initial_value, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
        }

Pavel Kus's avatar
Pavel Kus committed
156
/* Table entry for a general integer option: re-settable, with a default
 * value, autotuning metadata and per-option callbacks for the number of
 * admissible values, their enumeration, validation and (optionally, may be
 * NULL) conversion to a string. */
#define INT_ENTRY(opt_name, opt_description, initial_value, tune_level, tune_domain, count_fn, enum_fn, check_fn, name_fn, print_mode) \
        { \
                BASE_ENTRY(opt_name, opt_description, 0, 0, print_mode), \
                .default_value = initial_value, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = count_fn, \
                .enumerate = enum_fn, \
                .valid = check_fn, \
                .to_string = name_fn, \
        }

Pavel Kus's avatar
Pavel Kus committed
168
/* Table entry for an integer option without any callbacks: re-settable and,
 * since no validator is installed, accepted by the setter for any value. */
#define INT_ANY_ENTRY(opt_name, opt_description, print_mode) \
        { \
                BASE_ENTRY(opt_name, opt_description, 0, 0, print_mode), \
        }

173
174
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
175
static const elpa_index_int_entry_t int_entries[] = {
Pavel Kus's avatar
Pavel Kus committed
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_id", "Process rank", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("is_process_id_zero", "Is it a process with rank zero?", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("num_process_rows", "Number of process row number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_process_cols", "Number of process column number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_processes", "Total number of processes", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid, PRINT_YES),
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator", PRINT_NO),
        INT_ANY_ENTRY("blacs_context", "BLACS context", PRINT_NO),
193
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
194
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES),
195
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
196
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
197
198
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
199
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
200
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
201
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
202
                        cardinality_bool, enumerate_identity, valid_with_gpu, NULL, PRINT_YES),
203
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
204
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
205
        INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
206
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
207
        INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
208
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
209
        INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
210
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
211
        INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
212
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
213
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Pavel Kus's avatar
Pavel Kus committed
214
                        number_of_real_kernels, real_kernel_enumerate, real_kernel_is_valid, real_kernel_name, PRINT_YES),
215
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Pavel Kus's avatar
Pavel Kus committed
216
                        number_of_complex_kernels, complex_kernel_enumerate, complex_kernel_is_valid, complex_kernel_name, PRINT_YES),
217

218
        INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
219
                        min_tile_size_cardinality, min_tile_size_enumerate, min_tile_size_is_valid, NULL, PRINT_YES),
220
        INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
221
                        intermediate_bandwidth_cardinality, intermediate_bandwidth_enumerate, intermediate_bandwidth_is_valid, NULL, PRINT_YES),
222

223
        INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
224
                        band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL, PRINT_YES),
225
        INT_ENTRY("stripewidth_real", "Stripewidth_real, default 48. Must be a multiple of 4", 48, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_REAL,
226
                        stripewidth_real_cardinality, stripewidth_real_enumerate, stripewidth_real_is_valid, NULL, PRINT_YES),
227
        INT_ENTRY("stripewidth_complex", "Stripewidth_complex, default 96. Must be a multiple of 8", 96, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_COMPLEX,
228
229
                        stripewidth_complex_cardinality, stripewidth_complex_enumerate, stripewidth_complex_is_valid, NULL, PRINT_YES),

230
        INT_ENTRY("max_stored_rows", "Maximum number of stored rows used in ELPA 1 backtransformation, default 63", 63, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
231
                        max_stored_rows_cardinality, max_stored_rows_enumerate, max_stored_rows_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
232
#ifdef WITH_OPENMP
233
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
234
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
235
#else
236
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
237
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
238
#endif
239
        INT_ENTRY("cannon_buffer_size", "Increasing the buffer size might make it faster, but costs memory", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
240
                        cannon_buffer_size_cardinality, cannon_buffer_size_enumerate, cannon_buffer_size_is_valid, NULL, PRINT_YES),
241
        //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
Pavel Kus's avatar
Pavel Kus committed
242
243
244
245
246
247
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL, PRINT_YES),
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("cannon_for_generalized", "Whether to use Cannons algorithm for the generalized EVP", 1, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
248
249
250
};

/* Table entry for a read-only double option (once = 0, readonly = 1,
 * print flag 0). */
#define READONLY_DOUBLE_ENTRY(opt_name, opt_description) \
        { \
                BASE_ENTRY(opt_name, opt_description, 0, 1, 0) \
        }

/* Table of all double-valued options. It currently has no members, which is
 * why the accessors below test sizeof(<type>_entries) == 0 first. */
static const elpa_index_double_entry_t double_entries[] = {
        /* Empty for now */
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
258

259
/*
 * Release an index object together with the per-type option arrays
 * (values, is_set, notified) that hang off it.
 *
 * Passing NULL is a no-op, mirroring free().
 */
void elpa_index_free(elpa_index_t index) {
        if (index == NULL) {
                return;
        }

        /* Expanded once per option type by FOR_ALL_TYPES */
#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

        free(index);
}

static int compar(const void *key, const void *member) {
        const char *name = (const char *) key;
272
        elpa_index_int_entry_t *entry = (elpa_index_int_entry_t *) member;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
273

274
        int l1 = strlen(entry->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
275
276
277
278
        int l2 = strlen(name);
        if (l1 != l2) {
                return 1;
        }
279
        if (strncmp(name, entry->base.name, l1) == 0) {
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
280
281
282
283
284
285
                return 0;
        } else {
                return 1;
        }
}

286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
/* Generates find_<TYPE>_entry(name): a linear search through <TYPE>_entries
 * for an entry whose name matches exactly. Yields the position of the entry
 * in the table, or -1 when there is no such entry. */
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                size_t nmembers = nelements(TYPE##_entries); \
                elpa_index_##TYPE##_entry_t *entry = \
                        lfind((const void*) name, (const void *) TYPE##_entries, &nmembers, sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (!entry) { \
                        return -1; \
                } \
                return (entry - &TYPE##_entries[0]); \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


/*
 * Generates getenv_<TYPE>(): try to obtain the value of option number n from
 * the environment variable env_variable.
 *
 *   index         index object (used for the notification bookkeeping and to
 *                 find out whether this is the process that should print)
 *   env_variable  name of the environment variable to consult
 *   notify_flag   bit recorded in notified[n] once the user has been told
 *   n             position of the option in <TYPE>_entries
 *   value         receives the parsed value on success
 *   error_string  leading word of the notice ("Option", ...)
 *
 * Returns 1 if the variable is set and could be parsed (*value is valid),
 * 0 if it is unset or its content could not be interpreted; in the latter
 * case an error naming the offending environment variable is printed.
 *
 * The notice is only printed where "is_process_id_zero" is 1. For options
 * with a string representation it is printed once per notify_flag; for the
 * others it is printed on every successful call.
 */
#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                int err; \
                int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL); \
                char *env_value = getenv(env_variable); \
                if (env_value) { \
                        err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
                        if (err != ELPA_OK) { \
                                /* report the variable itself, not the option it maps to */ \
                                fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                                env_variable, env_value, elpa_strerr(err)); \
                        } else {\
                                const char *value_string = NULL; \
                                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
                                        if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
                                                if (is_process_id_zero == 1) { \
                                                        fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                                      error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                                } \
                                                index->TYPE##_options.notified[n] |= notify_flag; \
                                        } \
                                } else { \
                                        if (is_process_id_zero == 1) { \
                                                fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
                                                        error_string, TYPE##_entries[n].base.name, *value, env_variable);\
                                        } \
                                } \
                                return 1; \
                        } \
                } \
                return 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


/*
 * Generates elpa_index_get_<TYPE>_value(): look up the current value of the
 * option `name`.
 *
 *   index  index object holding the stored values
 *   name   option name
 *   error  if not NULL, receives ELPA_OK or ELPA_ERROR_ENTRY_NOT_FOUND
 *
 * For options that are neither set-once nor read-only, a parsable value in
 * the option's env_force environment variable takes precedence over the
 * stored value.
 *
 * Returns the value, or ERROR_VALUE if no option of that name exists for
 * this type (including the case of an empty option table).
 */
#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                TYPE ret; \
                int n = -1; \
                if (sizeof(TYPE##_entries) != 0) { \
                        n = find_##TYPE##_entry(name); \
                } \
                if (n < 0) { \
                        /* same reporting for "table empty" and "name unknown" */ \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
                int from_env = 0; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                } \
                if (!from_env) { \
                        ret = index->TYPE##_options.values[n]; \
                } \
                if (error != NULL) { \
                        *error = ELPA_OK; \
                } \
                return ret; \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


/* Generates elpa_index_get_<TYPE>_loc(): address of the storage slot that
 * holds the value of option `name` inside `index`, or NULL if this type has
 * no option of that name. */
#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return NULL; \
                } \
                return &index->TYPE##_options.values[n]; \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


378
/*
 * Generates elpa_index_set_<TYPE>_value(): store `value` for option `name`
 * and mark the option as set.
 *
 * Returns
 *   ELPA_OK                         on success
 *   ELPA_ERROR_ENTRY_NOT_FOUND      no option of that name for this type
 *   ELPA_ERROR_ENTRY_INVALID_VALUE  the option's validator rejected the value
 *   ELPA_ERROR_ENTRY_ALREADY_SET    set-once option that has been set before
 *   ELPA_ERROR_ENTRY_READONLY       option is read-only
 *
 * The checks are made in the order listed; nothing is stored on error.
 */
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                if (TYPE##_entries[n].valid != NULL) { \
                        if(!TYPE##_entries[n].valid(index, n, value)) { \
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
                        } \
                } \
                /* logical AND: any non-zero is_set marker counts as "set" */ \
                if (TYPE##_entries[n].base.once && index->TYPE##_options.is_set[n]) { \
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
                if (TYPE##_entries[n].base.readonly) { \
                        return ELPA_ERROR_ENTRY_READONLY; \
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)

Pavel Kus's avatar
Pavel Kus committed
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
/* Generates elpa_index_set_from_load_<TYPE>_value(): store `value` for
 * option `name` without consulting the validator or the once/readonly
 * flags. The option is marked as set only when `explicit` is non-zero.
 * Returns ELPA_OK, or ELPA_ERROR_ENTRY_NOT_FOUND for an unknown name. */
#define IMPLEMENT_SET_FROM_LOAD_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_from_load_##TYPE##_value(elpa_index_t index, char *name, TYPE value, int explicit) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                const int pos = find_##TYPE##_entry(name); \
                if (pos < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                index->TYPE##_options.values[pos] = value; \
                if (explicit) { \
                        index->TYPE##_options.is_set[pos] = 1; \
                } \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FROM_LOAD_FUNCTION)

420
421
422

/* Generates elpa_index_<TYPE>_value_is_set(): 1 if option `name` carries the
 * "set" marker in `index`, 0 if it does not, ELPA_ERROR_ENTRY_NOT_FOUND if
 * this type has no option of that name. */
#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                const int pos = find_##TYPE##_entry(name); \
                if (pos < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                return index->TYPE##_options.is_set[pos] ? 1 : 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


/*
 * Type-agnostic variant of the *_value_is_set() functions: asks every option
 * type in turn and returns the first non-negative answer (1 = set,
 * 0 = not set). If no type answers, an error is printed to stderr and the
 * result of the last query is returned.
 */
int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int status = ELPA_ERROR;

        /* Expanded once per option type by FOR_ALL_TYPES */
#define RET_IF_SET(TYPE, ...) \
        status = elpa_index_##TYPE##_value_is_set(index, name); \
        if (!(status < 0)) { \
                return status; \
        }

        FOR_ALL_TYPES(RET_IF_SET)

        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return status;
}

455
456
457
458
459
/*
 * Check whether `new_value` would be accepted for the integer option `name`.
 *
 * Returns ELPA_OK if the option has no validator or the validator accepts
 * the value, ELPA_ERROR if the validator rejects it, and
 * ELPA_ERROR_ENTRY_NOT_FOUND if there is no integer option of that name.
 */
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        if (int_entries[n].valid == NULL) {
                return ELPA_OK;
        }
        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
}

467
int elpa_int_value_to_string(char *name, int value, const char **string) {
468
469
470
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
471
        }
472
        if (int_entries[n].to_string == NULL) {
473
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
474
475
476
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
477
478
}

479
480

/*
 * Length of the string representation of `value` for the integer option
 * `name`; 0 if no string could be obtained.
 */
int elpa_int_value_to_strlen(char *name, int value) {
        const char *text = NULL;

        /* on failure text simply stays NULL, so the status is not needed */
        elpa_int_value_to_string(name, value, &text);
        if (!text) {
                return 0;
        }
        return strlen(text);
}
489

490
491
492
493
494
495

/*
 * Length of the string representation of the value currently stored in
 * `index` for the integer option `name`; 0 for an unknown option.
 */
int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        const int pos = find_int_entry(name);
        if (pos >= 0) {
                return elpa_int_value_to_strlen(name, index->int_options.values[pos]);
        }
        return 0;
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
501
502
503
504
505
506
507
508
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
509
                if (ret == 1) {
510
                        *value = val;
511
512
                        return ELPA_OK;
                } else {
513
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
514
515
516
                }
        }

517
518
        for (int i = 0; i < int_entries[n].cardinality(NULL); i++) {
                int candidate = int_entries[n].enumerate(NULL, i);
519
520
521
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
522
                }
523
        }
524
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
525
526
}

527
int elpa_double_string_to_value(char *name, char *string, double *value) {
528
529
        double val;
        int ret = sscanf(string, "%lf", &val);
530
        if (ret == 1) {
531
532
                *value = val;
                return ELPA_OK;
533
        } else {
534
535
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
536
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
537
538
539
        }
}

540
int elpa_double_value_to_string(char *name, double value, const char **string) {
541
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
542
}
543

544
int elpa_option_cardinality(char *name) {
545
546
547
548
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].cardinality) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
549
        return int_entries[n].cardinality(NULL);
550
}
551

552
int elpa_option_enumerate(char *name, int i) {
553
554
555
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].enumerate) {
                return 0;
556
        }
557
        return int_entries[n].enumerate(NULL, i);
558
559
}

560

561
/* Helper functions for simple int entries */
562
/* Cardinality callback for boolean options: exactly two values. */
static int cardinality_bool(elpa_index_t index) {
        return 2;
}
565

566
567
/* Validity callback for boolean options: accepts only 0 and 1. */
static int valid_bool(elpa_index_t index, int n, int new_value) {
        return (0 <= new_value) && (new_value < 2);
}

570
/* Enumerate callback for options whose i-th value is simply i. */
static int enumerate_identity(elpa_index_t index, int i) {
        return i;
}

574
575
576
577
578
579
580
581
582
583
/* Helper functions for specific options */

/* Switch case for `value` that returns the option's name as a string
 * literal (stringified `name`); extra arguments are ignored. */
#define NAME_CASE(name, value, ...) \
        case value: \
                return #name;

/* Switch case for `value` that unconditionally reports it as valid (1). */
#define VALID_CASE(name, value) \
        case value: \
                return 1;

584
/* Switch case for `value` that reports it as valid only if `available`
 * is non-zero and the predicate macro `other_checks`, applied to `value`,
 * also holds. `available` is parenthesized so that a compound expression
 * cannot re-associate with the `&&`. */
#define VALID_CASE_3(name, value, available, other_checks) \
        case value: \
                return (available) && (other_checks(value));
587
588
589
590
591
592

/*
 * Name of the solver constant `solver`, generated from
 * ELPA_FOR_ALL_SOLVERS; "(Invalid solver)" for any other value.
 */
static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        return "(Invalid solver)";
        }
}

596
/* Cardinality callback for the solver option: ELPA_NUMBER_OF_SOLVERS. */
static int number_of_solvers(elpa_index_t index) {
        return ELPA_NUMBER_OF_SOLVERS;
}

600
/*
 * Enumerate callback for the solver option: returns the solver constant
 * whose rank is `i`, or 0 if no solver has that rank.
 *
 * The case labels are computed by the preprocessor. For every solver,
 * ENUMERATE_CASE emits `case 0 +r1 +r2 ...: return value;`, where each
 * term comes from OPTION_RANK applied to one solver of the list and is 1
 * if that solver's value is smaller than the current one, else 0. The
 * label is therefore the number of solvers with a smaller value.
 *
 * The helper macros defined here (OPTION_RANK, EMPTY, DEFER1, EVAL,
 * ENUMERATE_CASE) stay defined after this function and are reused by
 * the kernel enumerate functions further down.
 */
static int solver_enumerate(elpa_index_t index, int i) {
/* Inside OPTION_RANK, `value` is the inner solver's value. The outer
 * one is recovered from the element count of array_of_size_value, which
 * ENUMERATE_CASE declares with the outer value as its size. */
#define OPTION_RANK(name, value, ...) \
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)

/* DEFER1 postpones the expansion of INNER_ITERATOR() until the rescan
 * done by EVAL, so the solver list macro can be applied a second time
 * while it is already being expanded. */
#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
623
/*
 * Validity callback for the solver option: 1 if `new_value` is one of
 * the values listed by ELPA_FOR_ALL_SOLVERS, else 0.
 */
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        return 0;
        }
}

631
/* Cardinality callback for the real 2-stage kernel option. */
static int number_of_real_kernels(elpa_index_t index) {
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
634

635
/*
 * Enumerate callback for the real 2-stage kernel option: returns the
 * kernel constant whose rank (number of kernels with a smaller value) is
 * `i`, or 0 if there is none.
 *
 * Relies on the EVAL and ENUMERATE_CASE macros defined earlier in this
 * file (inside solver_enumerate).
 */
static int real_kernel_enumerate(elpa_index_t index,int i) {
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}
644

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
645
static const char *real_kernel_name(int kernel) {
646
647
648
649
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
650
        }
651
}
652

653
654
655
/* Extra check used with VALID_CASE_3: the real GPU kernel is accepted
 * only if the local variable `gpu_is_active` (which must be in scope at
 * the expansion site) is non-zero; every other kernel passes. */
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
656
/*
 * Validity callback for the real 2-stage kernel option.
 *
 * With the 1-stage solver selected only ELPA_2STAGE_REAL_DEFAULT is
 * accepted. Otherwise a kernel is valid if its availability flag in
 * ELPA_FOR_ALL_2STAGE_REAL_KERNELS is non-zero and, for the GPU kernel,
 * the "gpu" option is active.
 */
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
        /* Read by REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE in the cases below. */
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        switch(new_value) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
                default:
                        return 0;
        }
}
668

669
/* Cardinality callback for the complex 2-stage kernel option. */
static int number_of_complex_kernels(elpa_index_t index) {
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
672

673

674
/*
 * Enumerate callback for the complex 2-stage kernel option: returns the
 * kernel constant whose rank (number of kernels with a smaller value) is
 * `i`, or 0 if there is none.
 *
 * Relies on the EVAL and ENUMERATE_CASE macros defined earlier in this
 * file (inside solver_enumerate).
 */
static int complex_kernel_enumerate(elpa_index_t index,int i) {
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
684
static const char *complex_kernel_name(int kernel) {
685
686
687
688
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
689
        }
690
}
691

692
693
694
/* Extra check used with VALID_CASE_3: the complex GPU kernel is accepted
 * only if the local variable `gpu_is_active` (which must be in scope at
 * the expansion site) is non-zero; every other kernel passes. */
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
695
/*
 * Validity callback for the complex 2-stage kernel option.
 *
 * With the 1-stage solver selected only ELPA_2STAGE_COMPLEX_DEFAULT is
 * accepted. Otherwise a kernel is valid if its availability flag in
 * ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS is non-zero and, for the GPU
 * kernel, the "gpu" option is active.
 */
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }
        /* Read by COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE in the cases below. */
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        switch(new_value) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
                default:
                        return 0;
        }
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
707

708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
/*
 * Name of the autotune level constant `level`, generated from
 * ELPA_FOR_ALL_AUTOTUNE_LEVELS; "(Invalid autotune level)" otherwise.
 */
static const char* elpa_autotune_level_name(int level) {
        switch(level) {
                ELPA_FOR_ALL_AUTOTUNE_LEVELS(NAME_CASE)
                default:
                        return "(Invalid autotune level)";
        }
}

/*
 * Name of the autotune domain constant `domain`, generated from
 * ELPA_FOR_ALL_AUTOTUNE_DOMAINS; "(Invalid autotune domain)" otherwise.
 */
static const char* elpa_autotune_domain_name(int domain) {
        switch(domain) {
                ELPA_FOR_ALL_AUTOTUNE_DOMAINS(NAME_CASE)
                default:
                        return "(Invalid autotune domain)";
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
724
725
726
727
/* Validity callback for "na": the value must be strictly positive. */
static int na_is_valid(elpa_index_t index, int n, int new_value) {
        if (new_value <= 0) {
                return 0;
        }
        return 1;
}

728
729
730
731
/*
 * Validity callback for "nev": requires "na" to be set already and
 * accepts values in the closed range [0, na].
 */
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (!elpa_index_int_value_is_set(index, "na")) {
                return 0;
        }
        return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
}

/* Generic validity callback: accepts strictly positive values only. */
static int is_positive(elpa_index_t index, int n, int new_value) {
        if (new_value <= 0) {
                return 0;
        }
        return 1;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
739
740
741
742
743
744
745
746
747
/*
 * Validity callback for a bandwidth value: requires "na" to be set and
 * accepts values in the half-open range [0, na).
 */
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }

        const int matrix_size = elpa_index_get_int_value(index, "na", NULL);
        if (new_value < 0) {
                return 0;
        }
        return new_value < matrix_size;
}
748

749
750
751
752
/* Validity callback for "gpu": a plain on/off switch, 0 or 1. */
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        switch (new_value) {
                case 0:
                case 1:
                        return 1;
                default:
                        return 0;
        }
}

753
/* Cardinality callback for the band-to-full blocking option: 10 values. */
static int band_to_full_cardinality(elpa_index_t index) {
        return 10;
}
756
/* Enumerate callback for the band-to-full blocking option: the i-th
 * value is i+1, so the values start at 1. */
static int band_to_full_enumerate(elpa_index_t index, int i) {
        return i+1;
}

Pavel Kus's avatar
Pavel Kus committed
760
// TODO: shouldn't this be only for ELPA2?
761
/* Validity callback for the band-to-full blocking option: accepts
 * values in the closed range [1, 10]. */
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
        int max_block=10;
        return (1 <= new_value) && (new_value <= max_block);
}

766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
/* Cardinality callback for the real stripe width: 17 candidates. */
static int stripewidth_real_cardinality(elpa_index_t index) {
        enum { candidate_count = 17 };
        return candidate_count;
}

/* Cardinality callback for the complex stripe width: 17 candidates. */
static int stripewidth_complex_cardinality(elpa_index_t index) {
        enum { candidate_count = 17 };
        return candidate_count;
}

/*
 * Enumerate callback for the real stripe width: candidate i is
 * 32 + 4*i for i in [0, 16], i.e. 32, 36, ..., 96.
 *
 * Returns 0 for an index outside that range. The previous switch had no
 * default, so such a call ran off the end of a non-void function.
 */
static int stripewidth_real_enumerate(elpa_index_t index, int i) {
        enum { first = 32, step = 4, count = 17 };

        if (i < 0 || i >= count) {
                return 0;
        }
        return first + step * i;
}

/*
 * Enumerate callback for the complex stripe width: candidate i is
 * 48 + 8*i for i in [0, 16], i.e. 48, 56, ..., 176.
 *
 * Returns 0 for an index outside that range. The previous switch had no
 * default, so such a call ran off the end of a non-void function.
 */
static int stripewidth_complex_enumerate(elpa_index_t index, int i) {
        enum { first = 48, step = 8, count = 17 };

        if (i < 0 || i >= count) {
                return 0;
        }
        return first + step * i;
}

/* Validity callback for the real stripe width: closed range [32, 96]. */
static int stripewidth_real_is_valid(elpa_index_t index, int n, int new_value) {
        if (new_value < 32 || new_value > 96) {
                return 0;
        }
        return 1;
}

/* Validity callback for the complex stripe width: closed range [48, 176]. */
static int stripewidth_complex_is_valid(elpa_index_t index, int n, int new_value) {
        if (new_value < 48 || new_value > 176) {
                return 0;
        }
        return 1;
}

Pavel Kus's avatar
Pavel Kus committed
860
/*
 * Cardinality callback for the OpenMP thread count option.
 *
 * With OpenMP the maximum is queried once via omp_get_max_threads() and
 * cached in max_threads_glob (guarded by set_max_threads_glob); without
 * OpenMP it is fixed to 1. Returns that maximum.
 */
static int omp_threads_cardinality(elpa_index_t index) {
#ifdef WITH_OPENMP
        if (set_max_threads_glob == 0) {
                max_threads_glob = omp_get_max_threads();
                set_max_threads_glob = 1;
        }
#else
        max_threads_glob = 1;
        set_max_threads_glob = 1;
#endif
        return max_threads_glob;
}

Pavel Kus's avatar
Pavel Kus committed
875
/* Enumerate callback for the OpenMP thread count: the i-th candidate is
 * i+1 threads. */
static int omp_threads_enumerate(elpa_index_t index, int i) {
        return i + 1;
}

/*
 * Validity callback for the OpenMP thread count: accepts values in
 * [1, max_threads].
 *
 * With OpenMP the maximum is queried once via omp_get_max_threads() and
 * cached in max_threads_glob (guarded by set_max_threads_glob); without
 * OpenMP it is fixed to 1.
 */
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
#ifdef WITH_OPENMP
        if (set_max_threads_glob == 0) {
                max_threads_glob = omp_get_max_threads();
                set_max_threads_glob = 1;
        }
#else
        max_threads_glob = 1;
        set_max_threads_glob = 1;
#endif
        return (1 <= new_value) && (new_value <= max_threads_glob);
}

894

895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
/*
 * Validity callback for switches that may only be turned on while the
 * "gpu" option equals 1: 0 is always accepted, 1 only with GPU active,
 * anything else is rejected.
 */
static int valid_with_gpu(elpa_index_t index, int n, int new_value) {
        const int gpu_enabled = (elpa_index_get_int_value(index, "gpu", NULL) == 1);

        if (new_value == 0) {
                return 1;
        }
        return gpu_enabled && (new_value == 1);
}

/*
 * Validity callback for switches that may only be turned on when the
 * 1-stage solver is selected and the "gpu" option equals 1: 0 is always
 * accepted, 1 only in that configuration, anything else is rejected.
 */
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) {
        const int chosen_solver = elpa_index_get_int_value(index, "solver", NULL);
        const int gpu_flag = elpa_index_get_int_value(index, "gpu", NULL);
        const int may_enable = (chosen_solver == ELPA_SOLVER_1STAGE) && (gpu_flag == 1);

        if (new_value == 0) {
                return 1;
        }
        return may_enable && (new_value == 1);
}

/*
 * Validity callback for switches that may only be turned on when the
 * 2-stage solver is selected and the "gpu" option equals 1: 0 is always
 * accepted, 1 only in that configuration, anything else is rejected.
 */
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value) {
        const int chosen_solver = elpa_index_get_int_value(index, "solver", NULL);
        const int gpu_flag = elpa_index_get_int_value(index, "gpu", NULL);
        const int may_enable = (chosen_solver == ELPA_SOLVER_2STAGE) && (gpu_flag == 1);

        if (new_value == 0) {
                return 1;
        }
        return may_enable && (new_value == 1);
}

927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
/* Cardinality callback for max_stored_rows: 8 candidates. */
static int max_stored_rows_cardinality(elpa_index_t index) {
        enum { candidate_count = 8 };
        return candidate_count;
}

/*
 * Enumerate callback for max_stored_rows: candidate i is 15 + 16*i for
 * i in [0, 7], i.e. 15, 31, ..., 127.
 *
 * Returns 0 for an index outside that range. The previous switch had no
 * default, so such a call ran off the end of a non-void function.
 */
static int max_stored_rows_enumerate(elpa_index_t index, int i) {
        enum { first = 15, step = 16, count = 8 };

        if (i < 0 || i >= count) {
                return 0;
        }
        return first + step * i;
}

/*
 * Validity callback for max_stored_rows: with the 2-stage solver only
 * 15 is accepted; otherwise any value in the closed range [15, 127].
 */
static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_2STAGE) {
                return new_value == 15;
        } else {
                return (15 <= new_value) && (new_value <= 127);
        }
}


962
963
964
// Spacing between the min_tile_size candidates: the min_tile_size
// callbacks enumerate and accept multiples of this value.
// TODO: this should definitely be improved (too many options to test in autotuning)
static const int TILE_SIZE_STEP = 128;

965
/*
 * Cardinality callback for min_tile_size: na / TILE_SIZE_STEP
 * candidates (integer division).
 *
 * Returns 0 when called without an index or before "na" has been set.
 */
static int min_tile_size_cardinality(elpa_index_t index) {
        int na;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);
        return na/TILE_SIZE_STEP;
}

/* Enumerate callback for min_tile_size: the i-th candidate is the
 * (i+1)-th multiple of TILE_SIZE_STEP. */
static int min_tile_size_enumerate(elpa_index_t index, int i) {
        const int multiple = i + 1;
        return multiple * TILE_SIZE_STEP;
}

/* Validity callback for min_tile_size: the value must be a multiple of
 * TILE_SIZE_STEP (this includes 0). */
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value) {
       return new_value % TILE_SIZE_STEP == 0;
}
983

984
/*
 * Cardinality callback for the intermediate bandwidth: na / nblk
 * candidates (integer division).
 *
 * Returns 0 when called without an index, before "na" or "nblk" has
 * been set, or when nblk is not positive (which would otherwise divide
 * by zero).
 */
static int intermediate_bandwidth_cardinality(elpa_index_t index) {
        int na, nblk;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);
        if (nblk <= 0) {
                return 0;
        }

        return na/nblk;
}

/*
 * Enumerate callback for the intermediate bandwidth: the i-th candidate
 * is the (i+1)-th multiple of nblk.
 *
 * Returns 0 when called without an index or before "nblk" has been set.
 */
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i) {
        if (index == NULL || elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }

        const int block_size = elpa_index_get_int_value(index, "nblk", NULL);
        const int multiple = i + 1;
        return multiple * block_size;
}

/*
 * Validity callback for the intermediate bandwidth.
 *
 * Requires "na" and "nblk" to be set. With the 1-stage solver only
 * nblk itself is accepted. Otherwise the value must lie in (1, na] and
 * be a multiple of nblk; a diagnostic is printed if it is not a multiple.
 *
 * Returns 1 if valid, 0 otherwise. The previous version had no return on
 * the valid non-1-stage path.
 */
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value) {
        int na, nblk;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == nblk;
        }

        if ((new_value <= 1) || (new_value > na)) {
                return 0;
        }
        /* Guard the modulo below against a zero (or nonsensical) block size. */
        if (nblk <= 0) {
                return 0;
        }
        if (new_value % nblk != 0) {
                fprintf(stderr, "intermediate bandwidth has to be multiple of nblk\n");
                return 0;
        }
        return 1;
}

1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
/* Cardinality callback for the Cannon buffer size: 2 candidates. */
static int cannon_buffer_size_cardinality(elpa_index_t index) {
        enum { candidate_count = 2 };
        return candidate_count;
}

/*
 * Enumerate callback for the Cannon buffer size: candidate 0 is 0, any
 * other index yields num_process_rows - 1.
 *
 * Returns 0 when called without an index or before "num_process_rows"
 * has been set.
 */
static int cannon_buffer_size_enumerate(elpa_index_t index, int i) {
        if (index == NULL || elpa_index_int_value_is_set(index, "num_process_rows") != 1) {
                return 0;
        }

        const int process_rows = elpa_index_get_int_value(index, "num_process_rows", NULL);

        // TODO: 0 is both error code and legal value?
        return (i == 0) ? 0 : process_rows - 1;
}

/*
 * Validity callback for the Cannon buffer size: accepts values in the
 * half-open range [0, num_process_rows).
 *
 * Returns 0 when called without an index or before "num_process_rows"
 * has been set.
 */
static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value) {
        if (index == NULL || elpa_index_int_value_is_set(index, "num_process_rows") != 1) {
                return 0;
        }

        const int process_rows = elpa_index_get_int_value(index, "num_process_rows", NULL);
        if (new_value < 0) {
                return 0;
        }
        return new_value < process_rows;
}

1070
elpa_index_t elpa_index_instance() {
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
1088
}
1089

Pavel Kus's avatar
Pavel Kus committed
1090
1091
1092
1093
1094
1095
1096
/*
 * 1 if integer option `i` takes part in autotuning at the given level
 * and domain but has been set explicitly in `index`; 0 otherwise.
 *
 * An option takes part if its autotune_level is non-zero and not above
 * `autotune_level`, and its autotune_domain shares a bit with
 * `autotune_domain`.
 */
static int is_tunable_but_overriden(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        if (int_entries[i].autotune_level == 0) {
                return 0;
        }
        if (int_entries[i].autotune_level > autotune_level) {
                return 0;
        }
        if (!(int_entries[i].autotune_domain & autotune_domain)) {
                return 0;
        }
        return index->int_options.is_set[i] != 0;
}

1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
/*
 * 1 if integer option `i` takes part in autotuning at the given level
 * and domain and has NOT been set explicitly in `index`; 0 otherwise.
 *
 * An option takes part if its autotune_level is non-zero and not above
 * `autotune_level`, and its autotune_domain shares a bit with
 * `autotune_domain`.
 */
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        if (int_entries[i].autotune_level == 0) {
                return 0;
        }
        if (int_entries[i].autotune_level > autotune_level) {
                return 0;
        }
        if (!(int_entries[i].autotune_domain & autotune_domain)) {
                return 0;
        }
        return !index->int_options.is_set[i];
}

/*
 * Size of the autotuning search space: the product of the
 * cardinalities of all integer options that are tunable (see
 * is_tunable) at the given level and domain. Returns 1 if none is.
 *
 * NOTE(review): the product is accumulated in an int and is not checked
 * for overflow.
 */
int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int N = 1;

        for (int i = 0; i < nelements(int_entries); i++) {
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
                        N *= int_entries[i].cardinality(index);
                }
        }
        return N;
}

1115
1116
/*
 * Format integer option `i` of `index` into `buff` as
 * "name = value -> string\n" if the option has a to_string callback,
 * or "name = value\n" otherwise.
 *
 * `buff` must be large enough for the whole line; its size is not known
 * here. The line is written with a single sprintf call: the previous
 * version passed `buff` as both destination and "%s" argument, which is
 * undefined for overlapping objects.
 */
void elpa_index_print_int_parameter(elpa_index_t index, char* buff, int i)
{
        int value = index->int_options.values[i];

        if (int_entries[i].to_string) {
                sprintf(buff, "%s = %d -> %s\n", int_entries[i].base.name, value, int_entries[i].to_string(value));
        } else {
                sprintf(buff, "%s = %d\n", int_entries[i].base.name, value);
        }
}

Pavel Kus's avatar
Pavel Kus committed
1126
1127
/*
 * Apply the parameter combination number `current` of the autotuning
 * search space to `index`.
 *
 * `current` is decoded as a mixed-radix number: for every tunable
 * option, in table order, the digit `remaining % cardinality` selects
 * the enumerated value and `remaining` is then divided by that
 * cardinality. Each value is stored only if the option's valid()
 * callback accepts it.
 *
 * Returns 1 if all tunable options could be set. Returns 0 as soon as
 * one value is rejected or an option reports no candidates (which would
 * otherwise divide by zero); options processed before that point keep
 * their new values.
 *
 * If the "debug" option is 1 and this is process 0, the chosen
 * combination is printed to stderr.
 */
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int current) {
        int remaining = current;
        int debug = elpa_index_get_int_value(index, "debug", NULL);
        int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);

        for (int i = 0; i < nelements(int_entries); i++) {
                if (!is_tunable(index, i, autotune_level, autotune_domain)) {
                        continue;
                }

                int count = int_entries[i].cardinality(index);
                if (count <= 0) {
                        return 0;
                }

                int value = int_entries[i].enumerate(index, remaining % count);

                /* Try to set option i to that value */
                if (!int_entries[i].valid(index, i, value)) {
                        return 0;
                }
                index->int_options.values[i] = value;
                remaining /= count;
        }

        if (debug == 1 && is_process_id_zero) {
                fprintf(stderr, "\n*** AUTOTUNING: setting a new combination of parameters, idx %d ***\n", current);
                elpa_index_print_autotune_parameters(index, autotune_level, autotune_domain);
                fprintf(stderr, "***\n\n");
        }

        /* Could set all values */
        return 1;
}
1156

1157
/*
 * Print every tunable integer option (see is_tunable) with its current
 * value to stderr, one line each, but only if "is_process_id_zero" is
 * non-zero for this index. Always returns 1.
 */
int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain) {
        char buff[100];
        int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);

        if (is_process_id_zero) {
                for (int i = 0; i < nelements(int_entries); i++) {
                        if (is_tunable(index, i, autotune_level, autotune_domain)) {
                                elpa_index_print_int_parameter(index, buff, i);
                                fprintf(stderr, "%s", buff);
                        }
                }
        }
        return 1;
}
Pavel Kus's avatar
Pavel Kus committed
1170

Pavel Kus's avatar
Pavel Kus committed
1171
int elpa_index_print_autotune_state(elpa_index_t index, int autotune_level, int autotune_domain, int min_loc,
1172
                                    double min_val, int current, int cardinality, char* file_name) {
Pavel Kus's avatar
Pavel Kus committed
1173
1174
1175
        char buff[100];
        elpa_index_t index_best;
        int min_loc_cpy = min_loc;
1176
1177
        FILE *f;

Pavel Kus's avatar
Pavel Kus committed
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
        // get index with the currently best parameters
        index_best = elpa_index_instance();

        if(min_loc_cpy > -1){
                for (int i = 0; i < nelements(int_entries); i++) {
                        if (is_tunable(index, i, autotune_level, autotune_domain)) {

                                int value = int_entries[i].enumerate(index, min_loc_cpy % int_entries[i].cardinality(index));
                                /* we are setting the value for output only, we do not need to check consistency */
                                index_best->int_options.values[i] = value;
                                min_loc_cpy /= int_entries[i].cardinality(index);
                        }
                }
        }
        int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);
        if (is_process_id_zero) {
Pavel Kus's avatar
Pavel Kus committed
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
                int output_to_file = (strlen(file_name) > 0);
                if(output_to_file) {
                        f = fopen(file_name, "w");
                        if(f == NULL){
                                fprintf(stderr, "Cannot open file %s in elpa_index_print_autotune_state\n", file_name);
                                return 0;
                        }
                }
                else {
                        f = stdout;
                }

                if(!output_to_file)
Pavel Kus's avatar
Pavel Kus committed
1207
                        fprintf(f, "\n");
1208
1209
                fprintf(f, "*** AUTOTUNING STATE ***\n");
                fprintf(f, "** This is the state of the autotuning object\n");
1210
1211
                fprintf(f, "autotune_level = %d -> %s\n", autotune_level, elpa_autotune_level_name(autotune_level));
                fprintf(f, "autotune_domain = %d -> %s\n", autotune_domain, elpa_autotune_domain_name(autotune_domain));
1212
1213
1214
1215
                fprintf(f, "autotune_cardinality = %d\n", cardinality);
                fprintf(f, "current_idx = %d\n", current);
                fprintf(f, "best_idx = %d\n", min_loc);
                fprintf(f, "best_time = %g\n", min_val);
Pavel Kus's avatar
Pavel Kus committed
1216
                if(min_loc_cpy > -1) {
1217
                        fprintf(f, "** The following parameters are autotuned with so far the best values\n");
Pavel Kus's avatar
Pavel Kus committed
1218
1219
1220
                        for (int i = 0; i < nelements(int_entries); i++) {
                                if (is_tunable(index, i, autotune_level, autotune_domain)) {
                                        elpa_index_print_int_parameter(index_best, buff, i);
1221
                                        fprintf(f, "%s", buff);
Pavel Kus's avatar
Pavel Kus committed
1222
1223
                                }
                        }
1224
                        fprintf(f, "** The following parameters would be autotuned on the selected autotuning level, but were overridden by the set() method\n");
Pavel Kus's avatar
Pavel Kus committed
1225
1226
                        for (int i = 0; i < nelements(int_entries); i++) {
                                if (is_tunable_but_overriden(index, i, autotune_level, autotune_domain)) {
Pavel Kus's avatar
Pavel Kus committed
1227
                                        elpa_index_print_int_parameter(index, buff, i);
1228
                                        fprintf(f, "%s", buff);
Pavel Kus's avatar
Pavel Kus committed
1229
1230
1231
                                }
                        }
                }else{
1232
                        fprintf(f, "** No output after first step\n");
Pavel Kus's avatar
Pavel Kus committed
1233
                }
Pavel Kus's avatar