elpa_index.c 35.8 KB
Newer Older
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
//    This file is part of ELPA.
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
46
#include <elpa/elpa.h>
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
47
#include "elpa_index.h"
48

49
50
#include <execinfo.h>

Andreas Marek's avatar
Andreas Marek committed
51
52
53
54
55
56
57
58
59
#include "config.h"

#ifdef WITH_OPENMP
#include <omp.h>
#endif

/* File-scope thread bookkeeping with external linkage; neither variable is
 * referenced in the part of the file shown here.
 * NOTE(review): presumably a cached maximum OpenMP thread count and a flag
 * recording whether it has been captured yet -- confirm against the users. */
int max_threads_glob;
int set_max_threads_glob=0;

60
61
/* Forward declarations of the callbacks that are referenced by the option
 * tables before their definitions appear.
 *
 * Callback shapes:
 *   cardinality(index)            - callback stored in .cardinality
 *   enumerate(index, i)           - callback stored in .enumerate
 *   is_valid(index, n, new_value) - non-zero if new_value may be stored in entry n
 *   name(value)                   - string form of a value
 */

/* Generic helpers for boolean-like options */
static int enumerate_identity(elpa_index_t index, int i);
static int cardinality_bool(elpa_index_t index);
static int valid_bool(elpa_index_t index, int n, int new_value);

/* "solver" */
static int number_of_solvers(elpa_index_t index);
static int solver_enumerate(elpa_index_t index, int i);
static int solver_is_valid(elpa_index_t index, int n, int new_value);
static const char* elpa_solver_name(int solver);

/* "real_kernel" */
static int number_of_real_kernels(elpa_index_t index);
static int real_kernel_enumerate(elpa_index_t index, int i);
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);

/* "complex_kernel" */
static int number_of_complex_kernels(elpa_index_t index);
static int complex_kernel_enumerate(elpa_index_t index, int i);
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);

/* "blocking_in_band_to_full" */
static int band_to_full_cardinality(elpa_index_t index);
static int band_to_full_enumerate(elpa_index_t index, int i);
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

/* "omp_threads"
 * NOTE(review): these three names carry an "elpa_" prefix, but the
 * "omp_threads" rows of int_entries[] reference omp_threads_cardinality,
 * omp_threads_enumerate and omp_threads_is_valid. One side has to be renamed
 * to match the actual definitions, which are not visible from here. */
static int elpa_omp_threads_cardinality(elpa_index_t index);
static int elpa_omp_threads_enumerate(elpa_index_t index, int i);
static int elpa_omp_threads_is_valid(elpa_index_t index, int n, int new_value);

/* "min_tile_size" */
static int min_tile_size_cardinality(elpa_index_t index);

/* "intermediate_bandwidth" */
static int intermediate_bandwidth_cardinality(elpa_index_t index);
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i);
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value);

/* Validators for the set-once parameters */
static int na_is_valid(elpa_index_t index, int n, int new_value);
static int nev_is_valid(elpa_index_t index, int n, int new_value);
static int bw_is_valid(elpa_index_t index, int n, int new_value);
static int gpu_is_valid(elpa_index_t index, int n, int new_value);

static int is_positive(elpa_index_t index, int n, int new_value);

/* String conversion for double-valued options */
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
102

103
/* Designated initializer for the `.base` member shared by all entry types.
 *
 *   option_name         - MUST be a string literal: it is pasted onto
 *                         "ELPA_DEFAULT_" / "ELPA_FORCE_" by adjacent string
 *                         literal concatenation to form the names of the two
 *                         environment variables of the option.
 *   option_description  - human readable description
 *   once_value          - stored in .once (set functions refuse a second
 *                         assignment when it is non-zero)
 *   readonly_value      - stored in .readonly (set functions refuse any
 *                         assignment when it is non-zero)
 */
#define BASE_ENTRY(option_name, option_description, once_value, readonly_value) \
                .base = { \
                        .name = option_name, \
                        .description = option_description, \
                        .once = once_value, \
                        .readonly = readonly_value, \
                        .env_default = "ELPA_DEFAULT_" option_name, \
                        .env_force = "ELPA_FORCE_" option_name, \
                }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
112

113
/* Table row for an integer parameter that may be assigned only once
 * (once = 1, readonly = 0). valid_func may be NULL, in which case any value
 * is accepted. All members not named here are zero-initialized. */
#define INT_PARAMETER_ENTRY(option_name, option_description, valid_func) \
        { \
                BASE_ENTRY(option_name, option_description, 1, 0), \
                .valid = valid_func, \
        }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
118

119
/* Table row for a re-assignable 0/1 option. It wires in the generic boolean
 * helpers (cardinality_bool, enumerate_identity, valid_bool) and records the
 * default value plus the autotuning level and domain of the option. */
#define BOOL_ENTRY(option_name, option_description, default, tune_level, tune_domain) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
                .default_value = default, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
        }

130
/* Table row for a re-assignable integer option with caller-supplied callbacks.
 *
 *   card_func       - stored in .cardinality
 *   enumerate_func  - stored in .enumerate
 *   valid_func      - stored in .valid, may be NULL (every value accepted)
 *   to_string_func  - stored in .to_string, may be NULL (the value is then
 *                     read and printed as a plain number)
 */
#define INT_ENTRY(option_name, option_description, default, tune_level, tune_domain, card_func, enumerate_func, valid_func, to_string_func) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
                .default_value = default, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = card_func, \
                .enumerate = enumerate_func, \
                .valid = valid_func, \
                .to_string = to_string_func, \
        }

142
/* Table row for a re-assignable integer option without any callbacks: no
 * validation, no enumeration, no string form (all zero-initialized). */
#define INT_ANY_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
        }

147
148
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
149
static const elpa_index_int_entry_t int_entries[] = {
150
151
152
153
154
155
156
157
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid),
158
        INT_PARAMETER_ENTRY("suppress_warnings", "If specified, warnings will NOT be printed on this mpi rank", NULL),
159
160
161
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication"),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication"),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator"),
162
        INT_ANY_ENTRY("blacs_context", "BLACS context"),
163
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
164
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name),
165
166
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL),
167
168
169
170
171
172
173
174
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
175
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
176
177
                        number_of_real_kernels, real_kernel_enumerate, \
                        real_kernel_is_valid, real_kernel_name),
178
179
180
181
182
183
184
185
        INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
186
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
187
188
                        number_of_complex_kernels, complex_kernel_enumerate, \
                        complex_kernel_is_valid, complex_kernel_name),
189

190
191
        INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        min_tile_size_cardinality, NULL, NULL, NULL),
192
        INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
193
                        intermediate_bandwidth_cardinality, intermediate_bandwidth_enumerate, intermediate_bandwidth_is_valid, NULL),
194

195
	INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
Andreas Marek's avatar
Andreas Marek committed
196
197
198
199
200
#ifdef WITH_OPENMP
	INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL),
#else
	INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL),
#endif
201
202
        //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL),
203
204
205
206
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
207
208
209
};

/* Table row for a read-only double option (once = 0, readonly = 1).
 * BASE_ENTRY takes exactly four arguments; the previous expansion passed a
 * fifth one and would not have compiled on first use (the macro has no user
 * in the visible part of the file, which is why this went unnoticed). */
#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 1) \
        }

/* Table of all double-valued options. It has no rows at the moment; the
 * accessor functions generated further down test `sizeof(double_entries) == 0`
 * to bail out early, so they rely on the compiler accepting this empty
 * initializer as a zero-sized array. */
static const elpa_index_double_entry_t double_entries[] = {
        /* Empty for now */
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
217

218
/* Release an index object together with the values / is_set / notified
 * arrays of every option type.
 *
 * index - object to destroy; NULL is accepted and ignored, mirroring free().
 */
void elpa_index_free(elpa_index_t index) {
        if (index == NULL) {
                return;
        }

/* Frees the three per-type arrays of one option type */
#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

        free(index);
}

static int compar(const void *key, const void *member) {
        const char *name = (const char *) key;
231
        elpa_index_int_entry_t *entry = (elpa_index_int_entry_t *) member;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
232

233
        int l1 = strlen(entry->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
234
235
236
237
        int l2 = strlen(name);
        if (l1 != l2) {
                return 1;
        }
238
        if (strncmp(name, entry->base.name, l1) == 0) {
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
239
240
241
242
243
244
                return 0;
        } else {
                return 1;
        }
}

245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
/* Generates find_<TYPE>_entry(name) for every option type: a linear search
 * (lfind with compar) through <TYPE>_entries that yields the position of the
 * row called `name`, or -1 if there is no such row. */
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                size_t count = nelements(TYPE##_entries); \
                elpa_index_##TYPE##_entry_t *hit = \
                        lfind((const void*) name, (const void *) TYPE##_entries, &count, \
                              sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (!hit) { \
                        return -1; \
                } \
                return (hit - &TYPE##_entries[0]); \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


/* Generates getenv_<TYPE>() for every option type.
 *
 *   index         - index whose `notified` flags are consulted and updated
 *   env_variable  - name of the environment variable to read
 *   notify_flag   - bit recorded in notified[n] once the user has been told
 *   n             - position of the option in <TYPE>_entries
 *   value         - receives the parsed value on success
 *   error_string  - leading word of the notice ("Option", ...)
 *
 * Returns 1 if the variable is set and could be parsed (*value is valid),
 * 0 otherwise (*value untouched). A parse failure is reported on stderr,
 * naming the environment variable itself (the option name used to be printed
 * there, contradicting the message text). Unless "suppress_warnings" is set,
 * a notice about the override is printed: once per notify_flag if the value
 * has a string representation, on every call otherwise.
 */
#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                int err; \
                char *env_value = getenv(env_variable); \
                if (env_value) { \
                        err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
                        if (err != ELPA_OK) { \
                                fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                                env_variable, env_value, elpa_strerr(err)); \
                        } else {\
                                const char *value_string = NULL; \
                                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
                                        if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
                                                if (! elpa_index_int_value_is_set(index, "suppress_warnings")) { \
                                                        fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                                      error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                                } \
                                                index->TYPE##_options.notified[n] |= notify_flag; \
                                        } \
                                } else { \
                                        if (! elpa_index_int_value_is_set(index, "suppress_warnings")) { \
                                                fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
                                                        error_string, TYPE##_entries[n].base.name, *value, env_variable);\
                                        } \
                                } \
                                return 1; \
                        } \
                } \
                return 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


/* Generates elpa_index_get_<TYPE>_value() for every option type.
 *
 *   index - index to read from
 *   name  - option name
 *   error - may be NULL; otherwise receives ELPA_OK or
 *           ELPA_ERROR_ENTRY_NOT_FOUND
 *
 * Returns the value forced through the ELPA_FORCE_<name> environment variable
 * if that is set and parseable (only consulted for options that are neither
 * `once` nor `readonly`), else the value stored in the index. If the option
 * does not exist, ERROR_VALUE is returned. This now also holds when the table
 * of the type is empty: that path used to return the error *code* as the
 * value and left *error untouched.
 */
#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                TYPE ret; \
                int n = -1; \
                if (sizeof(TYPE##_entries) != 0) { \
                        n = find_##TYPE##_entry(name); \
                } \
                if (n < 0) { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
                int from_env = 0; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                } \
                if (!from_env) { \
                        ret = index->TYPE##_options.values[n]; \
                } \
                if (error != NULL) { \
                        *error = ELPA_OK; \
                } \
                return ret; \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


/* Generates elpa_index_get_<TYPE>_loc() for every option type: returns the
 * address of the value slot of option `name` inside `index`, or NULL if the
 * type has no options at all or none with that name. Neither environment
 * overrides nor validators are involved when reading or writing through the
 * returned pointer. */
#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        return &index->TYPE##_options.values[n]; \
                } else { \
                        return NULL; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


336
/* Generates elpa_index_set_<TYPE>_value() for every option type.
 *
 * Checks are made in this order and the first failing one decides the result:
 *   ELPA_ERROR_ENTRY_NOT_FOUND      - no option called `name`
 *   ELPA_ERROR_ENTRY_INVALID_VALUE  - the validator of the option rejects `value`
 *   ELPA_ERROR_ENTRY_ALREADY_SET    - set-once option that already has a value
 *   ELPA_ERROR_ENTRY_READONLY       - option is read-only
 * Otherwise the value is stored, the option is marked as set and ELPA_OK is
 * returned.
 */
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                if (TYPE##_entries[n].valid != NULL) { \
                        if(!TYPE##_entries[n].valid(index, n, value)) { \
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
                        } \
                } \
                /* logical AND: both operands are truth values, not bit masks */ \
                if (TYPE##_entries[n].base.once && index->TYPE##_options.is_set[n]) { \
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
                if (TYPE##_entries[n].base.readonly) { \
                        return ELPA_ERROR_ENTRY_READONLY; \
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)


/* Generates elpa_index_<TYPE>_value_is_set() for every option type.
 * Returns 1 if option `name` has been given a value in `index`, 0 if it has
 * not, and the (negative) code ELPA_ERROR_ENTRY_NOT_FOUND if the type has no
 * option of that name. Callers must therefore compare against 1 / 0 rather
 * than treat the result as a plain truth value. */
#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                return index->TYPE##_options.is_set[n] ? 1 : 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


/* Type-agnostic "is this option set?" query.
 *
 * Asks every option type in turn and returns the first non-negative answer
 * (1 = set, 0 = not set). If no type knows `name`, a message is printed to
 * stderr and the error code of the last type asked is returned.
 */
int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int res = ELPA_ERROR;

/* A non-negative result means the option exists for this type */
#define RET_IF_SET(TYPE, ...) \
        res = elpa_index_##TYPE##_value_is_set(index, name); \
        if (res >= 0) { \
                return res; \
        }

        FOR_ALL_TYPES(RET_IF_SET)

        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return res;
}

397
398
399
400
401
/* Check whether `new_value` would be accepted for integer option `name`
 * without storing it.
 *
 * Returns ELPA_OK if the option has no validator or the validator accepts the
 * value, ELPA_ERROR if the validator rejects it and
 * ELPA_ERROR_ENTRY_NOT_FOUND if there is no such option.
 */
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        if (int_entries[n].valid == NULL) {
                return ELPA_OK;
        }
        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
}

409
int elpa_int_value_to_string(char *name, int value, const char **string) {
410
411
412
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
413
        }
414
        if (int_entries[n].to_string == NULL) {
415
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
416
417
418
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
419
420
}

421
422

/* Length (without the terminating NUL) of the string form of `value` for
 * integer option `name`; 0 if the option is unknown or has no string form. */
int elpa_int_value_to_strlen(char *name, int value) {
        const char *string = NULL;
        /* On failure `string` simply stays NULL, so the status is not needed */
        elpa_int_value_to_string(name, value, &string);
        if (string == NULL) {
                return 0;
        } else {
                return strlen(string);
        }
}
431

432
433
434
435
436
437

/* Length of the string form of the value currently stored in `index` for
 * integer option `name`; 0 if the option is unknown or has no string form.
 * The stored value is read directly, without environment overrides. */
int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int n = find_int_entry(name);
        if (n < 0) {
                return 0;
        }
        return elpa_int_value_to_strlen(name, index->int_options.values[n]);
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
443
444
445
446
447
448
449
450
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
451
                if (ret == 1) {
452
                        *value = val;
453
454
                        return ELPA_OK;
                } else {
455
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
456
457
458
                }
        }

459
460
        for (int i = 0; i < int_entries[n].cardinality(NULL); i++) {
                int candidate = int_entries[n].enumerate(NULL, i);
461
462
463
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
464
                }
465
        }
466
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
467
468
}

469
int elpa_double_string_to_value(char *name, char *string, double *value) {
470
471
        double val;
        int ret = sscanf(string, "%lf", &val);
472
        if (ret == 1) {
473
474
                *value = val;
                return ELPA_OK;
475
        } else {
476
477
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
478
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
479
480
481
        }
}

482
int elpa_double_value_to_string(char *name, double value, const char **string) {
483
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
484
}
485

486
/* Result of the cardinality callback of integer option `name`, called without
 * an index (NULL). Returns ELPA_ERROR_ENTRY_NOT_FOUND if the option does not
 * exist or has no cardinality callback. */
int elpa_option_cardinality(char *name) {
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].cardinality) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        return int_entries[n].cardinality(NULL);
}
493

494
/* Result of the enumerate callback of integer option `name` for position `i`,
 * called without an index (NULL). Returns 0 if the option does not exist or
 * has no enumerate callback; note that 0 can also be a regular result. */
int elpa_option_enumerate(char *name, int i) {
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].enumerate) {
                return 0;
        }
        return int_entries[n].enumerate(NULL, i);
}

502

503
/* Helper functions for simple int entries */
504
/* Cardinality callback of boolean-like options: always 2.
 * `index` is not used. */
static int cardinality_bool(elpa_index_t index) {
        return 2;
}
507

508
509
/* Validator of boolean-like options: accepts exactly 0 and 1.
 * `index` and `n` are not used. */
static int valid_bool(elpa_index_t index, int n, int new_value) {
        return (0 <= new_value) && (new_value < 2);
}

512
/* Enumerate callback for options whose i-th value is simply i.
 * `index` is not used. */
static int enumerate_identity(elpa_index_t index, int i) {
        return i;
}

516
517
518
519
520
521
522
523
524
525
/* Helper functions for specific options */

/* X-macro callback: a `case` that returns the stringified identifier of the
 * option value. Any further list columns are ignored. */
#define NAME_CASE(name, value, ...) \
        case value: return #name;

/* X-macro callback: a `case` that accepts the listed value unconditionally. */
#define VALID_CASE(name, value) \
        case value: return 1;

526
/* X-macro callback: a `case` that accepts the listed value only if the
 * `available` column of the list is non-zero and the function-like macro
 * `other_checks`, applied to the value, also yields non-zero. `available` is
 * parenthesized so that an expression in that column cannot be regrouped by
 * the `&&`. */
#define VALID_CASE_3(name, value, available, other_checks) \
        case value: \
                return (available) && (other_checks(value));
529
530
531
532
533
534

/* String form of a solver value: the identifier it is listed under in
 * ELPA_FOR_ALL_SOLVERS, or "(Invalid solver)" for any other number. */
static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        return "(Invalid solver)";
        }
}

538
/* Cardinality callback of "solver": returns ELPA_NUMBER_OF_SOLVERS.
 * `index` is not used. */
static int number_of_solvers(elpa_index_t index) {
        return ELPA_NUMBER_OF_SOLVERS;
}

542
/* Enumerate callback of "solver": maps position `i` to a solver value, or 0
 * if `i` matches none of the generated case labels. `index` is not used.
 *
 * The switch is generated from the ELPA_FOR_ALL_SOLVERS list; the helper
 * macros defined here are deliberately left defined, because the kernel
 * enumerate functions further down reuse them.
 */
static int solver_enumerate(elpa_index_t index, int i) {
/* Contributes +1 for every list element whose value is smaller than the
 * length of the array `array_of_size_value` that is in scope at the point of
 * expansion, and +0 otherwise. */
#define OPTION_RANK(name, value, ...) \
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)

/* NOTE(review): EMPTY/DEFER1/EVAL look like the usual deferred-expansion
 * idiom that lets the list macro be expanded a second time inside its own
 * expansion -- confirm before touching. */
#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

/* One case per list element. The array has `value` elements, so the label
 * `0 + sum(OPTION_RANK)` over the whole list is the number of list elements
 * with a smaller value, i.e. the rank of this element. The braces give every
 * element its own array. */
#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
565
/* Validator of "solver": accepts exactly the values listed in
 * ELPA_FOR_ALL_SOLVERS. `index` and `n` are not used. */
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        return 0;
        }
}

573
/* Cardinality callback of "real_kernel": returns
 * ELPA_2STAGE_NUMBER_OF_REAL_KERNELS. `index` is not used. */
static int number_of_real_kernels(elpa_index_t index) {
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
576

577
/* Enumerate callback of "real_kernel": maps position `i` to a real kernel
 * value, or 0 if `i` matches none of the generated case labels. Built with
 * the ENUMERATE_CASE / EVAL macros that are defined inside solver_enumerate,
 * here applied to ELPA_FOR_ALL_2STAGE_REAL_KERNELS. `index` is not used. */
static int real_kernel_enumerate(elpa_index_t index,int i) {
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}
586

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
587
static const char *real_kernel_name(int kernel) {
588
589
590
591
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
592
        }
593
}
594

595
596
597
/* Extra check handed to VALID_CASE_3: the real GPU kernel is only acceptable
 * while the GPU is active, every other kernel passes. Expands to an expression
 * that reads a variable named `gpu_is_active`, which must be in scope at the
 * point of use. Argument and expansion are parenthesized so that the macro is
 * safe inside larger expressions. */
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        ((kernel_number) == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1)

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
598
/* Validator of "real_kernel".
 *
 * While "solver" is ELPA_SOLVER_1STAGE only ELPA_2STAGE_REAL_DEFAULT is
 * accepted. Otherwise the value has to be listed in
 * ELPA_FOR_ALL_2STAGE_REAL_KERNELS with a non-zero `available` column, and the
 * GPU kernel additionally requires the "gpu" option to be non-zero.
 * `n` is not used.
 */
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
        /* Read by REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE inside the cases below */
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        switch(new_value) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
                default:
                        return 0;
        }
}
610

611
/* Cardinality callback of "complex_kernel": returns
 * ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS. `index` is not used. */
static int number_of_complex_kernels(elpa_index_t index) {
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
614

615

616
/* Enumerate callback of "complex_kernel": maps position `i` to a complex
 * kernel value, or 0 if `i` matches none of the generated case labels. Built
 * with the ENUMERATE_CASE / EVAL macros that are defined inside
 * solver_enumerate, here applied to ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS.
 * `index` is not used. */
static int complex_kernel_enumerate(elpa_index_t index,int i) {
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
626
static const char *complex_kernel_name(int kernel) {
627
628
629
630
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
631
        }
632
}
633

634
635
636
/* Extra check handed to VALID_CASE_3: the complex GPU kernel is only
 * acceptable while the GPU is active, every other kernel passes. Expands to an
 * expression that reads a variable named `gpu_is_active`, which must be in
 * scope at the point of use. Argument and expansion are parenthesized so that
 * the macro is safe inside larger expressions. */
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        ((kernel_number) == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1)

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
637
/*
 * Validator for the complex 2-stage kernel option.
 *
 * When the "solver" option is ELPA_SOLVER_1STAGE, only
 * ELPA_2STAGE_COMPLEX_DEFAULT is accepted. Otherwise new_value has to match
 * one of the cases generated from ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS via
 * VALID_CASE_3, with COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE handed in as
 * the extra per-kernel check; any value without a generated case is rejected.
 *
 * The parameter n is not used.
 */
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_get_int_value(index, "solver", NULL) == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }

        /* This local must keep its name: COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE
         * expands to an expression that reads gpu_is_active. */
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);

        switch(new_value) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
                default:
                        return 0;
        }
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
649
650
651
652
653

/*
 * Validator for the "na" option: only strictly positive values pass.
 * Neither index nor n is consulted.
 */
static int na_is_valid(elpa_index_t index, int n, int new_value) {
        if (new_value <= 0) {
                return 0;
        }
        return 1;
}

654
655
656
657
/*
 * Validator for the "nev" option.
 *
 * The value is rejected while "na" has not been set. Otherwise it must lie
 * in the closed range [0, na]. The parameter n is not used.
 */
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (!elpa_index_int_value_is_set(index, "na")) {
                return 0;
        }
        if (new_value < 0) {
                return 0;
        }

        const int upper_bound = elpa_index_get_int_value(index, "na", NULL);
        return new_value <= upper_bound;
}

/*
 * Generic validator: accepts new_value only if it is greater than zero.
 * Neither index nor n is consulted.
 */
static int is_positive(elpa_index_t index, int n, int new_value) {
        return 0 < new_value;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
665
666
667
668
669
670
671
672
673
/*
 * Validator for a bandwidth value.
 *
 * The value is rejected while "na" has not been set. Otherwise it must lie
 * in the half-open range [0, na). The parameter n is not used.
 */
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }

        const int matrix_size = elpa_index_get_int_value(index, "na", NULL);
        if (new_value < 0) {
                return 0;
        }
        return new_value < matrix_size;
}
674

675
676
677
678
/*
 * Validator for the "gpu" option: the option is a plain on/off switch, so
 * only the values 0 and 1 are accepted. Neither index nor n is consulted.
 */
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        switch (new_value) {
                case 0:
                case 1:
                        return 1;
                default:
                        return 0;
        }
}

679
/*
 * Cardinality callback for the band-to-full blocking option: there are ten
 * candidate values. The index argument is not consulted.
 */
static int band_to_full_cardinality(elpa_index_t index) {
        enum { NUMBER_OF_BLOCKINGS = 10 };

        return NUMBER_OF_BLOCKINGS;
}

683
/*
 * Enumerate callback for the band-to-full blocking option: position i
 * (counted from zero) corresponds to the value i + 1. The index argument is
 * not consulted.
 */
static int band_to_full_enumerate(elpa_index_t index, int i) {
        const int blocking = 1 + i;

        return blocking;
}

/*
 * Validator for the band-to-full blocking option: accepts values in the
 * closed range [1, 10]. Neither index nor n is consulted.
 */
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
        enum { MAX_BLOCK = 10 };

        if (new_value < 1) {
                return 0;
        }
        return new_value <= MAX_BLOCK;
}

692
/*
 * Cardinality callback for the OpenMP thread-count option.
 *
 * With WITH_OPENMP defined, omp_get_max_threads() is queried the first time
 * this function runs (while set_max_threads_glob is still 0) and the result
 * is cached in max_threads_glob; later calls reuse the cached value.
 * Without WITH_OPENMP both globals are simply set to 1.
 *
 * Returns the value of max_threads_glob. The index argument is not used.
 */
static int elpa_omp_threads_cardinality(elpa_index_t index) {
#ifdef WITH_OPENMP
        if (set_max_threads_glob == 0) {
                max_threads_glob = omp_get_max_threads();
                set_max_threads_glob = 1;
        }
#else
        max_threads_glob = 1;
        set_max_threads_glob = 1;
#endif

        return max_threads_glob;
}

709
/*
 * Enumerate callback for the OpenMP thread-count option: position i
 * (counted from zero) corresponds to i + 1 threads. The index argument is
 * not consulted.
 */
static int elpa_omp_threads_enumerate(elpa_index_t index, int i) {
        const int thread_count = 1 + i;

        return thread_count;
}

/*
 * Validator for the OpenMP thread-count option.
 *
 * Accepts values in the closed range [1, limit], where limit is the current
 * value of max_threads_glob when WITH_OPENMP is defined and 1 otherwise.
 * Neither index nor n is consulted.
 */
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
#ifdef WITH_OPENMP
        const int upper_limit = max_threads_glob;
#else
        const int upper_limit = 1;
#endif

        if (new_value < 1) {
                return 0;
        }
        return new_value <= upper_limit;
}

724
/*
 * Cardinality callback for the minimal tile size option.
 *
 * Not implemented: calling it reports the source location on stderr and
 * terminates the program through abort(), so it never returns.
 */
static int min_tile_size_cardinality(elpa_index_t index) {
        /* TODO: provide a real cardinality */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}
729

730
/*
 * Cardinality callback for the intermediate bandwidth option.
 *
 * Returns na / nblk (integer division). The result is 0 when index is NULL
 * or when either "na" or "nblk" has not been set yet.
 */
static int intermediate_bandwidth_cardinality(elpa_index_t index) {
        if (index == NULL || elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        const int matrix_size = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        const int block_size = elpa_index_get_int_value(index, "nblk", NULL);

        return matrix_size / block_size;
}

/*
 * Enumerate callback for the intermediate bandwidth option.
 *
 * Position i (counted from zero) corresponds to the (i + 1)-th multiple of
 * nblk. The result is 0 when index is NULL or "nblk" has not been set yet.
 */
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i) {
        if (index == NULL || elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }

        const int block_size = elpa_index_get_int_value(index, "nblk", NULL);
        return (i+1) * block_size;
}

/*
 * Validator for the intermediate bandwidth option.
 *
 * A value is accepted when all of the following hold:
 *   - both "na" and "nblk" have been set,
 *   - 1 < new_value <= na,
 *   - new_value is a multiple of nblk (a message is printed to stderr if
 *     this last condition is the one that fails).
 *
 * Returns 1 for an acceptable value and 0 otherwise. The parameter n is not
 * used.
 */
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value) {
        int na, nblk;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

        /* The modulo below is undefined for nblk == 0; no bandwidth can be a
         * multiple of a non-positive block size anyway. */
        if (nblk <= 0) {
                return 0;
        }

        if ((new_value <= 1) || (new_value > na)) {
                return 0;
        }
        if (new_value % nblk != 0) {
                fprintf(stderr, "intermediate bandwidth has to be multiple of nblk\n");
                return 0;
        }

        /* All checks passed. Without this explicit return the function would
         * run off its end and the caller would read an indeterminate value. */
        return 1;
}

780
elpa_index_t elpa_index_instance() {
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
798
}
799

800
801
802
803
804
805
806
807
808
809
810
811
/*
 * Decides whether integer option i takes part in autotuning.
 *
 * An option is tunable when its autotune_level is non-zero and does not
 * exceed the requested autotune_level, its autotune_domain shares at least
 * one bit with the requested autotune_domain, and its is_set flag in index
 * is still zero.
 *
 * Returns 1 if all conditions hold and 0 otherwise.
 */
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        if (int_entries[i].autotune_level == 0) {
                return 0;
        }
        if (int_entries[i].autotune_level > autotune_level) {
                return 0;
        }
        if (!(int_entries[i].autotune_domain & autotune_domain)) {
                return 0;
        }
        return !index->int_options.is_set[i];
}

/*
 * Computes the size of the autotuning search space: the product of the
 * cardinalities of all integer options that is_tunable() reports as tunable
 * for the given level and domain. Returns 1 when no option is tunable.
 */
int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int combinations = 1;

        for (int entry = 0; entry < nelements(int_entries); entry++) {
                if (!is_tunable(index, entry, autotune_level, autotune_domain)) {
                        continue;
                }
                combinations *= int_entries[entry].cardinality(index);
        }

        return combinations;
}

/*
 * Applies the n-th combination of the autotuning search space to index.
 *
 * n is decoded like a mixed-radix number: for every tunable integer option
 * (in table order) the digit n % cardinality selects the value through the
 * option's enumerate() callback, and n is then divided by that cardinality
 * before the next option is handled.
 *
 * Returns 1 if every tunable option could be set. Returns 0 as soon as an
 * option reports a non-positive cardinality or its valid() callback rejects
 * the selected value; options processed before that point keep the values
 * already written to them.
 *
 * When the "debug" option is 1, the resulting values of all tunable options
 * are printed to stderr.
 */
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
        int debug = elpa_index_get_int_value(index, "debug", NULL);
        for (int i = 0; i < nelements(int_entries); i++) {
                if (!is_tunable(index, i, autotune_level, autotune_domain)) {
                        continue;
                }

                /* Query the cardinality once so that the modulo and the
                 * division are guaranteed to use the same radix. */
                const int cardinality = int_entries[i].cardinality(index);
                if (cardinality <= 0) {
                        /* Nothing to choose from; "n % 0" would be undefined. */
                        return 0;
                }

                int value = int_entries[i].enumerate(index, n % cardinality);

                /* Try to set option i to that value */
                if (!int_entries[i].valid(index, i, value)) {
                        return 0;
                }
                index->int_options.values[i] = value;

                n /= cardinality;
        }
        if (debug == 1) {
                for (int i = 0; i < nelements(int_entries); i++) {
                        if (is_tunable(index, i, autotune_level, autotune_domain)) {
                                fprintf(stderr, "%s = ", int_entries[i].base.name);
                                if (int_entries[i].to_string) {
                                        fprintf(stderr, "%s\n", int_entries[i].to_string(index->int_options.values[i]));
                                } else {
                                        fprintf(stderr, "%d\n", index->int_options.values[i]);
                                }
                        }
                }
                fprintf(stderr, "\n");
        }

        /* Could set all values */
        return 1;
}
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878

/*
 * Prints the current value of every tunable integer option to stderr, one
 * " name =  value" line per option followed by an empty line. Options with a
 * to_string callback are printed through it, all others as plain integers.
 *
 * The index is only read, never modified; the parameter n is not used.
 * Always returns 1.
 */
int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
        for (int entry = 0; entry < nelements(int_entries); entry++) {
                if (!is_tunable(index, entry, autotune_level, autotune_domain)) {
                        continue;
                }

                fprintf(stderr, " %s = ", int_entries[entry].base.name);
                if (int_entries[entry].to_string) {
                        fprintf(stderr, " %s\n", int_entries[entry].to_string(index->int_options.values[entry]));
                } else {
                        fprintf(stderr, " %d\n", index->int_options.values[entry]);
                }
        }
        fprintf(stderr, "\n");

        return 1;
}