elpa_index.c 29 KB
Newer Older
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
//    This file is part of ELPA.
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
46
#include <elpa/elpa.h>
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
47
#include "elpa_index.h"
48

49
50
51
52
53
54
#include <execinfo.h>

static int enumerate_identity(int i);
static int cardinality_bool(void);
static int valid_bool(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
55
56
57
static int number_of_solvers();
static int solver_enumerate(int i);
static int solver_is_valid(elpa_index_t index, int n, int new_value);
58
59
static const char* elpa_solver_name(int solver);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
60
61
62
63
static int number_of_real_kernels();
static int real_kernel_enumerate(int i);
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);
64

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
65
66
67
68
static int number_of_complex_kernels();
static int complex_kernel_enumerate(int i);
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);
69

70
71
72
73
static int band_to_full_cardinality();
static int band_to_full_enumerate(int i);
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
74
static int na_is_valid(elpa_index_t index, int n, int new_value);
75
static int nev_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
76
static int bw_is_valid(elpa_index_t index, int n, int new_value);
77
static int gpu_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
78

79
80
static int is_positive(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
81
82
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
83

84
#define BASE_ENTRY(option_name, option_description, once_value, readonly_value) \
85
86
87
88
89
90
91
92
                .base = { \
                        .name = option_name, \
                        .description = option_description, \
                        .once = once_value, \
                        .readonly = readonly_value, \
                        .env_default = "ELPA_DEFAULT_" option_name, \
                        .env_force = "ELPA_FORCE_" option_name, \
                }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
93

94
#define INT_PARAMETER_ENTRY(option_name, option_description, valid_func) \
95
        { \
96
                BASE_ENTRY(option_name, option_description, 1, 0), \
97
                .valid = valid_func, \
98
        }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
99

100
#define BOOL_ENTRY(option_name, option_description, default, tune_level, tune_domain) \
101
        { \
102
                BASE_ENTRY(option_name, option_description, 0, 0), \
103
                .default_value = default, \
104
105
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
106
107
108
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
109
110
        }

111
#define INT_ENTRY(option_name, option_description, default, tune_level, tune_domain, card_func, enumerate_func, valid_func, to_string_func) \
112
        { \
113
                BASE_ENTRY(option_name, option_description, 0, 0), \
114
                .default_value = default, \
115
116
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
117
118
119
120
                .cardinality = card_func, \
                .enumerate = enumerate_func, \
                .valid = valid_func, \
                .to_string = to_string_func, \
121
122
        }

123
#define INT_ANY_ENTRY(option_name, option_description) \
124
        { \
125
                BASE_ENTRY(option_name, option_description, 0, 0), \
126
127
        }

128
129
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
130
static const elpa_index_int_entry_t int_entries[] = {
131
132
133
134
135
136
137
138
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid),
139
140
141
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication"),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication"),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator"),
142
        INT_ANY_ENTRY("blacs_context", "BLACS context"),
143
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
144
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name),
145
146
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL),
147
148
149
150
151
152
153
154
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
155
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
156
157
                        number_of_real_kernels, real_kernel_enumerate, \
                        real_kernel_is_valid, real_kernel_name),
158
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
159
160
                        number_of_complex_kernels, complex_kernel_enumerate, \
                        complex_kernel_is_valid, complex_kernel_name),
161

162
163
164
165
	//INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
	INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
        //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL),
166
167
168
169
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
170
171
172
};

#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
173
        { \
174
                BASE_ENTRY(option_name, option_description, 0, 1, 0) \
175
176
177
        }

static const elpa_index_double_entry_t double_entries[] = {
178
        /* Empty for now */
179
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
180

181
void elpa_index_free(elpa_index_t index) {
182
183
184
185
186
187
188
#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
189
190
191
192
193
        free(index);
}

static int compar(const void *key, const void *member) {
        const char *name = (const char *) key;
194
        elpa_index_int_entry_t *entry = (elpa_index_int_entry_t *) member;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
195

196
        int l1 = strlen(entry->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
197
198
199
200
        int l2 = strlen(name);
        if (l1 != l2) {
                return 1;
        }
201
        if (strncmp(name, entry->base.name, l1) == 0) {
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
202
203
204
205
206
207
                return 0;
        } else {
                return 1;
        }
}

208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                elpa_index_##TYPE##_entry_t *entry; \
                size_t nmembers = nelements(TYPE##_entries); \
                entry = lfind((const void*) name, (const void *) TYPE##_entries, &nmembers, sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (entry) { \
                        return (entry - &TYPE##_entries[0]); \
                } else { \
                        return -1; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                int err; \
                char *env_value = getenv(env_variable); \
                if (env_value) { \
227
                        err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
228
229
230
231
232
                        if (err != ELPA_OK) { \
                                fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                                TYPE##_entries[n].base.name, env_value, elpa_strerr(err)); \
                        } else {\
                                const char *value_string = NULL; \
233
                                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
                                        if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
                                                fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                                error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                                index->TYPE##_options.notified[n] |= notify_flag; \
                                        } \
                                } else { \
                                        fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
                                                        error_string, TYPE##_entries[n].base.name, *value, env_variable);\
                                } \
                                return 1; \
                        } \
                } \
                return 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                TYPE ret; \
254
255
256
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        int from_env = 0; \
                        if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                                from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                        } \
                        if (!from_env) { \
                                ret = index->TYPE##_options.values[n]; \
                        } \
                        if (error != NULL) { \
                                *error = ELPA_OK; \
                        } \
                        return ret; \
                } else { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
282
283
284
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
285
286
287
288
289
290
291
292
293
294
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        return &index->TYPE##_options.values[n]; \
                } else { \
                        return NULL; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


295
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
296
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
297
298
299
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
300
301
302
303
304
305
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                }; \
                if (TYPE##_entries[n].valid != NULL) { \
                        if(!TYPE##_entries[n].valid(index, n, value)) { \
306
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
307
308
309
                        }; \
                } \
                if (TYPE##_entries[n].base.once & index->TYPE##_options.is_set[n]) { \
310
311
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
312
                if (TYPE##_entries[n].base.readonly) { \
313
                        return ELPA_ERROR_ENTRY_READONLY; \
314
315
316
317
318
319
320
321
322
323
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)


#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
324
325
326
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        if (index->TYPE##_options.is_set[n]) { \
                                return 1; \
                        } else { \
                                return 0; \
                        } \
                } else { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int res = ELPA_ERROR;

#define RET_IF_SET(TYPE, ...) \
        res = elpa_index_##TYPE##_value_is_set(index, name); \
        if (res >= 0) { \
                return res; \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
348
        }
349
350
351

        FOR_ALL_TYPES(RET_IF_SET)

352
353
354
355
        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return res;
}

356
357
358
359
360
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name); \
        if (n >= 0) { \
                if (int_entries[n].valid == NULL) {
                        return ELPA_OK;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
361
                } else {
362
                        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
363
364
                }
        }
365
        return ELPA_ERROR_ENTRY_NOT_FOUND;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
366
367
}

368
int elpa_int_value_to_string(char *name, int value, const char **string) {
369
370
371
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
372
        }
373
        if (int_entries[n].to_string == NULL) {
374
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
375
376
377
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
378
379
}

380
381

int elpa_int_value_to_strlen(char *name, int value) {
382
        const char *string = NULL;
383
        elpa_int_value_to_string(name, value, &string);
384
        if (string == NULL) {
385
386
387
                return 0;
        } else {
                return strlen(string);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
388
389
        }
}
390

391
392
393
394
395
396

int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int n = find_int_entry(name);
        if (n < 0) {
                return 0;
        }
397
        return elpa_int_value_to_strlen(name, index->int_options.values[n]);
398
399
400
401
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
402
403
404
405
406
407
408
409
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
410
                if (ret == 1) {
411
                        *value = val;
412
413
                        return ELPA_OK;
                } else {
414
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
415
416
417
418
419
420
421
422
                }
        }

        for (int i = 0; i < int_entries[n].cardinality(); i++) {
                int candidate = int_entries[n].enumerate(i);
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
423
                }
424
        }
425
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
426
427
}

428
int elpa_double_string_to_value(char *name, char *string, double *value) {
429
430
        double val;
        int ret = sscanf(string, "%lf", &val);
431
        if (ret == 1) {
432
433
                *value = val;
                return ELPA_OK;
434
        } else {
435
436
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
437
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
438
439
440
        }
}

441
int elpa_double_value_to_string(char *name, double value, const char **string) {
442
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
443
}
444

445
int elpa_option_cardinality(char *name) {
446
447
448
449
450
451
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].cardinality) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        return int_entries[n].cardinality();
}
452

453
int elpa_option_enumerate(char *name, int i) {
454
455
456
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].enumerate) {
                return 0;
457
        }
458
        return int_entries[n].enumerate(i);
459
460
}

461

462
/* Helper functions for simple int entries */
463
464
465
static int cardinality_bool(void) {
        return 2;
}
466

467
468
static int valid_bool(elpa_index_t index, int n, int new_value) {
        return (0 <= new_value) && (new_value < 2);
469
470
}

471
static int enumerate_identity(int i) {
472
473
474
        return i;
}

475
476
477
478
479
480
481
482
483
484
/* Helper functions for specific options */

#define NAME_CASE(name, value, ...) \
        case value: \
                return #name;

#define VALID_CASE(name, value) \
        case value: \
                return 1;

485
#define VALID_CASE_3(name, value, available, other_checks) \
486
        case value: \
487
                return available && (other_checks(value));
488
489
490
491
492
493

static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        return "(Invalid solver)";
494
495
496
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
497
static int number_of_solvers() {
498
        return ELPA_NUMBER_OF_SOLVERS;
499
500
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
501
static int solver_enumerate(int i) {
502
#define OPTION_RANK(name, value, ...) \
503
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520

#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
521
522
523
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
524
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
525
526
527
528
529
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        return 0;
        }
530
531
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
532
static int number_of_real_kernels() {
533
534
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
535

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
536
static int real_kernel_enumerate(int i) {
537
538
539
540
541
542
543
544
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}
545

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
546
static const char *real_kernel_name(int kernel) {
547
548
549
550
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
551
        }
552
}
553

554
555
556
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
557
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
558
559
560
561
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
562
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
563
        switch(new_value) {
564
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
565
566
                default:
                        return 0;
567
        }
568
}
569

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
570
static int number_of_complex_kernels() {
571
572
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
573

574

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
575
static int complex_kernel_enumerate(int i) {
576
577
578
579
580
581
582
583
584
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
585
static const char *complex_kernel_name(int kernel) {
586
587
588
589
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
590
        }
591
}
592

593
594
595
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
596
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
597
598
599
600
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }
601
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
602
        switch(new_value) {
603
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
604
605
606
607
                default:
                        return 0;
        }
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
608
609
610
611
612

static int na_is_valid(elpa_index_t index, int n, int new_value) {
        return new_value > 0;
}

613
614
615
616
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (!elpa_index_int_value_is_set(index, "na")) {
                return 0;
        }
617
        return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
618
619
620
621
622
623
}

static int is_positive(elpa_index_t index, int n, int new_value) {
        return new_value > 0;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
624
625
626
627
628
629
630
631
632
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        int na;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }

        na = elpa_index_get_int_value(index, "na", NULL);
        return (0 <= new_value) && (new_value < na);
}
633

634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        return new_value == 0 || new_value == 1;
}

static int band_to_full_cardinality() {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

static int band_to_full_enumerate(int i) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

656
elpa_index_t elpa_index_instance() {
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
674
}
675

676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        return (int_entries[i].autotune_level != 0) &&
               (int_entries[i].autotune_level <= autotune_level) &&
               (int_entries[i].autotune_domain & autotune_domain) &&
               (!index->int_options.is_set[i]);
}

int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int N = 1;

        for (int i = 0; i < nelements(int_entries); i++) { \
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
                        N *= int_entries[i].cardinality();
                }
        }
        return N;
}

int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
        int debug = elpa_index_get_int_value(index, "debug", NULL);
        for (int i = 0; i < nelements(int_entries); i++) {
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
                        int value = int_entries[i].enumerate(n % int_entries[i].cardinality());
                        /* Try to set option i to that value */
                        if (int_entries[i].valid(index, i, value)) {
                                index->int_options.values[i] = value;
                        } else {
                                return 0;
                        }
                        n /= int_entries[i].cardinality();
                }
        }
        if (debug == 1) {
                for (int i = 0; i < nelements(int_entries); i++) {
                        if (is_tunable(index, i, autotune_level, autotune_domain)) {
                                fprintf(stderr, "%s = ", int_entries[i].base.name);
                                if (int_entries[i].to_string) {
                                        fprintf(stderr, "%s\n", int_entries[i].to_string(index->int_options.values[i]));
                                } else {
                                        fprintf(stderr, "%d\n", index->int_options.values[i]);
                                }
                        }
                }
                fprintf(stderr, "\n");
        }

        /* Could set all values */
        return 1;
}