elpa_index.c 27.8 KB
Newer Older
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
//    This file is part of ELPA.
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
46
#include <elpa/elpa.h>
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
47
#include "elpa_index.h"
48

49
50
51
52
53
54
#include <execinfo.h>

static int enumerate_identity(int i);
static int cardinality_bool(void);
static int valid_bool(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
55
56
57
static int number_of_solvers();
static int solver_enumerate(int i);
static int solver_is_valid(elpa_index_t index, int n, int new_value);
58
59
static const char* elpa_solver_name(int solver);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
60
61
62
63
static int number_of_real_kernels();
static int real_kernel_enumerate(int i);
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);
64

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
65
66
67
68
static int number_of_complex_kernels();
static int complex_kernel_enumerate(int i);
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);
69

70
71
72
73
static int band_to_full_cardinality();
static int band_to_full_enumerate(int i);
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
74
static int na_is_valid(elpa_index_t index, int n, int new_value);
75
static int nev_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
76
static int bw_is_valid(elpa_index_t index, int n, int new_value);
77
static int gpu_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
78

79
80
static int is_positive(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
81
82
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
83

84
/* Designated-initializer fragment for the `.base` member shared by all option
 * entries. option_name must be a string literal: the ELPA_DEFAULT_<name> and
 * ELPA_FORCE_<name> environment variable names are built from it by adjacent
 * string literal concatenation. */
#define BASE_ENTRY(option_name, option_description, once_value, readonly_value) \
                .base = { \
                        .name = option_name, \
                        .description = option_description, \
                        .once = once_value, \
                        .readonly = readonly_value, \
                        .env_default = "ELPA_DEFAULT_" option_name, \
                        .env_force = "ELPA_FORCE_" option_name, \
                }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
93

94
/* Table entry for an integer parameter that may be set only once
 * (once = 1, readonly = 0). valid_func may be NULL, in which case the set
 * function accepts any value. */
#define INT_PARAMETER_ENTRY(option_name, option_description, valid_func) \
        { \
                BASE_ENTRY(option_name, option_description, 1, 0), \
                .valid = valid_func, \
        }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
99

100
/* Table entry for a 0/1 option that can be changed repeatedly
 * (once = 0, readonly = 0). It uses the generic bool helpers for
 * cardinality, enumeration and validation and has no string representation. */
#define BOOL_ENTRY(option_name, option_description, default, tune_level, tune_domain) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
                .default_value = default, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
        }

111
/* Table entry for an enumerated integer option (once = 0, readonly = 0).
 *   card_func       returns the number of possible values
 *   enumerate_func  maps an index in [0, cardinality) to a value
 *   valid_func      checks a candidate value against the current index state
 *   to_string_func  maps a value to its name, or NULL if the option has no
 *                   string representation */
#define INT_ENTRY(option_name, option_description, default, tune_level, tune_domain, card_func, enumerate_func, valid_func, to_string_func) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
                .default_value = default, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = card_func, \
                .enumerate = enumerate_func, \
                .valid = valid_func, \
                .to_string = to_string_func, \
        }

123
/* Table entry for an integer option without any validation, enumeration or
 * autotuning information; all members other than `.base` stay zero/NULL. */
#define INT_ANY_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
        }

128
129
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
130
static const elpa_index_int_entry_t int_entries[] = {
131
132
133
134
135
136
137
138
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid),
139
140
141
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication"),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication"),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator"),
142
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
143
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name),
144
145
146
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL),
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
147
148
                        number_of_real_kernels, real_kernel_enumerate, \
                        real_kernel_is_valid, real_kernel_name),
149
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
150
151
                        number_of_complex_kernels, complex_kernel_enumerate, \
                        complex_kernel_is_valid, complex_kernel_name),
152

153
154
155
156
157
158
	INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
159
160
161
};

/* Table entry for a read-only double option (once = 0, readonly = 1).
 * BASE_ENTRY takes exactly four arguments; the former fifth argument would
 * have been a compile error as soon as this macro was used. */
#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 1), \
        }

/* Table of double-valued options. It is currently empty, which is why the
 * generated accessors test `sizeof(double_entries) == 0` before searching.
 * NOTE(review): an array defined with an empty initializer list is not
 * ISO C11; this presumably relies on a compiler extension - confirm. */
static const elpa_index_double_entry_t double_entries[] = {
        /* Empty for now */
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
169

170
/* Release an index: for every option type the values / is_set / notified
 * arrays are freed, then the index structure itself. A NULL index is a no-op,
 * mirroring free(NULL). */
void elpa_index_free(elpa_index_t index) {
        if (index == NULL) {
                return;
        }

#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

        free(index);
}

static int compar(const void *key, const void *member) {
        const char *name = (const char *) key;
183
        elpa_index_int_entry_t *entry = (elpa_index_int_entry_t *) member;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
184

185
        int l1 = strlen(entry->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
186
187
188
189
        int l2 = strlen(name);
        if (l1 != l2) {
                return 1;
        }
190
        if (strncmp(name, entry->base.name, l1) == 0) {
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
191
192
193
194
195
196
                return 0;
        } else {
                return 1;
        }
}

197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
/* Generates find_<TYPE>_entry(name) for every option type: a linear search
 * (lfind with compar) through <TYPE>_entries that yields the position of the
 * entry called `name`, or -1 when no entry has that name. */
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                size_t count = nelements(TYPE##_entries); \
                elpa_index_##TYPE##_entry_t *hit = \
                        lfind((const void*) name, (const void *) TYPE##_entries, &count, \
                              sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (!hit) { \
                        return -1; \
                } \
                return (hit - &TYPE##_entries[0]); \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


/* Generates getenv_<TYPE>(): look up the environment variable `env_variable`
 * and, if it is set and parses as a value for option number n, store the
 * result in *value.
 *
 *   notify_flag   bit recorded in the option's `notified` mask so that the
 *                 "is set to ... due to environment variable" message for
 *                 options with a string representation is printed only once
 *   error_string  prefix of that message ("Option" / "Default for option")
 *
 * Returns 1 if *value was taken from the environment, 0 otherwise (variable
 * not set, or its content could not be parsed - the latter is reported on
 * stderr, naming the offending environment variable).
 * NOTE(review): for options without a string representation the message is
 * printed on every call; it is not throttled by the `notified` mask. */
#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                int err; \
                char *env_value = getenv(env_variable); \
                if (env_value) { \
                        err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
                        if (err != ELPA_OK) { \
                                fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                                env_variable, env_value, elpa_strerr(err)); \
                        } else {\
                                const char *value_string = NULL; \
                                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
                                        if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
                                                fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                                error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                                index->TYPE##_options.notified[n] |= notify_flag; \
                                        } \
                                } else { \
                                        fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
                                                        error_string, TYPE##_entries[n].base.name, *value, env_variable);\
                                } \
                                return 1; \
                        } \
                } \
                return 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


/* Generates elpa_index_get_<TYPE>_value(index, name, error).
 *
 * Returns the current value of option `name`. For options that are neither
 * set-once nor read-only, a value given in the ELPA_FORCE_<name> environment
 * variable takes precedence over the stored value.
 *
 * If `error` is not NULL it receives ELPA_OK on success and
 * ELPA_ERROR_ENTRY_NOT_FOUND if no such option exists; in the latter case
 * ERROR_VALUE is returned. This also holds when the table for TYPE is empty
 * (previously that path returned the error code as the value and left
 * *error untouched). */
#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                int n = -1; \
                if (sizeof(TYPE##_entries) != 0) { \
                        n = find_##TYPE##_entry(name); \
                } \
                if (n < 0) { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
                TYPE ret; \
                int from_env = 0; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                } \
                if (!from_env) { \
                        ret = index->TYPE##_options.values[n]; \
                } \
                if (error != NULL) { \
                        *error = ELPA_OK; \
                } \
                return ret; \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


/* Generates elpa_index_get_<TYPE>_loc(index, name): returns the address of
 * the stored value of option `name` inside the index, or NULL if the table
 * for TYPE is empty or has no entry of that name. Environment overrides are
 * not consulted here. */
#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        return &index->TYPE##_options.values[n]; \
                } else { \
                        return NULL; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


284
/* Generates elpa_index_set_<TYPE>_value(index, name, value).
 *
 * Checks are made in this order, returning the first failure:
 *   ELPA_ERROR_ENTRY_NOT_FOUND      no option of that name (or empty table)
 *   ELPA_ERROR_ENTRY_INVALID_VALUE  the entry's validator rejected `value`
 *                                   (entries with a NULL validator accept
 *                                   every value)
 *   ELPA_ERROR_ENTRY_ALREADY_SET    a set-once option was already set
 *   ELPA_ERROR_ENTRY_READONLY       the option is read-only
 * On success the value is stored, the option is marked as set and ELPA_OK is
 * returned. */
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                if (TYPE##_entries[n].valid != NULL) { \
                        if (!TYPE##_entries[n].valid(index, n, value)) { \
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
                        } \
                } \
                if (TYPE##_entries[n].base.once && index->TYPE##_options.is_set[n]) { \
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
                if (TYPE##_entries[n].base.readonly) { \
                        return ELPA_ERROR_ENTRY_READONLY; \
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)


/* Generates elpa_index_<TYPE>_value_is_set(index, name): returns 1 if the
 * option was explicitly set, 0 if it was not, and
 * ELPA_ERROR_ENTRY_NOT_FOUND if the table for TYPE is empty or has no entry
 * of that name. */
#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        if (index->TYPE##_options.is_set[n]) { \
                                return 1; \
                        } else { \
                                return 0; \
                        } \
                } else { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


/* Type-independent "is set" query: asks the per-type is_set function of each
 * option type in turn and returns the first non-negative answer (1 = set,
 * 0 = not set). If no type knows an option called `name`, an error is
 * printed to stderr and the last (negative) error code is returned. */
int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int res = ELPA_ERROR;

#define RET_IF_SET(TYPE, ...) \
        res = elpa_index_##TYPE##_value_is_set(index, name); \
        if (res >= 0) { \
                return res; \
        }

        FOR_ALL_TYPES(RET_IF_SET)

        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return res;
}

345
346
347
348
349
/* Check whether `new_value` would be accepted for the integer option `name`
 * without modifying the index.
 * Returns ELPA_OK if the value is acceptable (or the option has no
 * validator), ELPA_ERROR if the validator rejects it, and
 * ELPA_ERROR_ENTRY_NOT_FOUND if there is no such option. */
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name);
        if (n >= 0) {
                if (int_entries[n].valid == NULL) {
                        return ELPA_OK;
                } else {
                        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
                }
        }
        return ELPA_ERROR_ENTRY_NOT_FOUND;
}

357
int elpa_int_value_to_string(char *name, int value, const char **string) {
358
359
360
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
361
        }
362
        if (int_entries[n].to_string == NULL) {
363
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
364
365
366
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
367
368
}

369
370

/* Length of the symbolic name of `value` for integer option `name`, or 0 if
 * the option is unknown or has no string representation (in both cases
 * elpa_int_value_to_string leaves `string` at NULL). */
int elpa_int_value_to_strlen(char *name, int value) {
        const char *string = NULL;
        elpa_int_value_to_string(name, value, &string);
        if (string == NULL) {
                return 0;
        } else {
                return strlen(string);
        }
}
379

380
381
382
383
384
385

/* Length of the symbolic name of the value currently stored in `index` for
 * integer option `name`; 0 if the option does not exist or has no string
 * representation. */
int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int n = find_int_entry(name);
        if (n < 0) {
                return 0;
        }
        return elpa_int_value_to_strlen(name, index->int_options.values[n]);
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
391
392
393
394
395
396
397
398
399
400
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
                if (ret == strlen(string)) {
                        *value = val;
401
402
                        return ELPA_OK;
                } else {
403
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
404
405
406
407
408
409
410
411
                }
        }

        for (int i = 0; i < int_entries[n].cardinality(); i++) {
                int candidate = int_entries[n].enumerate(i);
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
412
                }
413
        }
414
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
415
416
}

417
int elpa_double_string_to_value(char *name, char *string, double *value) {
418
419
420
421
422
        double val;
        int ret = sscanf(string, "%lf", &val);
        if (ret == strlen(string)) {
                *value = val;
                return ELPA_OK;
423
        } else {
424
425
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
426
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
427
428
429
        }
}

430
int elpa_double_value_to_string(char *name, double value, const char **string) {
431
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
432
}
433

434
/* Number of distinct values of integer option `name`, as reported by the
 * entry's cardinality callback. Returns ELPA_ERROR_ENTRY_NOT_FOUND if the
 * option does not exist or has no cardinality callback. */
int elpa_option_cardinality(char *name) {
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].cardinality) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        return int_entries[n].cardinality();
}
441

442
/* The i-th possible value of integer option `name`, as reported by the
 * entry's enumerate callback. Returns 0 if the option does not exist or
 * cannot be enumerated; note that 0 may also be a regular value. */
int elpa_option_enumerate(char *name, int i) {
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].enumerate) {
                return 0;
        }
        return int_entries[n].enumerate(i);
}

450

451
/* Helper functions for simple int entries */
452
453
454
/* A boolean option has exactly two possible values, 0 and 1. */
static int cardinality_bool(void) {
        enum { NUMBER_OF_BOOL_VALUES = 2 };

        return NUMBER_OF_BOOL_VALUES;
}
455

456
457
/* Validator for boolean options: only 0 and 1 are accepted. `index` and `n`
 * are unused; they are part of the common validator signature. */
static int valid_bool(elpa_index_t index, int n, int new_value) {
        return (0 <= new_value) && (new_value < 2);
}

460
/* Enumeration callback for options whose i-th value is simply i. */
static int enumerate_identity(int i) {
        return i;
}

464
465
466
467
468
469
470
471
472
473
/* Helper functions for specific options */

/* X-macro callback: emits a switch case that maps the numeric constant to
 * the spelling of its identifier. Arguments beyond the first two are ignored. */
#define NAME_CASE(symbol, number, ...) \
        case number: \
                return #symbol;

/* X-macro callback: emits a switch case that accepts the numeric constant
 * unconditionally. The identifier argument is not used. */
#define VALID_CASE(symbol, number) \
        case number: \
                return 1;

474
/* X-macro callback: emits a switch case that accepts `value` only if the
 * option was flagged `available` and the function-like macro `other_checks`,
 * applied to the value, also evaluates to non-zero. `available` is
 * parenthesized so that any expression can be passed safely. */
#define VALID_CASE_3(name, value, available, other_checks) \
        case value: \
                return (available) && (other_checks(value));
477
478
479
480
481
482

/* Symbolic name of a solver constant, i.e. the identifier listed in
 * ELPA_FOR_ALL_SOLVERS, or "(Invalid solver)" for any other value. */
static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        return "(Invalid solver)";
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
486
static int number_of_solvers() {
487
        return ELPA_NUMBER_OF_SOLVERS;
488
489
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
490
static int solver_enumerate(int i) {
491
#define OPTION_RANK(name, value, ...) \
492
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509

#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
510
511
512
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
513
/* Validator of the "solver" option: accepts exactly the constants listed in
 * ELPA_FOR_ALL_SOLVERS. `index` and `n` are unused. */
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
521
static int number_of_real_kernels() {
522
523
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
524

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
525
static int real_kernel_enumerate(int i) {
526
527
528
529
530
531
532
533
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}
534

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
535
static const char *real_kernel_name(int kernel) {
536
537
538
539
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
540
        }
541
}
542

543
544
545
/* Extra check used with VALID_CASE_3: the real GPU kernel is acceptable only
 * while the local variable `gpu_is_active` of the expanding function is
 * non-zero; every other kernel passes. Fully parenthesized so the conditional
 * expression cannot be re-associated at the point of use. */
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        ((kernel_number) == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1)

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
546
/* Validator of the "real_kernel" option.
 * With the 1-stage solver selected only ELPA_2STAGE_REAL_DEFAULT is accepted.
 * Otherwise a kernel is accepted if it is listed as available in
 * ELPA_FOR_ALL_2STAGE_REAL_KERNELS and, for the GPU kernel, if the "gpu"
 * option is currently non-zero. `n` is unused. */
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
        /* Referenced by REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE below */
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        switch(new_value) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
                default:
                        return 0;
        }
}
558

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
559
static int number_of_complex_kernels() {
560
561
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
562

563

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
564
static int complex_kernel_enumerate(int i) {
565
566
567
568
569
570
571
572
573
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
574
static const char *complex_kernel_name(int kernel) {
575
576
577
578
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
579
        }
580
}
581

582
583
584
/* Extra check used with VALID_CASE_3: the complex GPU kernel is acceptable
 * only while the local variable `gpu_is_active` of the expanding function is
 * non-zero; every other kernel passes. Fully parenthesized so the conditional
 * expression cannot be re-associated at the point of use. */
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        ((kernel_number) == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1)

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
585
/* Validator of the "complex_kernel" option.
 * With the 1-stage solver selected only ELPA_2STAGE_COMPLEX_DEFAULT is
 * accepted. Otherwise a kernel is accepted if it is listed as available in
 * ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS and, for the GPU kernel, if the "gpu"
 * option is currently non-zero. `n` is unused. */
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }
        /* Referenced by COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE below */
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        switch(new_value) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
                default:
                        return 0;
        }
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
597
598
599
600
601

/* Validator of "na": the matrix size has to be strictly positive.
 * `index` and `n` are not consulted. */
static int na_is_valid(elpa_index_t index, int n, int new_value) {
        if (new_value > 0) {
                return 1;
        }
        return 0;
}

602
603
604
605
/* Validator of "nev": requires "na" to be set already and accepts
 * 0 <= new_value <= na. The is-set query is compared against 1 (as in
 * bw_is_valid) so that a negative error code is not mistaken for "set".
 * `n` is unused. */
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
}

/* Generic validator accepting strictly positive values only.
 * `index` and `n` are not consulted. */
static int is_positive(elpa_index_t index, int n, int new_value) {
        return (new_value > 0) ? 1 : 0;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
613
614
615
616
617
618
619
620
621
/* Validator of "bandwidth": requires "na" to be set already and accepts
 * 0 <= new_value < na. `n` is unused.
 * NOTE(review): the option description also asks for a multiple of nblk,
 * which is not checked here. */
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") == 1) {
                const int matrix_size = elpa_index_get_int_value(index, "na", NULL);
                return (new_value >= 0) && (new_value < matrix_size);
        }
        return 0;
}
622

623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
/* Validator of "gpu": the option is a plain on/off switch, so only 0 and 1
 * pass. `index` and `n` are not consulted. */
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        switch (new_value) {
                case 0:
                case 1:
                        return 1;
                default:
                        return 0;
        }
}

/* Cardinality callback registered for "blocking_in_band_to_full".
 * Not implemented yet: prints a TODO marker with file and line to stderr and
 * terminates the process via abort(); it never returns a value. */
static int band_to_full_cardinality() {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

/* Enumeration callback registered for "blocking_in_band_to_full".
 * Not implemented yet: prints a TODO marker with file and line to stderr and
 * terminates the process via abort(); `i` is ignored. */
static int band_to_full_enumerate(int i) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

/* Validator registered for "blocking_in_band_to_full".
 * Not implemented yet: prints a TODO marker with file and line to stderr and
 * terminates the process via abort(), so any attempt to validate a value for
 * this option ends the program. All arguments are ignored. */
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

645
elpa_index_t elpa_index_instance() {
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
663
}
664

665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
/* Decide whether integer option number i takes part in autotuning.
 * It does when it has a non-zero tuning level that does not exceed
 * `autotune_level`, its tuning domain shares a bit with `autotune_domain`,
 * and it has not been set explicitly in `index`. Returns 1 or 0. */
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        if (int_entries[i].autotune_level == 0) {
                return 0;
        }
        if (!(int_entries[i].autotune_level <= autotune_level)) {
                return 0;
        }
        if (!(int_entries[i].autotune_domain & autotune_domain)) {
                return 0;
        }
        return !index->int_options.is_set[i];
}

/* Size of the autotuning search space: the product of the cardinalities of
 * all integer options that are tunable for the given level and domain
 * (1 if there are none). */
int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int combinations = 1;
        int entry = 0;

        while (entry < nelements(int_entries)) {
                if (is_tunable(index, entry, autotune_level, autotune_domain)) {
                        combinations *= int_entries[entry].cardinality();
                }
                entry++;
        }
        return combinations;
}

/* Apply combination number n of the autotuning search space to `index`.
 *
 * n is decoded as a mixed-radix number: walking the tunable options in table
 * order, each one consumes the digit `n % cardinality`, which is mapped to a
 * value by the option's enumerate callback.
 *
 * Returns 1 if every tunable option accepted its value. Returns 0 as soon as
 * one validator rejects a value; options handled before that point keep
 * their new values. With the "debug" option equal to 1 the chosen values are
 * printed to stderr after a successful pass. */
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
        const int verbose = (elpa_index_get_int_value(index, "debug", NULL) == 1);
        int remaining = n;

        for (int opt = 0; opt < nelements(int_entries); opt++) {
                if (!is_tunable(index, opt, autotune_level, autotune_domain)) {
                        continue;
                }
                int choices = int_entries[opt].cardinality();
                int candidate = int_entries[opt].enumerate(remaining % choices);
                /* Try to set this option to the candidate value */
                if (!int_entries[opt].valid(index, opt, candidate)) {
                        return 0;
                }
                index->int_options.values[opt] = candidate;
                remaining /= choices;
        }

        if (verbose) {
                for (int opt = 0; opt < nelements(int_entries); opt++) {
                        if (!is_tunable(index, opt, autotune_level, autotune_domain)) {
                                continue;
                        }
                        fprintf(stderr, "%s = ", int_entries[opt].base.name);
                        if (int_entries[opt].to_string) {
                                fprintf(stderr, "%s\n", int_entries[opt].to_string(index->int_options.values[opt]));
                        } else {
                                fprintf(stderr, "%d\n", index->int_options.values[opt]);
                        }
                }
                fprintf(stderr, "\n");
        }

        /* Could set all values */
        return 1;
}