elpa_index.c 34.3 KB
Newer Older
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
//    This file is part of ELPA.
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
46
#include <elpa/elpa.h>
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
47
#include "elpa_index.h"
48

49
50
#include <execinfo.h>

Andreas Marek's avatar
Andreas Marek committed
51
52
53
54
55
56
57
58
59
#include "config.h"

#ifdef WITH_OPENMP
#include <omp.h>
#endif

/* File-scope globals with external linkage.
 * NOTE(review): no use of these two variables is visible in this part of the
 * file; presumably they cache the maximum OpenMP thread count and record
 * whether it has been determined yet -- confirm against the omp_threads
 * helper functions. */
int max_threads_glob;
int set_max_threads_glob=0;

60
61
static int enumerate_identity(elpa_index_t index, int i);
static int cardinality_bool(elpa_index_t index);
62
63
static int valid_bool(elpa_index_t index, int n, int new_value);

64
65
static int number_of_solvers(elpa_index_t index);
static int solver_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
66
static int solver_is_valid(elpa_index_t index, int n, int new_value);
67
68
static const char* elpa_solver_name(int solver);

69
70
static int number_of_real_kernels(elpa_index_t index);
static int real_kernel_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
71
72
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);
73

74
75
static int number_of_complex_kernels(elpa_index_t index);
static int complex_kernel_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
76
77
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);
78

79
80
static int band_to_full_cardinality(elpa_index_t index);
static int band_to_full_enumerate(elpa_index_t index, int i);
81
82
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

83
84
85
/* Prototypes of the callbacks for the "omp_threads" option.
 * NOTE(review): the "omp_threads" rows of int_entries[] refer to
 * omp_threads_cardinality / omp_threads_enumerate / omp_threads_is_valid,
 * i.e. without the elpa_ prefix used here. The definitions are not visible
 * from this part of the file; confirm which spelling they use and align the
 * prototypes and the table with it. */
static int elpa_omp_threads_cardinality(elpa_index_t index);
static int elpa_omp_threads_enumerate(elpa_index_t index, int i);
static int elpa_omp_threads_is_valid(elpa_index_t index, int n, int new_value);
Andreas Marek's avatar
Andreas Marek committed
86

87
88
static int min_tile_size_cardinality(elpa_index_t index);
static int intermediate_bandwidth_cardinality(elpa_index_t index);
89

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
90
static int na_is_valid(elpa_index_t index, int n, int new_value);
91
static int nev_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
92
static int bw_is_valid(elpa_index_t index, int n, int new_value);
93
static int gpu_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
94

95
96
static int is_positive(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
97
98
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
99

100
/* Designated initializer for the `.base` member of an option table entry.
 *
 * option_name must be a string literal: the names of the two associated
 * environment variables are built by compile-time string concatenation as
 * "ELPA_DEFAULT_<name>" and "ELPA_FORCE_<name>".
 * once_value and readonly_value fill the `once` and `readonly` flags. */
#define BASE_ENTRY(option_name, option_description, once_value, readonly_value) \
                .base = { \
                        .name = option_name, \
                        .description = option_description, \
                        .once = once_value, \
                        .readonly = readonly_value, \
                        .env_default = "ELPA_DEFAULT_" option_name, \
                        .env_force = "ELPA_FORCE_" option_name, \
                }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
109

110
/* Table row for an integer parameter that may be set only once
 * (once = 1, readonly = 0). valid_func is the validation callback stored in
 * `.valid`; the table passes NULL for options without a validator. */
#define INT_PARAMETER_ENTRY(option_name, option_description, valid_func) \
        { \
                BASE_ENTRY(option_name, option_description, 1, 0), \
                .valid = valid_func, \
        }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
115

116
/* Table row for a 0/1 option: not once, not readonly, with the generic
 * boolean callbacks (cardinality_bool, enumerate_identity, valid_bool).
 * default_val is stored as the default value; tune_level and tune_domain fill
 * the autotune fields. */
#define BOOL_ENTRY(option_name, option_description, default_val, tune_level, tune_domain) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
                .default_value = default_val, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
        }

127
/* Table row for a general integer option (not once, not readonly) with
 * caller-supplied callbacks: card_func (number of admissible values),
 * enumerate_func (i-th admissible value), valid_func (validation) and
 * to_string_func (symbolic name of a value). The table passes NULL for
 * callbacks an option does not provide. */
#define INT_ENTRY(option_name, option_description, default_val, tune_level, tune_domain, card_func, enumerate_func, valid_func, to_string_func) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
                .default_value = default_val, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = card_func, \
                .enumerate = enumerate_func, \
                .valid = valid_func, \
                .to_string = to_string_func, \
        }

139
/* Table row for an integer option that accepts any value: only `.base` is
 * initialized (not once, not readonly), so every other member of the entry
 * is zero-initialized. */
#define INT_ANY_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
        }

144
145
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
146
static const elpa_index_int_entry_t int_entries[] = {
147
148
149
150
151
152
153
154
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid),
155
        INT_PARAMETER_ENTRY("suppress_warnings", "If specified, warnings will NOT be printed on this mpi rank", NULL),
156
157
158
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication"),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication"),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator"),
159
        INT_ANY_ENTRY("blacs_context", "BLACS context"),
160
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
161
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name),
162
163
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL),
164
165
166
167
168
169
170
171
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
172
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
173
174
                        number_of_real_kernels, real_kernel_enumerate, \
                        real_kernel_is_valid, real_kernel_name),
175
176
177
178
179
180
181
182
        INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
183
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
184
185
                        number_of_complex_kernels, complex_kernel_enumerate, \
                        complex_kernel_is_valid, complex_kernel_name),
186

187
188
        INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        min_tile_size_cardinality, NULL, NULL, NULL),
189
190
        INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        intermediate_bandwidth_cardinality, NULL, NULL, NULL),
191

192
	INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
Andreas Marek's avatar
Andreas Marek committed
193
194
195
196
197
#ifdef WITH_OPENMP
	INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL),
#else
	INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL),
#endif
198
199
        //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL),
200
201
202
203
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
204
205
206
};

/* Table row for a read-only double option (once = 0, readonly = 1).
 * BASE_ENTRY takes exactly four arguments, so the entry passes only the
 * name, the description and the two flags. */
#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 1) \
        }

/* Table of all double options. It currently has no rows; the accessor macros
 * in this file test sizeof(double_entries) == 0 to short-circuit lookups. */
static const elpa_index_double_entry_t double_entries[] = {
        /* Empty for now */
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
214

215
/* Release an index object together with the values, is_set and notified
 * arrays it holds for every option type. Passing NULL is a no-op, mirroring
 * the behavior of free(). */
void elpa_index_free(elpa_index_t index) {
        if (index == NULL) {
                return;
        }

#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

        free(index);
}

static int compar(const void *key, const void *member) {
        const char *name = (const char *) key;
228
        elpa_index_int_entry_t *entry = (elpa_index_int_entry_t *) member;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
229

230
        int l1 = strlen(entry->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
231
232
233
234
        int l2 = strlen(name);
        if (l1 != l2) {
                return 1;
        }
235
        if (strncmp(name, entry->base.name, l1) == 0) {
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
236
237
238
239
240
241
                return 0;
        } else {
                return 1;
        }
}

242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
/* Generates find_<TYPE>_entry(name) for every option type: a linear search
 * through <TYPE>_entries with compar() that yields the position of the entry
 * named `name`, or -1 when there is no such entry. */
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                size_t count = nelements(TYPE##_entries); \
                elpa_index_##TYPE##_entry_t *hit = lfind((const void*) name, (const void *) TYPE##_entries, \
                                &count, sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (!hit) { \
                        return -1; \
                } \
                return (hit - &TYPE##_entries[0]); \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


/* Generates getenv_<TYPE>(): try to read option number `n` from the
 * environment variable `env_variable`.
 *
 * Returns 1 and stores the parsed value in *value when the variable is set
 * and parses successfully; returns 0 when it is unset or cannot be parsed
 * (a parse failure is reported on stderr, naming the environment variable).
 *
 * On success a notice is written to stderr unless "suppress_warnings" is set
 * on the index. For values that have a string representation the notice is
 * emitted only once per notify_flag, tracked in the option's `notified`
 * bits; for other values no such bookkeeping is done. error_string is the
 * leading word of the notice. */
#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                int err; \
                char *env_value = getenv(env_variable); \
                if (env_value) { \
                        err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
                        if (err != ELPA_OK) { \
                                fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                                env_variable, env_value, elpa_strerr(err)); \
                        } else {\
                                const char *value_string = NULL; \
                                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
                                        if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
                                                if (! elpa_index_int_value_is_set(index, "suppress_warnings")) { \
                                                        fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                                      error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                                } \
                                                index->TYPE##_options.notified[n] |= notify_flag; \
                                        } \
                                } else { \
                                        if (! elpa_index_int_value_is_set(index, "suppress_warnings")) { \
                                                fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
                                                        error_string, TYPE##_entries[n].base.name, *value, env_variable);\
                                        } \
                                } \
                                return 1; \
                        } \
                } \
                return 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


/* Generates elpa_index_get_<TYPE>_value(index, name, error).
 *
 * Returns the value of option `name`. For options that are neither `once`
 * nor `readonly`, a parsable value in the option's "ELPA_FORCE_<name>"
 * environment variable takes precedence over the value stored in the index.
 *
 * When `error` is not NULL it receives ELPA_OK on success or
 * ELPA_ERROR_ENTRY_NOT_FOUND when no such option exists; in the latter case
 * ERROR_VALUE is returned. An empty option table is reported the same way as
 * any other unknown name. */
#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                TYPE ret; \
                int n = (sizeof(TYPE##_entries) == 0) ? -1 : find_##TYPE##_entry(name); \
                if (n < 0) { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
                int from_env = 0; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                } \
                if (!from_env) { \
                        ret = index->TYPE##_options.values[n]; \
                } \
                if (error != NULL) { \
                        *error = ELPA_OK; \
                } \
                return ret; \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


/* Generates elpa_index_get_<TYPE>_loc(index, name): the address of the
 * storage slot holding option `name` inside the index, or NULL when the
 * option table is empty or has no entry of that name. */
#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return NULL; \
                } \
                return &index->TYPE##_options.values[n]; \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


333
/* Generates elpa_index_set_<TYPE>_value(index, name, value).
 *
 * Stores `value` for option `name` and marks the option as set. Returns
 *   ELPA_ERROR_ENTRY_NOT_FOUND     - no such option (or empty option table),
 *   ELPA_ERROR_ENTRY_INVALID_VALUE - the option's validator rejected `value`,
 *   ELPA_ERROR_ENTRY_ALREADY_SET   - a set-once option has already been set,
 *   ELPA_ERROR_ENTRY_READONLY      - the option is read-only,
 *   ELPA_OK                        - otherwise.
 * The checks run in the order listed. */
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                if (TYPE##_entries[n].valid != NULL) { \
                        if (!TYPE##_entries[n].valid(index, n, value)) { \
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
                        } \
                } \
                if (TYPE##_entries[n].base.once && index->TYPE##_options.is_set[n]) { \
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
                if (TYPE##_entries[n].base.readonly) { \
                        return ELPA_ERROR_ENTRY_READONLY; \
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)


/* Generates elpa_index_<TYPE>_value_is_set(index, name): 1 when option
 * `name` has been set on the index, 0 when it has not, and
 * ELPA_ERROR_ENTRY_NOT_FOUND when the option table is empty or has no entry
 * of that name. */
#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                return index->TYPE##_options.is_set[n] ? 1 : 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


/* Type-independent variant of the is-set query: asks each option type in
 * turn and returns the first non-negative answer (1 = set, 0 = not set).
 * If no type knows `name`, an error is printed to stderr and the error code
 * of the last query is returned. */
int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int res = ELPA_ERROR;

#define RET_IF_SET(TYPE, ...) \
        res = elpa_index_##TYPE##_value_is_set(index, name); \
        if (res >= 0) { \
                return res; \
        }

        FOR_ALL_TYPES(RET_IF_SET)

        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return res;
}

394
395
396
397
398
/* Check whether `new_value` would be accepted for integer option `name`.
 * Returns ELPA_ERROR_ENTRY_NOT_FOUND for an unknown option, ELPA_OK when the
 * option has no validator or the validator accepts the value, and ELPA_ERROR
 * when the validator rejects it. */
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        if (int_entries[n].valid == NULL) {
                return ELPA_OK;
        }
        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
}

406
int elpa_int_value_to_string(char *name, int value, const char **string) {
407
408
409
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
410
        }
411
        if (int_entries[n].to_string == NULL) {
412
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
413
414
415
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
416
417
}

418
419

/* Length of the symbolic name of `value` for integer option `name`, or 0
 * when no such name is available (unknown option or no string
 * representation). */
int elpa_int_value_to_strlen(char *name, int value) {
        const char *string = NULL;

        /* On failure `string` stays NULL, which is what is tested below. */
        elpa_int_value_to_string(name, value, &string);
        if (string == NULL) {
                return 0;
        }
        return (int) strlen(string);
}
428

429
430
431
432
433
434

/* Length of the symbolic name of the value currently stored in `index` for
 * integer option `name`; 0 for an unknown option or when the value has no
 * symbolic name. */
int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int n = find_int_entry(name);
        if (n < 0) {
                return 0;
        }
        return elpa_int_value_to_strlen(name, index->int_options.values[n]);
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
440
441
442
443
444
445
446
447
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
448
                if (ret == 1) {
449
                        *value = val;
450
451
                        return ELPA_OK;
                } else {
452
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
453
454
455
                }
        }

456
457
        for (int i = 0; i < int_entries[n].cardinality(NULL); i++) {
                int candidate = int_entries[n].enumerate(NULL, i);
458
459
460
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
461
                }
462
        }
463
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
464
465
}

466
int elpa_double_string_to_value(char *name, char *string, double *value) {
467
468
        double val;
        int ret = sscanf(string, "%lf", &val);
469
        if (ret == 1) {
470
471
                *value = val;
                return ELPA_OK;
472
        } else {
473
474
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
475
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
476
477
478
        }
}

479
int elpa_double_value_to_string(char *name, double value, const char **string) {
480
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
481
}
482

483
/* Number of admissible values of integer option `name`, obtained from the
 * option's cardinality callback (called with a NULL index). Returns
 * ELPA_ERROR_ENTRY_NOT_FOUND when the option is unknown or has no such
 * callback. */
int elpa_option_cardinality(char *name) {
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].cardinality) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        return int_entries[n].cardinality(NULL);
}
490

491
/* The i-th admissible value of integer option `name`, obtained from the
 * option's enumerate callback (called with a NULL index). Returns 0 when the
 * option is unknown or has no such callback. */
int elpa_option_enumerate(char *name, int i) {
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].enumerate) {
                return 0;
        }
        return int_entries[n].enumerate(NULL, i);
}

499

500
/* Helper functions for simple int entries */
501
/* Cardinality callback for 0/1 options: there are always two admissible
 * values. `index` is not used. */
static int cardinality_bool(elpa_index_t index) {
        return 2;
}
504

505
506
/* Validation callback for 0/1 options: non-zero exactly when new_value is
 * 0 or 1. `index` and `n` are not used. */
static int valid_bool(elpa_index_t index, int n, int new_value) {
        return (0 <= new_value) && (new_value < 2);
}

509
/* Enumeration callback for options whose i-th admissible value is i itself.
 * `index` is not used. */
static int enumerate_identity(elpa_index_t index, int i) {
        return i;
}

513
514
515
516
517
518
519
520
521
522
/* Helper functions for specific options */

/* Building blocks for switch statements generated from option lists.
 *
 * NAME_CASE:    `case value:` returning the stringified option name.
 * VALID_CASE:   `case value:` returning 1.
 * VALID_CASE_3: `case value:` returning non-zero only if `available` is
 *               non-zero and other_checks(value) is non-zero as well; both
 *               operands are parenthesized so that compound expressions can
 *               be passed safely. */
#define NAME_CASE(name, value, ...) \
        case value: \
                return #name;

#define VALID_CASE(name, value) \
        case value: \
                return 1;

#define VALID_CASE_3(name, value, available, other_checks) \
        case value: \
                return (available) && (other_checks(value));
526
527
528
529
530
531

/* Symbolic name of a solver constant: one NAME_CASE per entry of
 * ELPA_FOR_ALL_SOLVERS, "(Invalid solver)" for any other value. */
static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        return "(Invalid solver)";
        }
}

535
/* Cardinality callback of the "solver" option: returns
 * ELPA_NUMBER_OF_SOLVERS. `index` is not used. */
static int number_of_solvers(elpa_index_t index) {
        return ELPA_NUMBER_OF_SOLVERS;
}

539
/* Enumeration callback of the "solver" option: maps the position `i` to a
 * solver constant, or returns 0 if `i` matches no generated case.
 *
 * The switch is generated by the preprocessor. For every option in the list,
 * ENUMERATE_CASE opens a block that declares an array with `value` elements
 * and emits a case label of the form
 *         0 + OPTION_RANK(opt_1) + OPTION_RANK(opt_2) + ...
 * where OPTION_RANK contributes 1 for every option whose value is smaller
 * than the array length (i.e. smaller than the value of the option the case
 * belongs to) and 0 otherwise. The label is therefore the number of options
 * with a smaller value, so position i selects the option with exactly i
 * smaller-valued options.
 *
 * EMPTY/DEFER1/EVAL postpone the expansion of the inner list macro (named
 * through INNER_ITERATOR) by one scan, which lets the option list be
 * iterated inside its own expansion.
 *
 * These helper macros stay defined after this function; the kernel
 * enumerate functions further down use them as well.
 * NOTE(review): assumes ELPA_FOR_ALL_SOLVERS(X) invokes X(name, value, ...)
 * once per solver -- its definition is not part of this file. */
static int solver_enumerate(elpa_index_t index, int i) {
#define OPTION_RANK(name, value, ...) \
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)

#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
562
/* Validation callback of the "solver" option: 1 when new_value is one of the
 * constants listed by ELPA_FOR_ALL_SOLVERS, 0 otherwise. `index` and `n` are
 * not used. */
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        return 0;
        }
}

570
/* Cardinality callback of the "real_kernel" option: returns
 * ELPA_2STAGE_NUMBER_OF_REAL_KERNELS. `index` is not used. */
static int number_of_real_kernels(elpa_index_t index) {
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
573

574
/* Enumeration callback of the "real_kernel" option: maps the position `i` to
 * a real-kernel constant, or returns 0 if `i` matches no generated case.
 * The case labels are generated with the ENUMERATE_CASE/EVAL machinery that
 * is defined inside solver_enumerate, applied to
 * ELPA_FOR_ALL_2STAGE_REAL_KERNELS. */
static int real_kernel_enumerate(elpa_index_t index,int i) {
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}
583

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
584
static const char *real_kernel_name(int kernel) {
585
586
587
588
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
589
        }
590
}
591

592
593
594
/* Extra validity check for real kernels: the GPU kernel is admissible only
 * when the variable `gpu_is_active`, which must be in scope at the point of
 * use, is non-zero; every other kernel passes. The expansion is fully
 * parenthesized so it can be combined with other operators. */
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        ((kernel_number) == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1)

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
595
/* Validation callback of the "real_kernel" option.
 * While the 1-stage solver is selected only ELPA_2STAGE_REAL_DEFAULT is
 * accepted. Otherwise each kernel listed by ELPA_FOR_ALL_2STAGE_REAL_KERNELS
 * is accepted if its availability flag is non-zero and, for the GPU kernel,
 * the "gpu" option is non-zero. Any other value is rejected. */
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
        /* Read by REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE in the cases below. */
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        switch(new_value) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
                default:
                        return 0;
        }
}
607

608
/* Cardinality callback of the "complex_kernel" option: returns
 * ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS. `index` is not used. */
static int number_of_complex_kernels(elpa_index_t index) {
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
611

612

613
/* Enumeration callback of the "complex_kernel" option: maps the position `i`
 * to a complex-kernel constant, or returns 0 if `i` matches no generated
 * case. The case labels are generated with the ENUMERATE_CASE/EVAL machinery
 * that is defined inside solver_enumerate, applied to
 * ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS. */
static int complex_kernel_enumerate(elpa_index_t index,int i) {
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
623
static const char *complex_kernel_name(int kernel) {
624
625
626
627
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
628
        }
629
}
630

631
632
633
/* Extra validity check for complex kernels: the GPU kernel is admissible
 * only when the variable `gpu_is_active`, which must be in scope at the
 * point of use, is non-zero; every other kernel passes. The expansion is
 * fully parenthesized so it can be combined with other operators. */
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        ((kernel_number) == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1)

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
634
/*
 * Validity check for a candidate value of the complex 2-stage kernel option.
 *
 * index:     option index; its "solver" and "gpu" integer options are read.
 * n:         position of the option in the entry table (unused here).
 * new_value: candidate kernel constant.
 *
 * When the solver is ELPA_SOLVER_1STAGE, only ELPA_2STAGE_COMPLEX_DEFAULT is
 * accepted. Otherwise the decision is made by the VALID_CASE_3 cases generated
 * for every kernel in ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS; a value matching
 * none of them is rejected with 0.
 */
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }

        /* Referenced by name from COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE. */
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);

        switch(new_value) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
                default:
                        return 0;
        }
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
646
647
648
649
650

/*
 * Validity check for the "na" option: only strictly positive values pass.
 * Neither index nor n is consulted. Returns 1 if valid, 0 otherwise.
 */
static int na_is_valid(elpa_index_t index, int n, int new_value) {
        if (new_value <= 0) {
                return 0;
        }
        return 1;
}

651
652
653
654
/*
 * Validity check for the "nev" option.
 *
 * index:     option index; "na" must already be set on it.
 * n:         position of the option in the entry table (unused here).
 * new_value: candidate value.
 *
 * Returns 1 when "na" is set and 0 <= new_value <= na, otherwise 0.
 */
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        /*
         * Same test as bw_is_valid(): only an explicit 1 counts as "set", so
         * any other return from the lookup (presumably including an error
         * code -- TODO confirm against elpa_index_int_value_is_set) rejects
         * the value instead of being mistaken for "set".
         */
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
}

/*
 * Generic validity check: accepts strictly positive values only.
 * Neither index nor n is consulted. Returns 1 if valid, 0 otherwise.
 */
static int is_positive(elpa_index_t index, int n, int new_value) {
        return (0 < new_value) ? 1 : 0;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
662
663
664
665
666
667
668
669
670
/*
 * Validity check for the "bw" option.
 *
 * Requires that "na" is already set on index (the lookup must report
 * exactly 1). Returns 1 when 0 <= new_value < na, otherwise 0.
 * n is not consulted.
 */
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }

        const int upper_bound = elpa_index_get_int_value(index, "na", NULL);
        if (new_value < 0 || new_value >= upper_bound) {
                return 0;
        }
        return 1;
}
671

672
673
674
675
/*
 * Validity check for the "gpu" option: it is a boolean switch, so only
 * 0 and 1 are accepted. Neither index nor n is consulted.
 */
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        switch (new_value) {
                case 0:
                case 1:
                        return 1;
                default:
                        return 0;
        }
}

676
/*
 * Cardinality callback for the band-to-full blocking option.
 *
 * Always reports 10 candidates; this matches the upper bound used by
 * band_to_full_is_valid(). index is not consulted.
 */
static int band_to_full_cardinality(elpa_index_t index) {
        return 10;
}

680
/*
 * Enumerate callback for the band-to-full blocking option.
 *
 * Maps the zero-based enumeration position i to the option value i + 1.
 * index is not consulted.
 */
static int band_to_full_enumerate(elpa_index_t index, int i) {
        return i+1;
}

/*
 * Validity check for the band-to-full blocking option.
 *
 * Accepts values in the closed range [1, 10]. Neither index nor n is
 * consulted. Returns 1 if valid, 0 otherwise.
 */
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
        int max_block=10;
        return (1 <= new_value) && (new_value <= max_block);
}

689
/*
 * Cardinality callback for the OpenMP thread-count option.
 *
 * With WITH_OPENMP defined, the first call stores omp_get_max_threads() in
 * the file-level max_threads_glob and marks it via set_max_threads_glob;
 * later calls reuse the stored value. Without WITH_OPENMP the stored value is
 * forced to 1 on every call.
 *
 * index is not consulted. Returns the stored maximum thread count.
 */
static int elpa_omp_threads_cardinality(elpa_index_t index) {
#ifdef WITH_OPENMP
        if (set_max_threads_glob == 0) {
                max_threads_glob = omp_get_max_threads();
                set_max_threads_glob = 1;
        }
#else
        max_threads_glob = 1;
        set_max_threads_glob = 1;
#endif
        return max_threads_glob;
}

706
/*
 * Enumerate callback for the OpenMP thread-count option.
 *
 * Maps the zero-based enumeration position i to the thread count i + 1.
 * index is not consulted.
 */
static int elpa_omp_threads_enumerate(elpa_index_t index, int i) {
        return i + 1;
}

/*
 * Validity check for the OpenMP thread-count option.
 *
 * Accepts values in [1, max_threads]. With WITH_OPENMP defined, max_threads
 * is the file-level max_threads_glob; without it the bound is 1.
 * Neither index nor n is consulted. Returns 1 if valid, 0 otherwise.
 */
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
        int max_threads;
#ifdef WITH_OPENMP
        /*
         * max_threads_glob is filled lazily (see elpa_omp_threads_cardinality).
         * Perform the same one-time initialisation here so that validation
         * does not depend on the cardinality callback having run first.
         */
        if (set_max_threads_glob == 0) {
                max_threads_glob = omp_get_max_threads();
                set_max_threads_glob = 1;
        }
        max_threads = max_threads_glob;
#else
        max_threads = 1;
#endif
        return (1 <= new_value) && (new_value <= max_threads);
}

721
/*
 * Cardinality callback placeholder for the minimum tile size option.
 *
 * Not implemented: prints the source location to stderr and aborts the
 * process, so it never returns a value. index is not consulted.
 */
static int min_tile_size_cardinality(elpa_index_t index) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}
726

727
/*
 * Cardinality callback placeholder for the intermediate bandwidth option.
 *
 * Not implemented: prints the source location to stderr and aborts the
 * process, so it never returns a value. index is not consulted.
 */
static int intermediate_bandwidth_cardinality(elpa_index_t index) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

733
elpa_index_t elpa_index_instance() {
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
751
}
752

753
754
755
756
757
758
759
760
761
762
763
764
/*
 * Decide whether integer option i takes part in autotuning.
 *
 * An option is tunable when all of the following hold:
 *   - its table entry has a non-zero autotune_level,
 *   - that level does not exceed the requested autotune_level,
 *   - its autotune_domain shares at least one bit with autotune_domain,
 *   - it has not been marked as set in index.
 *
 * Returns 1 if tunable, 0 otherwise.
 */
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        if (int_entries[i].autotune_level == 0) {
                return 0;
        }
        if (int_entries[i].autotune_level > autotune_level) {
                return 0;
        }
        if (!(int_entries[i].autotune_domain & autotune_domain)) {
                return 0;
        }
        return !index->int_options.is_set[i];
}

/*
 * Size of the autotuning search space.
 *
 * Multiplies the cardinality() of every integer option that is_tunable()
 * for the given level and domain. Returns 1 when no option is tunable.
 */
int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int N = 1;

        for (int i = 0; i < nelements(int_entries); i++) {
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
                        N *= int_entries[i].cardinality(index);
                }
        }
        return N;
}

/*
 * Apply the n-th point of the autotuning search space to index.
 *
 * n is decoded as a mixed-radix number: for every tunable integer option, in
 * table order, the digit (n % cardinality) is mapped to a value through the
 * option's enumerate() callback and n is then divided by that cardinality.
 *
 * Returns 1 when every tunable option accepted its value. Returns 0 as soon
 * as an option's valid() callback rejects the value, or its cardinality is
 * not positive; options processed before that point keep the values already
 * written to them.
 *
 * When the "debug" option equals 1, the resulting values of all tunable
 * options are printed to stderr.
 */
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
        int debug = elpa_index_get_int_value(index, "debug", NULL);

        for (int i = 0; i < nelements(int_entries); i++) {
                if (!is_tunable(index, i, autotune_level, autotune_domain)) {
                        continue;
                }

                /* Use one radix for both the digit and the shift. */
                const int radix = int_entries[i].cardinality(index);
                if (radix <= 0) {
                        /* Nothing to enumerate; also avoids dividing by zero. */
                        return 0;
                }

                int value = int_entries[i].enumerate(index, n % radix);

                /* Try to set option i to that value */
                if (!int_entries[i].valid(index, i, value)) {
                        return 0;
                }
                index->int_options.values[i] = value;
                n /= radix;
        }

        if (debug == 1) {
                for (int i = 0; i < nelements(int_entries); i++) {
                        if (is_tunable(index, i, autotune_level, autotune_domain)) {
                                fprintf(stderr, "%s = ", int_entries[i].base.name);
                                if (int_entries[i].to_string) {
                                        fprintf(stderr, "%s\n", int_entries[i].to_string(index->int_options.values[i]));
                                } else {
                                        fprintf(stderr, "%d\n", index->int_options.values[i]);
                                }
                        }
                }
                fprintf(stderr, "\n");
        }

        /* Could set all values */
        return 1;
}
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831

/*
 * Print the current value of every tunable integer option to stderr.
 *
 * Each tunable option is written as " name =  value" on its own line, using
 * the entry's to_string() callback when one is provided and the plain integer
 * otherwise; a blank line terminates the listing. The values stored in index
 * are only read, never modified, and n is not consulted.
 *
 * Always returns 1.
 */
int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
        for (int i = 0; i < nelements(int_entries); i++) {
                if (!is_tunable(index, i, autotune_level, autotune_domain)) {
                        continue;
                }

                fprintf(stderr, " %s = ", int_entries[i].base.name);
                if (!int_entries[i].to_string) {
                        fprintf(stderr, " %d\n", index->int_options.values[i]);
                } else {
                        fprintf(stderr, " %s\n", int_entries[i].to_string(index->int_options.values[i]));
                }
        }
        fprintf(stderr, "\n");

        return 1;
}