elpa_index.c 31.2 KB
Newer Older
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
//    This file is part of ELPA.
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
46
#include <elpa/elpa.h>
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
47
#include "elpa_index.h"
48

49
50
51
52
53
54
#include <execinfo.h>

/*
 * Forward declarations of the callbacks and helpers that are referenced by
 * the option tables and macros below, before their definitions appear.
 */

/* Generic helpers for options that take the values 0 and 1 */
static int enumerate_identity(int i);
static int cardinality_bool(void);
static int valid_bool(elpa_index_t index, int n, int new_value);

/* Option "solver" */
static int number_of_solvers(void);
static int solver_enumerate(int i);
static int solver_is_valid(elpa_index_t index, int n, int new_value);
static const char* elpa_solver_name(int solver);

/* Option "real_kernel" */
static int number_of_real_kernels(void);
static int real_kernel_enumerate(int i);
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);

/* Option "complex_kernel" */
static int number_of_complex_kernels(void);
static int complex_kernel_enumerate(int i);
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);

/* Option "blocking_in_band_to_full" */
static int band_to_full_cardinality(void);
static int band_to_full_enumerate(int i);
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

/* Option "ELPA_OMP_THREADS" */
static int elpa_omp_threads_cardinality(void);
static int elpa_omp_threads_enumerate(int i);
static int elpa_omp_threads_is_valid(elpa_index_t index, int n, int new_value);

/* Option "min_tile_size" */
static int min_tile_size_cardinality(void);

/* Validators for individual parameters */
static int na_is_valid(elpa_index_t index, int n, int new_value);
static int nev_is_valid(elpa_index_t index, int n, int new_value);
static int bw_is_valid(elpa_index_t index, int n, int new_value);
static int gpu_is_valid(elpa_index_t index, int n, int new_value);

static int is_positive(elpa_index_t index, int n, int new_value);

/* String conversion for double options */
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
89

90
/*
 * Designated-initializer helpers used to build the option tables.
 *
 * BASE_ENTRY fills the common .base member. option_name has to be a string
 * literal, because the names of the two environment variables are formed by
 * string-literal concatenation with the "ELPA_DEFAULT_" / "ELPA_FORCE_"
 * prefixes.
 */
#define BASE_ENTRY(option_name, option_description, once_value, readonly_value) \
                .base = { \
                        .name = option_name, \
                        .description = option_description, \
                        .once = once_value, \
                        .readonly = readonly_value, \
                        .env_default = "ELPA_DEFAULT_" option_name, \
                        .env_force = "ELPA_FORCE_" option_name, \
                }

/* Integer parameter (once = 1, readonly = 0) with an optional validator */
#define INT_PARAMETER_ENTRY(option_name, option_description, valid_func) \
        { \
                BASE_ENTRY(option_name, option_description, 1, 0), \
                .valid = valid_func, \
        }

/* Integer option restricted to 0 and 1, using the generic bool helpers */
#define BOOL_ENTRY(option_name, option_description, default_val, tune_level, tune_domain) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
                .default_value = default_val, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
        }

/* Integer option with caller-supplied cardinality / enumerate / valid /
 * to_string callbacks (any of which may be NULL) */
#define INT_ENTRY(option_name, option_description, default_val, tune_level, tune_domain, card_func, enumerate_func, valid_func, to_string_func) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
                .default_value = default_val, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = card_func, \
                .enumerate = enumerate_func, \
                .valid = valid_func, \
                .to_string = to_string_func, \
        }

/* Integer option without default, validator or enumeration callbacks */
#define INT_ANY_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0), \
        }

134
135
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
136
static const elpa_index_int_entry_t int_entries[] = {
137
138
139
140
141
142
143
144
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid),
145
146
147
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication"),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication"),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator"),
148
        INT_ANY_ENTRY("blacs_context", "BLACS context"),
149
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
150
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name),
151
152
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL),
153
154
155
156
157
158
159
160
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
161
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
162
163
                        number_of_real_kernels, real_kernel_enumerate, \
                        real_kernel_is_valid, real_kernel_name),
164
165
166
167
168
169
170
171
        INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
        INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        cardinality_bool, enumerate_identity, NULL, NULL),
172
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
173
174
                        number_of_complex_kernels, complex_kernel_enumerate, \
                        complex_kernel_is_valid, complex_kernel_name),
175

176
177
178
        INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        min_tile_size_cardinality, NULL, NULL, NULL),

179
180
	//INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
	INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
Andreas Marek's avatar
Andreas Marek committed
181
	INT_ENTRY("ELPA_OMP_THREADS", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, elpa_omp_threads_cardinality, elpa_omp_threads_enumerate, elpa_omp_threads_is_valid, NULL),
182
183
        //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL),
184
185
186
187
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
188
189
190
};

/*
 * Initializer for a read-only double option (once = 0, readonly = 1).
 * BASE_ENTRY takes exactly four arguments: name, description, once, readonly.
 */
#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 1) \
        }

/*
 * Table of double-valued options. No such option is defined yet; the
 * accessor functions generated below test sizeof(double_entries) == 0 before
 * they search this table.
 * NOTE(review): an empty initializer list relies on a compiler extension
 * before C23 -- confirm the supported compilers accept it.
 */
static const elpa_index_double_entry_t double_entries[] = {
        /* Empty for now */
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
198

199
/*
 * Release an index instance: for every option type the values, is_set and
 * notified arrays are freed, followed by the index structure itself.
 * Passing NULL is a no-op, mirroring free().
 */
void elpa_index_free(elpa_index_t index) {
        if (index == NULL) {
                return;
        }

#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

        free(index);
}

static int compar(const void *key, const void *member) {
        const char *name = (const char *) key;
212
        elpa_index_int_entry_t *entry = (elpa_index_int_entry_t *) member;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
213

214
        int l1 = strlen(entry->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
215
216
217
218
        int l2 = strlen(name);
        if (l1 != l2) {
                return 1;
        }
219
        if (strncmp(name, entry->base.name, l1) == 0) {
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
220
221
222
223
224
225
                return 0;
        } else {
                return 1;
        }
}

226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
/*
 * Generates find_<TYPE>_entry(name) for every option type: a linear search
 * with lfind() and compar() through <TYPE>_entries. The result is the index
 * of the entry whose name matches, or -1 if there is none.
 */
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                size_t count = nelements(TYPE##_entries); \
                elpa_index_##TYPE##_entry_t *hit; \
                hit = lfind((const void*) name, (const void *) TYPE##_entries, &count, sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (!hit) { \
                        return -1; \
                } \
                return (hit - &TYPE##_entries[0]); \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


/*
 * Generates getenv_<TYPE>() for every option type.
 *
 * The function reads the environment variable env_variable and converts its
 * content with elpa_<TYPE>_string_to_value() for option number n.
 *
 * Returns 1 if the variable is set and could be converted; the result is then
 * stored in *value. Returns 0 if the variable is not set or if the conversion
 * failed (in the latter case an error message is printed to stderr).
 *
 * On success a message naming the option and the variable is printed, but
 * only while notify_flag is not yet recorded in notified[n]; afterwards the
 * flag is recorded so that the message appears once per option and flag.
 * error_string is the leading word of that message.
 */
#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                int err; \
                char *env_value = getenv(env_variable); \
                if (env_value == NULL) { \
                        return 0; \
                } \
                err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
                if (err != ELPA_OK) { \
                        fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                        env_variable, env_value, elpa_strerr(err)); \
                        return 0; \
                } \
                if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
                        const char *value_string = NULL; \
                        if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
                                fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                        } else { \
                                fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
                                                error_string, TYPE##_entries[n].base.name, *value, env_variable);\
                        } \
                        index->TYPE##_options.notified[n] |= notify_flag; \
                } \
                return 1; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


/*
 * Generates elpa_index_get_<TYPE>_value() for every option type.
 *
 * Looks up the option called name and returns its value. For options that
 * are neither "once" nor "readonly" the ELPA_FORCE_<name> environment
 * variable is consulted first and, if it holds a valid value, overrides the
 * stored one.
 *
 * error may be NULL. Otherwise *error is set to ELPA_OK on success and to
 * ELPA_ERROR_ENTRY_NOT_FOUND if no such option exists (this includes the
 * case that the table of this type is empty); ERROR_VALUE is returned then.
 */
#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                TYPE ret; \
                int n = -1; \
                if (sizeof(TYPE##_entries) != 0) { \
                        n = find_##TYPE##_entry(name); \
                } \
                if (n < 0) { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
                int from_env = 0; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                } \
                if (!from_env) { \
                        ret = index->TYPE##_options.values[n]; \
                } \
                if (error != NULL) { \
                        *error = ELPA_OK; \
                } \
                return ret; \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


/*
 * Generates elpa_index_get_<TYPE>_loc() for every option type: returns the
 * address of the stored value of the option called name, or NULL if the
 * table of this type is empty or has no such option.
 */
#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        return &index->TYPE##_options.values[n]; \
                } else { \
                        return NULL; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


313
/*
 * Generates elpa_index_set_<TYPE>_value() for every option type.
 *
 * Stores value for the option called name and marks the option as set.
 * Returns ELPA_OK on success, otherwise (checked in this order):
 *   ELPA_ERROR_ENTRY_NOT_FOUND      no such option (or the table is empty)
 *   ELPA_ERROR_ENTRY_INVALID_VALUE  the option's validator rejected value
 *   ELPA_ERROR_ENTRY_ALREADY_SET    a "once" option that was set before
 *   ELPA_ERROR_ENTRY_READONLY       the option is read-only
 */
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                if (TYPE##_entries[n].valid != NULL) { \
                        if (!TYPE##_entries[n].valid(index, n, value)) { \
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
                        } \
                } \
                if (TYPE##_entries[n].base.once && index->TYPE##_options.is_set[n]) { \
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
                if (TYPE##_entries[n].base.readonly) { \
                        return ELPA_ERROR_ENTRY_READONLY; \
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)


/*
 * Generates elpa_index_<TYPE>_value_is_set() for every option type: returns
 * 1 if the option called name has been set, 0 if it has not, and
 * ELPA_ERROR_ENTRY_NOT_FOUND if the table of this type is empty or has no
 * such option.
 */
#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                return index->TYPE##_options.is_set[n] ? 1 : 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


/*
 * Type-independent variant of the is_set query: asks the is_set function of
 * every option type in turn and returns the first non-negative answer
 * (1 = set, 0 = not set). If no type knows the option, a message is printed
 * to stderr and the last result is returned.
 * NOTE(review): this treats every negative result as "not found here", i.e.
 * it assumes the ELPA error codes are negative -- confirm in elpa.h.
 */
int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int res = ELPA_ERROR;

#define RET_IF_SET(TYPE, ...) \
        res = elpa_index_##TYPE##_value_is_set(index, name); \
        if (res >= 0) { \
                return res; \
        }

        FOR_ALL_TYPES(RET_IF_SET)

        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return res;
}

374
375
376
377
378
/*
 * Check whether new_value would be accepted for the integer option name,
 * without storing it.
 *
 * Returns ELPA_OK if the option has no validator or the validator accepts
 * the value, ELPA_ERROR if the validator rejects it, and
 * ELPA_ERROR_ENTRY_NOT_FOUND if there is no integer option of that name.
 */
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        if (int_entries[n].valid == NULL) {
                return ELPA_OK;
        }
        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
}

386
int elpa_int_value_to_string(char *name, int value, const char **string) {
387
388
389
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
390
        }
391
        if (int_entries[n].to_string == NULL) {
392
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
393
394
395
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
396
397
}

398
399

/*
 * Length of the string representation of value for the integer option name,
 * or 0 if elpa_int_value_to_string() does not provide a string.
 */
int elpa_int_value_to_strlen(char *name, int value) {
        const char *string = NULL;
        /* On failure string stays NULL, so the return code is not needed */
        elpa_int_value_to_string(name, value, &string);
        if (string == NULL) {
                return 0;
        }
        return strlen(string);
}
408

409
410
411
412
413
414

/*
 * Length of the string representation of the value currently stored in index
 * for the integer option name; 0 if there is no such option.
 */
int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int n = find_int_entry(name);
        if (n < 0) {
                return 0;
        }
        return elpa_int_value_to_strlen(name, index->int_options.values[n]);
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
420
421
422
423
424
425
426
427
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
428
                if (ret == 1) {
429
                        *value = val;
430
431
                        return ELPA_OK;
                } else {
432
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
433
434
435
436
437
438
439
440
                }
        }

        for (int i = 0; i < int_entries[n].cardinality(); i++) {
                int candidate = int_entries[n].enumerate(i);
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
441
                }
442
        }
443
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
444
445
}

446
int elpa_double_string_to_value(char *name, char *string, double *value) {
447
448
        double val;
        int ret = sscanf(string, "%lf", &val);
449
        if (ret == 1) {
450
451
                *value = val;
                return ELPA_OK;
452
        } else {
453
454
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
455
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
456
457
458
        }
}

459
int elpa_double_value_to_string(char *name, double value, const char **string) {
460
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
461
}
462

463
/*
 * Number of values the integer option name can take, as reported by its
 * cardinality callback. Returns ELPA_ERROR_ENTRY_NOT_FOUND if the option
 * does not exist or has no such callback.
 */
int elpa_option_cardinality(char *name) {
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].cardinality) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        return int_entries[n].cardinality();
}
470

471
/*
 * The i-th value of the integer option name, as reported by its enumerate
 * callback. Returns 0 if the option does not exist or has no such callback.
 */
int elpa_option_enumerate(char *name, int i) {
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].enumerate) {
                return 0;
        }
        return int_entries[n].enumerate(i);
}

479

480
/* Helper functions for simple int entries */
481
482
483
/* Cardinality callback for boolean options: two possible values */
static int cardinality_bool(void) {
        enum { BOOL_VALUE_COUNT = 2 };
        return BOOL_VALUE_COUNT;
}
484

485
486
/* Validator for boolean options: accepts exactly 0 and 1.
 * index and n are not used. */
static int valid_bool(elpa_index_t index, int n, int new_value) {
        return (0 <= new_value) && (new_value < 2);
}

489
/* Enumerate callback for options whose i-th value is i itself */
static int enumerate_identity(int i) {
        return i;
}

493
494
495
496
497
498
499
500
501
502
/* Helper functions for specific options */

/*
 * Building blocks for switch statements that are generated by applying one
 * of these macros to every (name, value, ...) item of an option list.
 */

/* case value: return the item's name as a string */
#define NAME_CASE(name, value, ...) \
        case value: \
                return #name;

/* case value: the value is always valid */
#define VALID_CASE(name, value) \
        case value: \
                return 1;

/* case value: valid if the item is available and other_checks(value) holds */
#define VALID_CASE_3(name, value, available, other_checks) \
        case value: \
                return available && (other_checks(value));
506
507
508
509
510
511

/* Name of the solver constant with the given value, as a string;
 * "(Invalid solver)" for values not listed by ELPA_FOR_ALL_SOLVERS */
static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        return "(Invalid solver)";
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
515
static int number_of_solvers() {
516
        return ELPA_NUMBER_OF_SOLVERS;
517
518
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
519
static int solver_enumerate(int i) {
520
#define OPTION_RANK(name, value, ...) \
521
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538

#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
539
540
541
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
542
/* Validator of the "solver" option: accepts exactly the values listed by
 * ELPA_FOR_ALL_SOLVERS. index and n are not used. */
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
550
static int number_of_real_kernels() {
551
552
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
553

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
554
static int real_kernel_enumerate(int i) {
555
556
557
558
559
560
561
562
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}
563

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
564
static const char *real_kernel_name(int kernel) {
565
566
567
568
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
569
        }
570
}
571

572
573
574
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
575
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
576
577
578
579
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
580
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
581
        switch(new_value) {
582
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
583
584
                default:
                        return 0;
585
        }
586
}
587

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
588
static int number_of_complex_kernels() {
589
590
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
591

592

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
593
static int complex_kernel_enumerate(int i) {
594
595
596
597
598
599
600
601
602
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
603
static const char *complex_kernel_name(int kernel) {
604
605
606
607
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
608
        }
609
}
610

611
612
613
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
614
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
615
616
617
618
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }
619
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
620
        switch(new_value) {
621
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
622
623
624
625
                default:
                        return 0;
        }
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
626
627
628
629
630

/* Validator of "na": the matrix size has to be strictly positive */
static int na_is_valid(elpa_index_t index, int n, int new_value) {
        if (new_value <= 0) {
                return 0;
        }
        return 1;
}

631
632
633
634
/*
 * Validator of "nev": requires 0 <= nev <= na. The value is rejected unless
 * "na" has been set before (any result other than 1 from the is_set query,
 * including an error code, counts as "not set").
 */
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
}

/* Generic validator: accepts strictly positive values only */
static int is_positive(elpa_index_t index, int n, int new_value) {
        return 0 < new_value;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
642
643
644
645
646
647
648
649
650
/*
 * Validate a candidate value against the already configured "na" option.
 *
 * index:     option index; "na" must already be set in it.
 * n:         not used by this check.
 * new_value: candidate value.
 *
 * Returns 0 unless elpa_index_int_value_is_set() reports exactly 1 for "na";
 * otherwise returns 1 when 0 <= new_value < na and 0 in all other cases.
 */
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }

        const int na_value = elpa_index_get_int_value(index, "na", NULL);

        if (new_value < 0) {
                return 0;
        }
        return new_value < na_value;
}
651

652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
/*
 * Validator for an on/off style option: only 0 and 1 are accepted.
 *
 * index, n: not used by this check.
 * new_value: candidate value.
 *
 * Returns 1 if new_value is 0 or 1, otherwise 0.
 */
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        switch (new_value) {
                case 0:
                case 1:
                        return 1;
                default:
                        return 0;
        }
}

/*
 * Placeholder: the cardinality query for the band_to_full option is not
 * implemented yet.
 *
 * Reports the source location on stderr and terminates the process via
 * abort(); it never returns a value.
 *
 * Declared with an explicit (void) parameter list so that the definition is
 * a real prototype and calls with arguments are diagnosed.
 */
static int band_to_full_cardinality(void) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

/*
 * Placeholder: enumeration for the band_to_full option is not implemented
 * yet.
 *
 * i: ignored by the current body.
 *
 * Reports the source location on stderr and terminates the process via
 * abort(); it never returns a value.
 */
static int band_to_full_enumerate(int i) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

/*
 * Placeholder: validation for the band_to_full option is not implemented
 * yet.
 *
 * index, n, new_value: ignored by the current body.
 *
 * Reports the source location on stderr and terminates the process via
 * abort(); it never returns a value.
 */
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

Andreas Marek's avatar
Andreas Marek committed
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
/*
 * Placeholder: the cardinality query for the omp_threads option is not
 * implemented yet.
 *
 * Reports the source location on stderr and terminates the process via
 * abort(); it never returns a value.
 *
 * Declared with an explicit (void) parameter list so that the definition is
 * a real prototype and calls with arguments are diagnosed.
 */
static int elpa_omp_threads_cardinality(void) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

/*
 * Placeholder: enumeration for the omp_threads option is not implemented
 * yet.
 *
 * i: ignored by the current body.
 *
 * Reports the source location on stderr and terminates the process via
 * abort(); it never returns a value.
 */
static int elpa_omp_threads_enumerate(int i) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

/*
 * Placeholder: validation for the omp_threads option is not implemented
 * yet.
 *
 * index, n, new_value: ignored by the current body.
 *
 * Reports the source location on stderr and terminates the process via
 * abort(); it never returns a value.
 */
static int elpa_omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}

692
693
694
695
696
/*
 * Placeholder: the cardinality query for the min_tile_size option is not
 * implemented yet.
 *
 * Reports the source location on stderr and terminates the process via
 * abort(); it never returns a value.
 *
 * Declared with an explicit (void) parameter list so that the definition is
 * a real prototype and calls with arguments are diagnosed.
 */
static int min_tile_size_cardinality(void) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}
697
elpa_index_t elpa_index_instance() {
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
715
}
716

717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
/*
 * Tell whether integer option `i` takes part in autotuning.
 *
 * index:           option index holding the per-option is_set flags.
 * i:               position of the option in int_entries.
 * autotune_level:  requested level; the option's own level must be non-zero
 *                  and not greater than this.
 * autotune_domain: requested domain bit mask; it must share at least one bit
 *                  with the option's autotune_domain.
 *
 * An option whose is_set flag is non-zero in `index` is never tunable.
 *
 * Returns 1 if all conditions hold, 0 otherwise.
 */
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        if (int_entries[i].autotune_level == 0) {
                return 0;
        }
        if (int_entries[i].autotune_level > autotune_level) {
                return 0;
        }
        if ((int_entries[i].autotune_domain & autotune_domain) == 0) {
                return 0;
        }
        if (index->int_options.is_set[i]) {
                return 0;
        }
        return 1;
}

/*
 * Count the parameter combinations available for autotuning.
 *
 * index:           option index; options already set in it are skipped.
 * autotune_level:  autotune level passed on to is_tunable().
 * autotune_domain: autotune domain mask passed on to is_tunable().
 *
 * Returns the product of cardinality() over all integer options for which
 * is_tunable() holds, or 1 if there is none.
 *
 * NOTE(review): the product is accumulated in a plain int without an
 * overflow check -- confirm the cardinalities stay small enough.
 */
int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int N = 1;

        for (int i = 0; i < nelements(int_entries); i++) {
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
                        N *= int_entries[i].cardinality();
                }
        }
        return N;
}

/*
 * Apply the n-th autotuning parameter combination to `index`.
 *
 * index:           option index to modify.
 * autotune_level:  autotune level passed on to is_tunable().
 * autotune_domain: autotune domain mask passed on to is_tunable().
 * n:               combination number, decoded as a mixed-radix number: for
 *                  each tunable option in int_entries order, the digit
 *                  n % cardinality selects the value via enumerate() and n is
 *                  then divided by that cardinality.
 *
 * Each selected value is checked with the option's valid() callback before it
 * is stored. When the "debug" option equals 1, the resulting value of every
 * tunable option is printed to stderr.
 *
 * Returns 1 if every tunable option could be set. Returns 0 as soon as one
 * option reports a non-positive cardinality or rejects its value; options
 * processed before that point keep the values already written.
 */
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
        int debug = elpa_index_get_int_value(index, "debug", NULL);
        for (int i = 0; i < nelements(int_entries); i++) {
                if (!is_tunable(index, i, autotune_level, autotune_domain)) {
                        continue;
                }

                /* Query once: the same radix is needed for both % and /. */
                const int cardinality = int_entries[i].cardinality();
                if (cardinality <= 0) {
                        /* Nothing to enumerate; also avoids dividing by zero. */
                        return 0;
                }

                int value = int_entries[i].enumerate(n % cardinality);
                /* Try to set option i to that value */
                if (!int_entries[i].valid(index, i, value)) {
                        return 0;
                }
                index->int_options.values[i] = value;
                n /= cardinality;
        }
        if (debug == 1) {
                for (int i = 0; i < nelements(int_entries); i++) {
                        if (is_tunable(index, i, autotune_level, autotune_domain)) {
                                fprintf(stderr, "%s = ", int_entries[i].base.name);
                                /* Prefer the symbolic name when the option provides one. */
                                if (int_entries[i].to_string) {
                                        fprintf(stderr, "%s\n", int_entries[i].to_string(index->int_options.values[i]));
                                } else {
                                        fprintf(stderr, "%d\n", index->int_options.values[i]);
                                }
                        }
                }
                fprintf(stderr, "\n");
        }

        /* Could set all values */
        return 1;
}