elpa_index.c 39.9 KB
Newer Older
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
//    This file is part of ELPA.
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
46
#include <elpa/elpa.h>
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
47
#include "elpa_index.h"
48

49
50
#include <execinfo.h>

Andreas Marek's avatar
Andreas Marek committed
51
52
53
54
55
56
57
58
59
#include "config.h"

#ifdef WITH_OPENMP
#include <omp.h>
#endif

int max_threads_glob;
int set_max_threads_glob=0;

60
61
static int enumerate_identity(elpa_index_t index, int i);
static int cardinality_bool(elpa_index_t index);
62
63
static int valid_bool(elpa_index_t index, int n, int new_value);

64
65
static int number_of_solvers(elpa_index_t index);
static int solver_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
66
static int solver_is_valid(elpa_index_t index, int n, int new_value);
67
68
static const char* elpa_solver_name(int solver);

69
70
static int number_of_real_kernels(elpa_index_t index);
static int real_kernel_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
71
72
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);
73

74
75
static int number_of_complex_kernels(elpa_index_t index);
static int complex_kernel_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
76
77
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);
78

79
80
static int band_to_full_cardinality(elpa_index_t index);
static int band_to_full_enumerate(elpa_index_t index, int i);
81
82
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

Pavel Kus's avatar
Pavel Kus committed
83
84
85
static int omp_threads_cardinality(elpa_index_t index);
static int omp_threads_enumerate(elpa_index_t index, int i);
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value);
Andreas Marek's avatar
Andreas Marek committed
86

87
88
89
90
static int max_stored_rows_cardinality(elpa_index_t index);
static int max_stored_rows_enumerate(elpa_index_t index, int i);
static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value);

91
static int min_tile_size_cardinality(elpa_index_t index);
92
93
94
95
96
97
static int min_tile_size_enumerate(elpa_index_t index, int i);
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value);

static int valid_with_gpu(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value);
98

99
static int intermediate_bandwidth_cardinality(elpa_index_t index);
100
101
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i);
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value);
102

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
103
static int na_is_valid(elpa_index_t index, int n, int new_value);
104
static int nev_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
105
static int bw_is_valid(elpa_index_t index, int n, int new_value);
106
static int gpu_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
107

108
109
static int is_positive(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
110
111
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
112

113
#define BASE_ENTRY(option_name, option_description, once_value, readonly_value) \
114
115
116
117
118
119
120
121
                .base = { \
                        .name = option_name, \
                        .description = option_description, \
                        .once = once_value, \
                        .readonly = readonly_value, \
                        .env_default = "ELPA_DEFAULT_" option_name, \
                        .env_force = "ELPA_FORCE_" option_name, \
                }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
122

123
#define INT_PARAMETER_ENTRY(option_name, option_description, valid_func) \
124
        { \
125
                BASE_ENTRY(option_name, option_description, 1, 0), \
126
                .valid = valid_func, \
127
        }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
128

129
#define BOOL_ENTRY(option_name, option_description, default, tune_level, tune_domain) \
130
        { \
131
                BASE_ENTRY(option_name, option_description, 0, 0), \
132
                .default_value = default, \
133
134
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
135
136
137
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
138
139
        }

140
#define INT_ENTRY(option_name, option_description, default, tune_level, tune_domain, card_func, enumerate_func, valid_func, to_string_func) \
141
        { \
142
                BASE_ENTRY(option_name, option_description, 0, 0), \
143
                .default_value = default, \
144
145
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
146
147
148
149
                .cardinality = card_func, \
                .enumerate = enumerate_func, \
                .valid = valid_func, \
                .to_string = to_string_func, \
150
151
        }

152
#define INT_ANY_ENTRY(option_name, option_description) \
153
        { \
154
                BASE_ENTRY(option_name, option_description, 0, 0), \
155
156
        }

157
158
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
159
static const elpa_index_int_entry_t int_entries[] = {
160
161
162
163
164
165
166
167
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid),
168
        INT_PARAMETER_ENTRY("suppress_warnings", "If specified, warnings will NOT be printed on this mpi rank", NULL),
169
170
171
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication"),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication"),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator"),
172
        INT_ANY_ENTRY("blacs_context", "BLACS context"),
173
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
174
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name),
175
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
176
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL),
177
178
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
179
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
180
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL),
181
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
182
                        cardinality_bool, enumerate_identity, valid_with_gpu, NULL),
183
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
184
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL),
185
        INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
186
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL),
187
        INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
188
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL),
189
        INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
190
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL),
191
        INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
192
193
194
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL),
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
                        number_of_real_kernels, real_kernel_enumerate, real_kernel_is_valid, real_kernel_name),
195
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
196
                        number_of_complex_kernels, complex_kernel_enumerate, complex_kernel_is_valid, complex_kernel_name),
197

198
        INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
199
                        min_tile_size_cardinality, min_tile_size_enumerate, min_tile_size_is_valid, NULL),
200
        INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
201
                        intermediate_bandwidth_cardinality, intermediate_bandwidth_enumerate, intermediate_bandwidth_is_valid, NULL),
202

203
204
205
206
        INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
                        band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
        INT_ENTRY("max_stored_rows", "Maximum number of stored rows used in ELPA 1 backtransformation, default 63", 63, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
                        max_stored_rows_cardinality, max_stored_rows_enumerate, max_stored_rows_is_valid, NULL),
Andreas Marek's avatar
Andreas Marek committed
207
#ifdef WITH_OPENMP
208
209
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY,
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL),
Andreas Marek's avatar
Andreas Marek committed
210
#else
211
212
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL),
Andreas Marek's avatar
Andreas Marek committed
213
#endif
214
215
216
        INT_ENTRY("cannon_buffer_size", "Increasing the buffer size might make it faster, but costs memory", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
                        NULL, NULL, NULL, NULL),
                        //cannon_buffer_size_cardinality, cannon_buffer_size_enumerate, cannon_buffer_size_is_valid, NULL),
217
218
        //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL),
219
220
221
222
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
223
        BOOL_ENTRY("cannon_for_generalized", "Whether to use Cannons algorithm for the generalized EVP", 1, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
224
225
226
};

#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
227
        { \
228
                BASE_ENTRY(option_name, option_description, 0, 1, 0) \
229
230
231
        }

static const elpa_index_double_entry_t double_entries[] = {
232
        /* Empty for now */
233
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
234

235
void elpa_index_free(elpa_index_t index) {
236
237
238
239
240
241
242
#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
243
244
245
246
247
        free(index);
}

static int compar(const void *key, const void *member) {
        const char *name = (const char *) key;
248
        elpa_index_int_entry_t *entry = (elpa_index_int_entry_t *) member;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
249

250
        int l1 = strlen(entry->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
251
252
253
254
        int l2 = strlen(name);
        if (l1 != l2) {
                return 1;
        }
255
        if (strncmp(name, entry->base.name, l1) == 0) {
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
256
257
258
259
260
261
                return 0;
        } else {
                return 1;
        }
}

262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                elpa_index_##TYPE##_entry_t *entry; \
                size_t nmembers = nelements(TYPE##_entries); \
                entry = lfind((const void*) name, (const void *) TYPE##_entries, &nmembers, sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (entry) { \
                        return (entry - &TYPE##_entries[0]); \
                } else { \
                        return -1; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                int err; \
                char *env_value = getenv(env_variable); \
                if (env_value) { \
281
                        err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
282
283
284
285
286
                        if (err != ELPA_OK) { \
                                fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                                TYPE##_entries[n].base.name, env_value, elpa_strerr(err)); \
                        } else {\
                                const char *value_string = NULL; \
287
                                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
288
                                        if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
289
290
291
292
                                                if (! elpa_index_int_value_is_set(index, "suppress_warnings")) { \
                                                        fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                                      error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                                } \
293
294
295
                                                index->TYPE##_options.notified[n] |= notify_flag; \
                                        } \
                                } else { \
296
297
                                        if (! elpa_index_int_value_is_set(index, "suppress_warnings")) { \
                                                fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
298
                                                        error_string, TYPE##_entries[n].base.name, *value, env_variable);\
299
                                        } \
300
301
302
303
304
305
306
307
308
309
310
311
                                } \
                                return 1; \
                        } \
                } \
                return 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                TYPE ret; \
312
313
314
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        int from_env = 0; \
                        if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                                from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                        } \
                        if (!from_env) { \
                                ret = index->TYPE##_options.values[n]; \
                        } \
                        if (error != NULL) { \
                                *error = ELPA_OK; \
                        } \
                        return ret; \
                } else { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
340
341
342
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
343
344
345
346
347
348
349
350
351
352
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        return &index->TYPE##_options.values[n]; \
                } else { \
                        return NULL; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


353
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
354
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
355
356
357
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
358
359
360
361
362
363
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                }; \
                if (TYPE##_entries[n].valid != NULL) { \
                        if(!TYPE##_entries[n].valid(index, n, value)) { \
364
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
365
366
367
                        }; \
                } \
                if (TYPE##_entries[n].base.once & index->TYPE##_options.is_set[n]) { \
368
369
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
370
                if (TYPE##_entries[n].base.readonly) { \
371
                        return ELPA_ERROR_ENTRY_READONLY; \
372
373
374
375
376
377
378
379
380
381
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)


#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
382
383
384
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        if (index->TYPE##_options.is_set[n]) { \
                                return 1; \
                        } else { \
                                return 0; \
                        } \
                } else { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int res = ELPA_ERROR;

#define RET_IF_SET(TYPE, ...) \
        res = elpa_index_##TYPE##_value_is_set(index, name); \
        if (res >= 0) { \
                return res; \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
406
        }
407
408
409

        FOR_ALL_TYPES(RET_IF_SET)

410
411
412
413
        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return res;
}

414
415
416
417
418
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name); \
        if (n >= 0) { \
                if (int_entries[n].valid == NULL) {
                        return ELPA_OK;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
419
                } else {
420
                        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
421
422
                }
        }
423
        return ELPA_ERROR_ENTRY_NOT_FOUND;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
424
425
}

426
int elpa_int_value_to_string(char *name, int value, const char **string) {
427
428
429
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
430
        }
431
        if (int_entries[n].to_string == NULL) {
432
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
433
434
435
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
436
437
}

438
439

int elpa_int_value_to_strlen(char *name, int value) {
440
        const char *string = NULL;
441
        elpa_int_value_to_string(name, value, &string);
442
        if (string == NULL) {
443
444
445
                return 0;
        } else {
                return strlen(string);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
446
447
        }
}
448

449
450
451
452
453
454

int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int n = find_int_entry(name);
        if (n < 0) {
                return 0;
        }
455
        return elpa_int_value_to_strlen(name, index->int_options.values[n]);
456
457
458
459
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
460
461
462
463
464
465
466
467
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
468
                if (ret == 1) {
469
                        *value = val;
470
471
                        return ELPA_OK;
                } else {
472
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
473
474
475
                }
        }

476
477
        for (int i = 0; i < int_entries[n].cardinality(NULL); i++) {
                int candidate = int_entries[n].enumerate(NULL, i);
478
479
480
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
481
                }
482
        }
483
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
484
485
}

486
int elpa_double_string_to_value(char *name, char *string, double *value) {
487
488
        double val;
        int ret = sscanf(string, "%lf", &val);
489
        if (ret == 1) {
490
491
                *value = val;
                return ELPA_OK;
492
        } else {
493
494
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
495
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
496
497
498
        }
}

499
int elpa_double_value_to_string(char *name, double value, const char **string) {
500
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
501
}
502

503
int elpa_option_cardinality(char *name) {
504
505
506
507
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].cardinality) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
508
        return int_entries[n].cardinality(NULL);
509
}
510

511
int elpa_option_enumerate(char *name, int i) {
512
513
514
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].enumerate) {
                return 0;
515
        }
516
        return int_entries[n].enumerate(NULL, i);
517
518
}

519

520
/* Helper functions for simple int entries */
521
static int cardinality_bool(elpa_index_t index) {
522
523
        return 2;
}
524

525
526
static int valid_bool(elpa_index_t index, int n, int new_value) {
        return (0 <= new_value) && (new_value < 2);
527
528
}

529
static int enumerate_identity(elpa_index_t index, int i) {
530
531
532
        return i;
}

533
534
535
536
537
538
539
540
541
542
/* Helper functions for specific options */

#define NAME_CASE(name, value, ...) \
        case value: \
                return #name;

#define VALID_CASE(name, value) \
        case value: \
                return 1;

543
#define VALID_CASE_3(name, value, available, other_checks) \
544
        case value: \
545
                return available && (other_checks(value));
546
547
548
549
550
551

static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        return "(Invalid solver)";
552
553
554
        }
}

555
static int number_of_solvers(elpa_index_t index) {
556
        return ELPA_NUMBER_OF_SOLVERS;
557
558
}

559
static int solver_enumerate(elpa_index_t index, int i) {
560
#define OPTION_RANK(name, value, ...) \
561
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578

#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
579
580
581
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
582
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
583
584
585
586
587
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        return 0;
        }
588
589
}

590
static int number_of_real_kernels(elpa_index_t index) {
591
592
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
593

594
static int real_kernel_enumerate(elpa_index_t index,int i) {
595
596
597
598
599
600
601
602
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}
603

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
604
static const char *real_kernel_name(int kernel) {
605
606
607
608
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
609
        }
610
}
611

612
613
614
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
615
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
616
617
618
619
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
620
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
621
        switch(new_value) {
622
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
623
624
                default:
                        return 0;
625
        }
626
}
627

628
static int number_of_complex_kernels(elpa_index_t index) {
629
630
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
631

632

633
static int complex_kernel_enumerate(elpa_index_t index,int i) {
634
635
636
637
638
639
640
641
642
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
643
static const char *complex_kernel_name(int kernel) {
644
645
646
647
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
648
        }
649
}
650

651
652
653
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
654
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
655
656
657
658
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }
659
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
660
        switch(new_value) {
661
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
662
663
664
665
                default:
                        return 0;
        }
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
666
667
668
669
670

static int na_is_valid(elpa_index_t index, int n, int new_value) {
        return new_value > 0;
}

671
672
673
674
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (!elpa_index_int_value_is_set(index, "na")) {
                return 0;
        }
675
        return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
676
677
678
679
680
681
}

static int is_positive(elpa_index_t index, int n, int new_value) {
        return new_value > 0;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
682
683
684
685
686
687
688
689
690
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        int na;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }

        na = elpa_index_get_int_value(index, "na", NULL);
        return (0 <= new_value) && (new_value < na);
}
691

692
693
694
695
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        return new_value == 0 || new_value == 1;
}

696
static int band_to_full_cardinality(elpa_index_t index) {
697
	return 10;
698
699
}

700
static int band_to_full_enumerate(elpa_index_t index, int i) {
701
	return i+1;
702
703
704
}

static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
705
706
	int max_block=10;
        return (1 <= new_value) && (new_value <= max_block);
707
708
}

Pavel Kus's avatar
Pavel Kus committed
709
static int omp_threads_cardinality(elpa_index_t index) {
Andreas Marek's avatar
Andreas Marek committed
710
711
712
713
714
715
716
717
718
719
720
721
722
723
	int max_threads;
#ifdef WITH_OPENMP
	if (set_max_threads_glob == 0) {
		max_threads_glob = omp_get_max_threads();
		set_max_threads_glob = 1;
	}
#else
	max_threads_glob = 1;
	set_max_threads_glob = 1;
#endif
	max_threads = max_threads_glob;
	return max_threads;
}

Pavel Kus's avatar
Pavel Kus committed
724
static int omp_threads_enumerate(elpa_index_t index, int i) {
Andreas Marek's avatar
Andreas Marek committed
725
726
727
728
729
730
        return i + 1;
}

static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
        int max_threads;
#ifdef WITH_OPENMP
731
732
733
734
	if (set_max_threads_glob == 0) {
		max_threads_glob = omp_get_max_threads();
		set_max_threads_glob = 1;
	}
Andreas Marek's avatar
Andreas Marek committed
735
#else
736
737
	max_threads_glob = 1;
	set_max_threads_glob = 1;
Andreas Marek's avatar
Andreas Marek committed
738
#endif
739
	max_threads = max_threads_glob;
Andreas Marek's avatar
Andreas Marek committed
740
        return (1 <= new_value) && (new_value <= max_threads);
Andreas Marek's avatar
Andreas Marek committed
741
742
}

743

744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
static int valid_with_gpu(elpa_index_t index, int n, int new_value) {
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        if (gpu_is_active == 1) {
                return ((new_value == 0 ) || (new_value == 1));
        }
        else {
                return new_value == 0;
        }
}

static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        if ((solver == ELPA_SOLVER_1STAGE) && (gpu_is_active == 1)) {
                return ((new_value == 0 ) || (new_value == 1));
        }
        else {
                return new_value == 0;
        }
}

static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        if ((solver == ELPA_SOLVER_2STAGE) && (gpu_is_active == 1)) {
                return ((new_value == 0 ) || (new_value == 1));
        }
        else {
                return new_value == 0;
        }
}

776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
static int max_stored_rows_cardinality(elpa_index_t index) {
	return 8;
}

static int max_stored_rows_enumerate(elpa_index_t index, int i) {
	switch(i) {
	  case 0:
	    return 15;
	  case 1:
	    return 31;
	  case 2:
	    return 47;
	  case 3:
	    return 63;
	  case 4:
	    return 79;
	  case 5:
	    return 95;
	  case 6:
	    return 111;
	  case 7:
	    return 127;
	}
}

static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value) {
802
803
804
805
806
807
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_2STAGE) {
                return new_value == 15;
        } else {
                return (15 <= new_value) && (new_value <= 127);
        }
808
809
810
}


811
812
813
// TODO: this shoudl definitely be improved (too many options to test in autotuning)
static const int TILE_SIZE_STEP = 128;

814
static int min_tile_size_cardinality(elpa_index_t index) {
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
        int na;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);
        return na/TILE_SIZE_STEP;
}

static int min_tile_size_enumerate(elpa_index_t index, int i) {
        return (i+1) * TILE_SIZE_STEP;
}

static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value) {
       return new_value % TILE_SIZE_STEP == 0;
831
}
832

833
static int intermediate_bandwidth_cardinality(elpa_index_t index) {
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
        int na, nblk;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

        return na/nblk;
}

static int intermediate_bandwidth_enumerate(elpa_index_t index, int i) {
        int nblk;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

        return (i+1) * nblk;
}

static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value) {
        int na, nblk;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

874
875
876
877
878
879
880
881
882
883
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == nblk;
        } else {
                if((new_value <= 1 ) || (new_value > na ))
                  return 0;
                if(new_value % nblk != 0) {
                  fprintf(stderr, "intermediate bandwidth has to be multiple of nblk\n");
                  return 0;
                }
884
        }
885
886
}

887
elpa_index_t elpa_index_instance() {
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
905
}
906

907
908
909
910
911
912
913
914
915
916
917
918
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        return (int_entries[i].autotune_level != 0) &&
               (int_entries[i].autotune_level <= autotune_level) &&
               (int_entries[i].autotune_domain & autotune_domain) &&
               (!index->int_options.is_set[i]);
}

int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int N = 1;

        for (int i = 0; i < nelements(int_entries); i++) { \
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
919
                        N *= int_entries[i].cardinality(index);
920
921
922
923
924
925
                }
        }
        return N;
}

int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
926
        int n_original = n;
927
928
929
        int debug = elpa_index_get_int_value(index, "debug", NULL);
        for (int i = 0; i < nelements(int_entries); i++) {
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
930
                        int value = int_entries[i].enumerate(index, n % int_entries[i].cardinality(index));
931
932
933
934
935
936
                        /* Try to set option i to that value */
                        if (int_entries[i].valid(index, i, value)) {
                                index->int_options.values[i] = value;
                        } else {
                                return 0;
                        }
937
                        n /= int_entries[i].cardinality(index);
938
939
940
                }
        }
        if (debug == 1) {
941
                fprintf(stderr, "\n*** AUTOTUNING: setting a new combination of parameters, idx %d ***\n", n_original);
942
943
944
945
946
947
948
949
950
951
                for (int i = 0; i < nelements(int_entries); i++) {
                        if (is_tunable(index, i, autotune_level, autotune_domain)) {
                                fprintf(stderr, "%s = ", int_entries[i].base.name);
                                if (int_entries[i].to_string) {
                                        fprintf(stderr, "%s\n", int_entries[i].to_string(index->int_options.values[i]));
                                } else {
                                        fprintf(stderr, "%d\n", index->int_options.values[i]);
                                }
                        }
                }
952
                fprintf(stderr, "***\n\n");
953
954
955
956
957
        }

        /* Could set all values */
        return 1;
}
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987

int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
        //int debug = elpa_index_get_int_value(index, "debug", NULL);
        //for (int i = 0; i < nelements(int_entries); i++) {
        //        if (is_tunable(index, i, autotune_level, autotune_domain)) {
        //                int value = int_entries[i].enumerate(n % int_entries[i].cardinality());
        //                /* Try to set option i to that value */
        //                if (int_entries[i].valid(index, i, value)) {
        //                        index->int_options.values[i] = value;
        //                } else {
        //                        return 0;
        //                }
        //                n /= int_entries[i].cardinality();
        //        }
        //}
        for (int i = 0; i < nelements(int_entries); i++) {
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
                        fprintf(stderr, " %s = ", int_entries[i].base.name);
                        if (int_entries[i].to_string) {
                                fprintf(stderr, " %s\n", int_entries[i].to_string(index->int_options.values[i]));
                        } else {
                                fprintf(stderr, " %d\n", index->int_options.values[i]);
                        }
                }
        }
        fprintf(stderr, "\n");

        /* Could set all values */
        return 1;
}