elpa_index.c 55.7 KB
Newer Older
1
//    This file is part of ELPA.
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
46
#include <assert.h>
Pavel Kus's avatar
Pavel Kus committed
47 48
#include <stdio.h>
#include <stdlib.h>
49
#include <elpa/elpa.h>
50
#include "elpa_index.h"
51

52 53
#include <execinfo.h>

Andreas Marek's avatar
Andreas Marek committed
54 55 56 57 58 59 60 61 62
#include "config.h"

#ifdef WITH_OPENMP
#include <omp.h>
#endif

/* File-scope globals with external linkage.
 * NOTE(review): neither is referenced in the part of the file visible here;
 * presumably they cache the OpenMP thread limit and whether it has been
 * captured yet -- confirm against the omp_threads_* helpers. */
int max_threads_glob;
int set_max_threads_glob=0;

63 64
/* Forward declarations of the per-option callbacks referenced by the
 * int_entries table: "cardinality" (number of candidate values),
 * "enumerate" (i-th candidate value), "valid" (accept/reject a new value)
 * and "to_string"/name helpers. */

/* Generic helpers for boolean-like int options */
static int enumerate_identity(elpa_index_t index, int i);
static int cardinality_bool(elpa_index_t index);
static int valid_bool(elpa_index_t index, int n, int new_value);

/* "solver" */
static int number_of_solvers(elpa_index_t index);
static int solver_enumerate(elpa_index_t index, int i);
static int solver_is_valid(elpa_index_t index, int n, int new_value);
static const char* elpa_solver_name(int solver);

/* "real_kernel" */
static int number_of_real_kernels(elpa_index_t index);
static int real_kernel_enumerate(elpa_index_t index, int i);
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);

/* "complex_kernel" */
static int number_of_complex_kernels(elpa_index_t index);
static int complex_kernel_enumerate(elpa_index_t index, int i);
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);

/* "blocking_in_band_to_full" */
static int band_to_full_cardinality(elpa_index_t index);
static int band_to_full_enumerate(elpa_index_t index, int i);
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

/* "stripewidth_real" */
static int stripewidth_real_cardinality(elpa_index_t index);
static int stripewidth_real_enumerate(elpa_index_t index, int i);
static int stripewidth_real_is_valid(elpa_index_t index, int n, int new_value);

/* "stripewidth_complex" */
static int stripewidth_complex_cardinality(elpa_index_t index);
static int stripewidth_complex_enumerate(elpa_index_t index, int i);
static int stripewidth_complex_is_valid(elpa_index_t index, int n, int new_value);

/* "omp_threads" */
static int omp_threads_cardinality(elpa_index_t index);
static int omp_threads_enumerate(elpa_index_t index, int i);
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value);

/* "max_stored_rows" */
static int max_stored_rows_cardinality(elpa_index_t index);
static int max_stored_rows_enumerate(elpa_index_t index, int i);
static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value);

/* "min_tile_size" */
static int min_tile_size_cardinality(elpa_index_t index);
static int min_tile_size_enumerate(elpa_index_t index, int i);
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value);

/* Validity checks for the per-phase "gpu_*" switches */
static int valid_with_gpu(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value);

/* "intermediate_bandwidth" */
static int intermediate_bandwidth_cardinality(elpa_index_t index);
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i);
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value);

/* "cannon_buffer_size" */
static int cannon_buffer_size_cardinality(elpa_index_t index);
static int cannon_buffer_size_enumerate(elpa_index_t index, int i);
static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value);

/* Validity checks for the structural parameters and the global "gpu" flag */
static int na_is_valid(elpa_index_t index, int n, int new_value);
static int nev_is_valid(elpa_index_t index, int n, int new_value);
static int bw_is_valid(elpa_index_t index, int n, int new_value);
static int gpu_is_valid(elpa_index_t index, int n, int new_value);

static int is_positive(elpa_index_t index, int n, int new_value);

/* String conversion for double options (needed by the getenv_double helper) */
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
127

Pavel Kus's avatar
Pavel Kus committed
128
/* Designated initializer for the common ".base" part of an index entry.
 * option_name must be a string literal: it is pasted onto the
 * "ELPA_DEFAULT_" / "ELPA_FORCE_" prefixes by literal concatenation to form
 * the names of the two environment variables associated with the option. */
#define BASE_ENTRY(option_name, option_description, once_value, readonly_value, print_flag_value) \
                .base = { \
                        .name = option_name, \
                        .description = option_description, \
                        .once = once_value, \
                        .readonly = readonly_value, \
                        .env_default = "ELPA_DEFAULT_" option_name, \
                        .env_force = "ELPA_FORCE_" option_name, \
                        .print_flag = print_flag_value, \
                }
138

Pavel Kus's avatar
Pavel Kus committed
139
/* Table entry for an int parameter that may be set only once (once = 1,
 * readonly = 0). valid_func may be NULL, meaning "no validation". */
#define INT_PARAMETER_ENTRY(option_name, option_description, valid_func, print_flag) \
        { \
                BASE_ENTRY(option_name, option_description, 1, 0, print_flag), \
                .valid = valid_func, \
        }
144

Pavel Kus's avatar
Pavel Kus committed
145
/* Table entry for a boolean option stored as int: two candidate values,
 * enumerated as 0 and 1, validated by valid_bool. */
#define BOOL_ENTRY(option_name, option_description, default_val, tune_level, tune_domain, print_flag) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
                .default_value = default_val, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
        }

Pavel Kus's avatar
Pavel Kus committed
156
/* Table entry for a general int option with caller-supplied callbacks:
 * card_func / enumerate_func describe the candidate values, valid_func
 * accepts or rejects a new value, to_string_func (may be NULL) maps a value
 * to a symbolic name. */
#define INT_ENTRY(option_name, option_description, default_val, tune_level, tune_domain, card_func, enumerate_func, valid_func, to_string_func, print_flag) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
                .default_value = default_val, \
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
                .cardinality = card_func, \
                .enumerate = enumerate_func, \
                .valid = valid_func, \
                .to_string = to_string_func, \
        }

Pavel Kus's avatar
Pavel Kus committed
168
/* Table entry for an int option with no callbacks at all: every callback
 * pointer is left zero-initialized, so no validation is applied on set. */
#define INT_ANY_ENTRY(option_name, option_description, print_flag) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
        }

173 174
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
175
static const elpa_index_int_entry_t int_entries[] = {
Pavel Kus's avatar
Pavel Kus committed
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_id", "Process rank", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("is_process_id_zero", "Is it a process with rank zero?", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("num_process_rows", "Number of process row number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_process_cols", "Number of process column number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_processes", "Total number of processes", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid, PRINT_YES),
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator", PRINT_NO),
        INT_ANY_ENTRY("blacs_context", "BLACS context", PRINT_NO),
193
        INT_ANY_ENTRY("legacy_api", "This object has been created through the legacy api. Parameter for internal use only", PRINT_NO),
194
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
195
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES),
196
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
197
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
198 199
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
200
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
201
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
202
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
203
                        cardinality_bool, enumerate_identity, valid_with_gpu, NULL, PRINT_YES),
204
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
205
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
206
        INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
207
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
208
        INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
209
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
210
        INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
211
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
212
        INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
213
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
214
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Pavel Kus's avatar
Pavel Kus committed
215
                        number_of_real_kernels, real_kernel_enumerate, real_kernel_is_valid, real_kernel_name, PRINT_YES),
216
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Pavel Kus's avatar
Pavel Kus committed
217
                        number_of_complex_kernels, complex_kernel_enumerate, complex_kernel_is_valid, complex_kernel_name, PRINT_YES),
218

219
        INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
220
                        min_tile_size_cardinality, min_tile_size_enumerate, min_tile_size_is_valid, NULL, PRINT_YES),
221
        INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
222
                        intermediate_bandwidth_cardinality, intermediate_bandwidth_enumerate, intermediate_bandwidth_is_valid, NULL, PRINT_YES),
223

224
        INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
225
                        band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL, PRINT_YES),
226
        INT_ENTRY("stripewidth_real", "Stripewidth_real, default 48. Must be a multiple of 4", 48, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_REAL,
227
                        stripewidth_real_cardinality, stripewidth_real_enumerate, stripewidth_real_is_valid, NULL, PRINT_YES),
228
        INT_ENTRY("stripewidth_complex", "Stripewidth_complex, default 96. Must be a multiple of 8", 96, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_COMPLEX,
229 230
                        stripewidth_complex_cardinality, stripewidth_complex_enumerate, stripewidth_complex_is_valid, NULL, PRINT_YES),

231
        INT_ENTRY("max_stored_rows", "Maximum number of stored rows used in ELPA 1 backtransformation, default 63", 63, ELPA_AUTOTUNE_EXTENSIVE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
232
                        max_stored_rows_cardinality, max_stored_rows_enumerate, max_stored_rows_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
233
#ifdef WITH_OPENMP
234
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
235
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
236
#else
237
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
238
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
239
#endif
240
        INT_ENTRY("cannon_buffer_size", "Increasing the buffer size might make it faster, but costs memory", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
241
                        cannon_buffer_size_cardinality, cannon_buffer_size_enumerate, cannon_buffer_size_is_valid, NULL, PRINT_YES),
242
        //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
Pavel Kus's avatar
Pavel Kus committed
243 244 245 246 247 248
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL, PRINT_YES),
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("cannon_for_generalized", "Whether to use Cannons algorithm for the generalized EVP", 1, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
249 250 251
};

/* Table entry for a read-only double option (once = 0, readonly = 1,
 * print_flag = 0). */
#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 1, 0) \
        }

/* Table of all double-valued options. It is currently empty, which makes it
 * a zero-length array (a compiler extension, e.g. GNU C); this is why the
 * accessors generated below test sizeof(<TYPE>_entries) == 0 before
 * searching the table. */
static const elpa_index_double_entry_t double_entries[] = {
        /* Empty for now */
};
259

260
/* Release an index object: the values / is_set / notified arrays of every
 * option type, then the index structure itself.
 * Passing NULL is a no-op, mirroring free(NULL). */
void elpa_index_free(elpa_index_t index) {
        if (index == NULL) {
                return;
        }

#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

        free(index);
}

static int compar(const void *key, const void *member) {
        const char *name = (const char *) key;
273
        elpa_index_int_entry_t *entry = (elpa_index_int_entry_t *) member;
274

275
        int l1 = strlen(entry->base.name);
276 277 278 279
        int l2 = strlen(name);
        if (l1 != l2) {
                return 1;
        }
280
        if (strncmp(name, entry->base.name, l1) == 0) {
281 282 283 284 285 286
                return 0;
        } else {
                return 1;
        }
}

287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
/* Generates find_<TYPE>_entry(name): linear search of <TYPE>_entries for an
 * entry whose name matches. Yields the entry's position in the table, or -1
 * when no entry matches. */
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                size_t count = nelements(TYPE##_entries); \
                const elpa_index_##TYPE##_entry_t *hit = \
                        lfind((const void*) name, (const void *) TYPE##_entries, &count, sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (hit == NULL) { \
                        return -1; \
                } \
                return (hit - &TYPE##_entries[0]); \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


/* Generates getenv_<TYPE>(): try to read option number n from the
 * environment variable env_variable.
 *
 *   index         index object; its "notified" bitmask for option n is updated
 *   env_variable  name of the environment variable to consult
 *   notify_flag   bit recording that the user was already told about this source
 *   n             position of the option in <TYPE>_entries
 *   value         receives the parsed value on success
 *   error_string  leading word of the notice (e.g. "Option")
 *
 * Returns 1 if the variable is set and could be parsed (*value is written),
 * 0 otherwise. A parse failure is reported on stderr, naming the
 * environment variable. On success a notice is printed by the process whose
 * "is_process_id_zero" value is 1, and only once per option and notify_flag,
 * whether or not the value has a symbolic string form. */
#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                char *env_value = getenv(env_variable); \
                if (env_value == NULL) { \
                        return 0; \
                } \
                int err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
                if (err != ELPA_OK) { \
                        fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                        env_variable, env_value, elpa_strerr(err)); \
                        return 0; \
                } \
                if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
                        if (elpa_index_get_int_value(index, "is_process_id_zero", NULL) == 1) { \
                                const char *value_string = NULL; \
                                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
                                        fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                      error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                } else { \
                                        fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
                                                error_string, TYPE##_entries[n].base.name, *value, env_variable);\
                                } \
                        } \
                        index->TYPE##_options.notified[n] |= notify_flag; \
                } \
                return 1; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


335
/* Generates elpa_index_get_<TYPE>_value(index, name, error): look up option
 * "name" and return its current value.
 *
 * Options that are neither set-once nor read-only can be overridden through
 * their "env_force" environment variable, which is consulted on every call.
 *
 * error may be NULL. Otherwise *error is set to ELPA_OK on success or to
 * ELPA_ERROR_ENTRY_NOT_FOUND when no such option exists, in which case
 * ERROR_VALUE is returned. An empty option table is reported the same way
 * as an unknown name. */
#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, SCANF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                int n = (sizeof(TYPE##_entries) == 0) ? -1 : find_##TYPE##_entry(name); \
                if (n < 0) { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
                TYPE ret; \
                int from_env = 0; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                } \
                if (!from_env) { \
                        ret = index->TYPE##_options.values[n]; \
                } \
                if (error != NULL) { \
                        *error = ELPA_OK; \
                } \
                return ret; \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


/* Generates elpa_index_get_<TYPE>_loc(index, name): address of the storage
 * slot holding option "name" inside index, or NULL if the option table is
 * empty or the name is unknown. */
#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return NULL; \
                } \
                return &index->TYPE##_options.values[n]; \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


379
/* Generates elpa_index_set_<TYPE>_value(index, name, value): store a new
 * value for option "name" and mark it as explicitly set.
 *
 * Returns ELPA_OK on success, otherwise (checked in this order):
 *   ELPA_ERROR_ENTRY_NOT_FOUND      unknown option or empty option table
 *   ELPA_ERROR_ENTRY_INVALID_VALUE  rejected by the entry's "valid" callback
 *   ELPA_ERROR_ENTRY_ALREADY_SET    set-once option that was already set
 *   ELPA_ERROR_ENTRY_READONLY       option is read-only */
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                if (TYPE##_entries[n].valid != NULL) { \
                        if (!TYPE##_entries[n].valid(index, n, value)) { \
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
                        } \
                } \
                if (TYPE##_entries[n].base.once && index->TYPE##_options.is_set[n]) { \
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
                if (TYPE##_entries[n].base.readonly) { \
                        return ELPA_ERROR_ENTRY_READONLY; \
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)

Pavel Kus's avatar
Pavel Kus committed
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
/* Generates elpa_index_set_from_load_<TYPE>_value(): store a value for
 * option "name" without applying the valid / once / readonly checks.
 * The option is marked as explicitly set only when "explicit" is non-zero.
 * Returns ELPA_OK, or ELPA_ERROR_ENTRY_NOT_FOUND for an unknown option or
 * an empty option table. */
#define IMPLEMENT_SET_FROM_LOAD_FUNCTION(TYPE, PRINTF_SPEC, ...) \
        int elpa_index_set_from_load_##TYPE##_value(elpa_index_t index, char *name, TYPE value, int explicit) { \
                int slot; \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                slot = find_##TYPE##_entry(name); \
                if (slot < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                index->TYPE##_options.values[slot] = value; \
                if (explicit) { \
                        index->TYPE##_options.is_set[slot] = 1; \
                } \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FROM_LOAD_FUNCTION)

421 422 423

/* Generates elpa_index_<TYPE>_value_is_set(index, name): 1 if option "name"
 * has been marked as set, 0 if not, ELPA_ERROR_ENTRY_NOT_FOUND if the option
 * table is empty or the name is unknown.
 * NOTE(review): elpa_index_value_is_set() treats any result >= 0 as an
 * answer, so the error code is presumably negative -- confirm in elpa.h. */
#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
                return index->TYPE##_options.is_set[n] ? 1 : 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


/* Type-agnostic "is set" query: asks each option type in turn and returns
 * the first non-negative answer (0 = not set, 1 = set). If no type knows
 * the name, an error is printed to stderr and the last lookup's error code
 * is returned. */
int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int res = ELPA_ERROR;

#define RET_IF_SET(TYPE, ...) \
        res = elpa_index_##TYPE##_value_is_set(index, name); \
        if (res >= 0) { \
                return res; \
        }

        FOR_ALL_TYPES(RET_IF_SET)

        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return res;
}

456 457 458 459 460
/* Check whether new_value would be accepted for int option "name" without
 * storing it.
 * Returns ELPA_OK if the option has no validator or the validator accepts
 * the value, ELPA_ERROR if the validator rejects it, and
 * ELPA_ERROR_ENTRY_NOT_FOUND for an unknown option. */
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        if (int_entries[n].valid == NULL) {
                return ELPA_OK;
        }
        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
}

468
int elpa_int_value_to_string(char *name, int value, const char **string) {
469 470 471
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
472
        }
473
        if (int_entries[n].to_string == NULL) {
474
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
475 476 477
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
478 479
}

480 481

/* Length of the symbolic name of "value" for int option "name", or 0 when
 * no string representation is available (unknown option, no to_string
 * callback, or a NULL name). */
int elpa_int_value_to_strlen(char *name, int value) {
        const char *string = NULL;

        /* On failure "string" stays NULL, which is reported as length 0 */
        elpa_int_value_to_string(name, value, &string);
        if (string == NULL) {
                return 0;
        }
        return (int) strlen(string);
}
490

491 492 493 494 495 496

/* Length of the symbolic name of the value currently stored in index for
 * int option "name"; 0 if the option is unknown or has no string form.
 * The stored value is read directly, i.e. without consulting environment
 * overrides. */
int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int n = find_int_entry(name);
        if (n < 0) {
                return 0;
        }
        return elpa_int_value_to_strlen(name, index->int_options.values[n]);
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
502 503 504 505 506 507 508 509
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
510
                if (ret == 1) {
511
                        *value = val;
512 513
                        return ELPA_OK;
                } else {
514
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
515 516 517
                }
        }

518 519
        for (int i = 0; i < int_entries[n].cardinality(NULL); i++) {
                int candidate = int_entries[n].enumerate(NULL, i);
520 521 522
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
523
                }
524
        }
525
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
526 527
}

528
int elpa_double_string_to_value(char *name, char *string, double *value) {
529 530
        double val;
        int ret = sscanf(string, "%lf", &val);
531
        if (ret == 1) {
532 533
                *value = val;
                return ELPA_OK;
534
        } else {
535 536
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
537
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
538 539 540
        }
}

541
int elpa_double_value_to_string(char *name, double value, const char **string) {
542
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
543
}
544

545
/* Number of candidate values of int option "name", as reported by its
 * cardinality callback (invoked with a NULL index).
 * Returns ELPA_ERROR_ENTRY_NOT_FOUND if the option is unknown or has no
 * cardinality callback. */
int elpa_option_cardinality(char *name) {
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].cardinality) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
        return int_entries[n].cardinality(NULL);
}
552

553
/* i-th candidate value of int option "name", as reported by its enumerate
 * callback (invoked with a NULL index).
 * Returns 0 if the option is unknown or has no enumerate callback; note
 * that this is indistinguishable from a candidate value of 0. */
int elpa_option_enumerate(char *name, int i) {
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].enumerate) {
                return 0;
        }
        return int_entries[n].enumerate(NULL, i);
}

561

562
/* Helper functions for simple int entries */
563
/* Cardinality callback for boolean options: always 2; `index` is not used. */
static int cardinality_bool(elpa_index_t index) {
        enum { BOOL_VALUE_COUNT = 2 };

        (void) index;
        return BOOL_VALUE_COUNT;
}
566

567 568
/*
 * Validity callback for boolean options: accepts exactly 0 and 1.
 * `index` and `n` are not consulted.
 */
static int valid_bool(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;

        return new_value == 0 || new_value == 1;
}

571
/* Enumerate callback that returns the running index `i` itself as the value. */
static int enumerate_identity(elpa_index_t index, int i) {
        (void) index;

        return i;
}

575 576 577 578 579 580 581 582 583 584
/* Helper functions for specific options */

/*
 * Callback macros meant to be handed to the ELPA_FOR_ALL_* list macros inside
 * a switch statement; each one expands to a single `case` label plus return.
 */

/* `case value:` returning the stringified identifier `name`. */
#define NAME_CASE(name, value, ...) \
        case value: \
                return #name;

/* `case value:` accepting the value unconditionally. */
#define VALID_CASE(name, value) \
        case value: \
                return 1;

/*
 * `case value:` accepting the value only if `available` is non-zero and
 * `other_checks(value)` is non-zero as well. Both operands are parenthesized
 * so that an expression passed as `available` cannot re-associate with `&&`.
 */
#define VALID_CASE_3(name, value, available, other_checks) \
        case value: \
                return (available) && (other_checks(value));
588 589 590 591 592 593

/*
 * Return the identifier of `solver` as a string (one NAME_CASE label is
 * generated per entry of ELPA_FOR_ALL_SOLVERS), or "(Invalid solver)" when
 * no label matches.
 */
static const char* elpa_solver_name(int solver) {
        switch (solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        break;
        }

        return "(Invalid solver)";
}

597
/* Cardinality callback for the solver option: ELPA_NUMBER_OF_SOLVERS. */
static int number_of_solvers(elpa_index_t index) {
        (void) index;

        return ELPA_NUMBER_OF_SOLVERS;
}

601
/*
 * Map the running index `i` onto a solver constant.
 *
 * The case labels are computed by the preprocessor. For every entry of
 * ELPA_FOR_ALL_SOLVERS, ENUMERATE_CASE emits
 *
 *     { const int array_of_size_value[value];
 *       case 0 + <one OPTION_RANK term per entry>: return value; }
 *
 * Each OPTION_RANK term is 1 when that entry's value is below the number of
 * elements of array_of_size_value (i.e. below this case's own `value`) and 0
 * otherwise, so the label is the count of entries with a smaller value.
 * Indices without a matching label yield 0.
 *
 * EMPTY/DEFER1/EVAL postpone the expansion of INNER_ITERATOR so that the list
 * macro can be applied a second time inside its own expansion. All helper
 * macros except INNER_ITERATOR stay defined after this function.
 */
static int solver_enumerate(elpa_index_t index, int i) {
#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define OPTION_RANK(name, value, ...) \
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
        switch (i) {
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
                default:
                        return 0;
        }
#undef INNER_ITERATOR
}


624
/*
 * Validity callback for the solver option: 1 when `new_value` matches one of
 * the entries of ELPA_FOR_ALL_SOLVERS, 0 otherwise.
 */
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
        switch (new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        break;
        }

        return 0;
}

632
/* Cardinality callback for the real kernel option: ELPA_2STAGE_NUMBER_OF_REAL_KERNELS. */
static int number_of_real_kernels(elpa_index_t index) {
        (void) index;

        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
635

636
/*
 * Map the running index `i` onto a 2-stage real kernel constant.
 *
 * Case labels are generated from ELPA_FOR_ALL_2STAGE_REAL_KERNELS through the
 * EVAL / ENUMERATE_CASE helper macros, which must already be defined at this
 * point of the file. Indices without a matching label yield 0.
 */
static int real_kernel_enumerate(elpa_index_t index,int i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
        switch (i) {
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
                default:
                        return 0;
        }
#undef INNER_ITERATOR
}
645

646
static const char *real_kernel_name(int kernel) {
647 648 649 650
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
651
        }
652
}
653

654 655 656
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1

657
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
658 659 660 661
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
662
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
663
        switch(new_value) {
664
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
665 666
                default:
                        return 0;
667
        }
668
}
669

670
/* Cardinality callback for the complex kernel option: ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS. */
static int number_of_complex_kernels(elpa_index_t index) {
        (void) index;

        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
673

674

675
/*
 * Map the running index `i` onto a 2-stage complex kernel constant.
 *
 * Case labels are generated from ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS through
 * the EVAL / ENUMERATE_CASE helper macros, which must already be defined at
 * this point of the file. Indices without a matching label yield 0.
 */
static int complex_kernel_enumerate(elpa_index_t index,int i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
        switch (i) {
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
                default:
                        return 0;
        }
#undef INNER_ITERATOR
}

685
static const char *complex_kernel_name(int kernel) {
686 687 688 689
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
690
        }
691
}
692

693 694 695
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1

696
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
697 698 699 700
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }
701
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
702
        switch(new_value) {
703
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
704 705 706 707
                default:
                        return 0;
        }
}
708

709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724
/*
 * Return the identifier of the autotune level `level` as a string, or
 * "(Invalid autotune level)" when no entry of ELPA_FOR_ALL_AUTOTUNE_LEVELS
 * matches.
 */
static const char* elpa_autotune_level_name(int level) {
        switch (level) {
                ELPA_FOR_ALL_AUTOTUNE_LEVELS(NAME_CASE)
                default:
                        break;
        }

        return "(Invalid autotune level)";
}

/*
 * Return the identifier of the autotune domain `domain` as a string, or
 * "(Invalid autotune domain)" when no entry of ELPA_FOR_ALL_AUTOTUNE_DOMAINS
 * matches.
 */
static const char* elpa_autotune_domain_name(int domain) {
        switch (domain) {
                ELPA_FOR_ALL_AUTOTUNE_DOMAINS(NAME_CASE)
                default:
                        break;
        }

        return "(Invalid autotune domain)";
}

725 726 727 728
/* Validity callback for "na": any strictly positive value is accepted. */
static int na_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;

        return 0 < new_value;
}

729 730 731 732
/*
 * Validity callback for "nev": requires "na" to be set already and accepts
 * values in the closed range [0, na].
 */
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (!elpa_index_int_value_is_set(index, "na")) {
                return 0;
        }
        /* "na" is only read once the lower bound has been established. */
        if (new_value < 0) {
                return 0;
        }

        return new_value <= elpa_index_get_int_value(index, "na", NULL);
}

/* Generic validity callback: accepts any strictly positive value. */
static int is_positive(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;

        return 0 < new_value;
}

740 741 742 743 744 745 746 747 748
/*
 * Validity callback for a bandwidth value: requires "na" to be set and
 * accepts values in the half-open range [0, na).
 */
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }

        const int matrix_size = elpa_index_get_int_value(index, "na", NULL);

        if (new_value < 0) {
                return 0;
        }
        return new_value < matrix_size;
}
749

750 751 752 753
/* Validity callback for the "gpu" switch: accepts exactly 0 and 1. */
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        (void) index;
        (void) n;

        switch (new_value) {
                case 0:
                case 1:
                        return 1;
                default:
                        return 0;
        }
}

754
/* Cardinality callback for the band-to-full blocking option: 10 candidates. */
static int band_to_full_cardinality(elpa_index_t index) {
        enum { BLOCKING_CANDIDATES = 10 };

        (void) index;
        return BLOCKING_CANDIDATES;
}
757
/* Enumerate callback for the band-to-full blocking option: index i maps to i + 1. */
static int band_to_full_enumerate(elpa_index_t index, int i) {
        (void) index;

        return 1 + i;
}

761
// TODO shouldnt it be only for ELPA2??
762
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
763 764
	int max_block=10;
        return (1 <= new_value) && (new_value <= max_block);
765 766
}

767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860
/* Cardinality callback for the real stripe width: 17 candidate values. */
static int stripewidth_real_cardinality(elpa_index_t index) {
        enum { REAL_STRIPEWIDTH_CANDIDATES = 17 };

        (void) index;
        return REAL_STRIPEWIDTH_CANDIDATES;
}

/* Cardinality callback for the complex stripe width: 17 candidate values. */
static int stripewidth_complex_cardinality(elpa_index_t index) {
        enum { COMPLEX_STRIPEWIDTH_CANDIDATES = 17 };

        (void) index;
        return COMPLEX_STRIPEWIDTH_CANDIDATES;
}

/*
 * Enumerate callback for the real stripe width.
 *
 * Indices 0..16 map to 32, 36, ..., 96 (steps of 4). Any other index returns
 * 0, like the default branch of the other enumerate callbacks in this file;
 * previously such an index ran off the end of the function without returning
 * a value.
 */
static int stripewidth_real_enumerate(elpa_index_t index, int i) {
        enum { FIRST_WIDTH = 32, WIDTH_STEP = 4, CANDIDATES = 17 };

        (void) index;

        if (i < 0 || i >= CANDIDATES) {
                return 0;
        }
        return FIRST_WIDTH + WIDTH_STEP * i;
}

/*
 * Enumerate callback for the complex stripe width.
 *
 * Indices 0..16 map to 48, 56, ..., 176 (steps of 8). Any other index returns
 * 0, like the default branch of the other enumerate callbacks in this file;
 * previously such an index ran off the end of the function without returning
 * a value.
 */
static int stripewidth_complex_enumerate(elpa_index_t index, int i) {
        enum { FIRST_WIDTH = 48, WIDTH_STEP = 8, CANDIDATES = 17 };

        (void) index;

        if (i < 0 || i >= CANDIDATES) {
                return 0;
        }
        return FIRST_WIDTH + WIDTH_STEP * i;
}

/* Validity callback for the real stripe width: accepts the closed range [32, 96]. */
static int stripewidth_real_is_valid(elpa_index_t index, int n, int new_value) {
        if (new_value < 32) {
                return 0;
        }
        return new_value <= 96;
}

/* Validity callback for the complex stripe width: accepts the closed range [48, 176]. */
static int stripewidth_complex_is_valid(elpa_index_t index, int n, int new_value) {
        if (new_value < 48) {
                return 0;
        }
        return new_value <= 176;
}

Pavel Kus's avatar
Pavel Kus committed
861
/*
 * Cardinality callback for the OpenMP thread count.
 *
 * With WITH_OPENMP the value of omp_get_max_threads() is stored in
 * max_threads_glob the first time it is needed (tracked by
 * set_max_threads_glob); without it the stored maximum is forced to 1.
 * The stored maximum is returned.
 */
static int omp_threads_cardinality(elpa_index_t index) {
#ifdef WITH_OPENMP
        if (set_max_threads_glob == 0) {
                max_threads_glob = omp_get_max_threads();
                set_max_threads_glob = 1;
        }
#else
        max_threads_glob = 1;
        set_max_threads_glob = 1;
#endif
        const int thread_limit = max_threads_glob;

        return thread_limit;
}

Pavel Kus's avatar
Pavel Kus committed
876
/* Enumerate callback for the OpenMP thread count: index i maps to i + 1 threads. */
static int omp_threads_enumerate(elpa_index_t index, int i) {
        (void) index;

        return 1 + i;
}

/*
 * Validity callback for the OpenMP thread count: accepts 1..max_threads_glob.
 *
 * With WITH_OPENMP the maximum is taken from omp_get_max_threads() the first
 * time it is needed (tracked by set_max_threads_glob); without it the maximum
 * is forced to 1.
 */
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
#ifdef WITH_OPENMP
        if (set_max_threads_glob == 0) {
                max_threads_glob = omp_get_max_threads();
                set_max_threads_glob = 1;
        }
#else
        max_threads_glob = 1;
        set_max_threads_glob = 1;
#endif
        const int thread_limit = max_threads_glob;

        if (new_value < 1) {
                return 0;
        }
        return new_value <= thread_limit;
}

895

896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
/*
 * Validity callback for switches that may only be enabled while the "gpu"
 * option equals 1: value 0 is always accepted, value 1 only in that case,
 * everything else is rejected.
 */
static int valid_with_gpu(elpa_index_t index, int n, int new_value) {
        const int may_enable = (elpa_index_get_int_value(index, "gpu", NULL) == 1);

        if (new_value == 0) {
                return 1;
        }
        return may_enable && (new_value == 1);
}

/*
 * Validity callback for switches that may only be enabled when the solver is
 * ELPA_SOLVER_1STAGE and the "gpu" option equals 1: value 0 is always
 * accepted, value 1 only in that case, everything else is rejected.
 */
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) {
        const int solver = elpa_index_get_int_value(index, "solver", NULL);
        const int gpu = elpa_index_get_int_value(index, "gpu", NULL);
        const int may_enable = (solver == ELPA_SOLVER_1STAGE) && (gpu == 1);

        if (new_value == 0) {
                return 1;
        }
        return may_enable && (new_value == 1);
}

/*
 * Validity callback for switches that may only be enabled when the solver is
 * ELPA_SOLVER_2STAGE and the "gpu" option equals 1: value 0 is always
 * accepted, value 1 only in that case, everything else is rejected.
 */
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value) {
        const int solver = elpa_index_get_int_value(index, "solver", NULL);
        const int gpu = elpa_index_get_int_value(index, "gpu", NULL);
        const int may_enable = (solver == ELPA_SOLVER_2STAGE) && (gpu == 1);

        if (new_value == 0) {
                return 1;
        }
        return may_enable && (new_value == 1);
}

928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953
/* Cardinality callback for max_stored_rows: 8 candidate values. */
static int max_stored_rows_cardinality(elpa_index_t index) {
        enum { STORED_ROWS_CANDIDATES = 8 };

        (void) index;
        return STORED_ROWS_CANDIDATES;
}

/*
 * Enumerate callback for max_stored_rows.
 *
 * Indices 0..7 map to 15, 31, ..., 127 (steps of 16). Any other index returns
 * 0, like the default branch of the other enumerate callbacks in this file;
 * previously such an index ran off the end of the function without returning
 * a value.
 */
static int max_stored_rows_enumerate(elpa_index_t index, int i) {
        enum { FIRST_ROWS = 15, ROWS_STEP = 16, CANDIDATES = 8 };

        (void) index;

        if (i < 0 || i >= CANDIDATES) {
                return 0;
        }
        return FIRST_ROWS + ROWS_STEP * i;
}

/*
 * Validity callback for max_stored_rows: with the 2-stage solver only the
 * value 15 is accepted, otherwise anything in the closed range [15, 127].
 */
static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value) {
        const int solver = elpa_index_get_int_value(index, "solver", NULL);

        return (solver == ELPA_SOLVER_2STAGE)
                ? (new_value == 15)
                : ((15 <= new_value) && (new_value <= 127));
}


963 964 965
// TODO: this should definitely be improved (too many options to test in autotuning)
static const int TILE_SIZE_STEP = 128;

966
/*
 * Cardinality callback for min_tile_size: na / TILE_SIZE_STEP (integer
 * division), or 0 when `index` is NULL or "na" has not been set.
 */
static int min_tile_size_cardinality(elpa_index_t index) {
        if (index == NULL || elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }

        const int matrix_size = elpa_index_get_int_value(index, "na", NULL);

        return matrix_size / TILE_SIZE_STEP;
}

/* Enumerate callback for min_tile_size: index i maps to (i + 1) * TILE_SIZE_STEP. */
static int min_tile_size_enumerate(elpa_index_t index, int i) {
        const int multiple = i + 1;

        (void) index;
        return multiple * TILE_SIZE_STEP;
}

/*
 * Validity callback for min_tile_size: accepts every value that is divisible
 * by TILE_SIZE_STEP (this includes 0 and negative multiples).
 */
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value) {
        const int remainder = new_value % TILE_SIZE_STEP;

        return remainder == 0;
}
984

985
/*
 * Cardinality callback for the intermediate bandwidth: na / nblk (integer
 * division), or 0 when `index` is NULL or either "na" or "nblk" is unset.
 */
static int intermediate_bandwidth_cardinality(elpa_index_t index) {
        if (index == NULL) {
                return 0;
        }

        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        const int matrix_size = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        const int block_size = elpa_index_get_int_value(index, "nblk", NULL);

        return matrix_size / block_size;
}

/*
 * Enumerate callback for the intermediate bandwidth: index i maps to
 * (i + 1) * nblk. Returns 0 when `index` is NULL or "nblk" is unset.
 */
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i) {
        if (index == NULL || elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }

        const int block_size = elpa_index_get_int_value(index, "nblk", NULL);

        return (i + 1) * block_size;
}

/*
 * Validity callback for the intermediate bandwidth.
 *
 * Requires "na" and "nblk" to be set. With the 1-stage solver the only
 * accepted value is nblk itself. Otherwise the value must lie in (1, na] and
 * be a multiple of nblk; a message is printed to stderr when it is not a
 * multiple.
 *
 * Returns 1 for an acceptable value and 0 otherwise. The accepting path of
 * the non-1-stage branch used to fall off the end of the function without a
 * return statement; it now returns 1 explicitly.
 */
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value) {
        int na, nblk;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == nblk;
        }

        if ((new_value <= 1) || (new_value > na)) {
                return 0;
        }
        if (new_value % nblk != 0) {
                fprintf(stderr, "intermediate bandwidth has to be multiple of nblk\n");
                return 0;
        }
        return 1;
}

1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070
/* Cardinality callback for the Cannon buffer size: 2 candidate values. */
static int cannon_buffer_size_cardinality(elpa_index_t index) {
        enum { BUFFER_SIZE_CANDIDATES = 2 };

        (void) index;
        return BUFFER_SIZE_CANDIDATES;
}

/*
 * Enumerate callback for the Cannon buffer size: index 0 maps to 0, every
 * other index to num_process_rows - 1. Returns 0 as well when `index` is NULL
 * or "num_process_rows" is unset.
 */
static int cannon_buffer_size_enumerate(elpa_index_t index, int i) {
        if (index == NULL || elpa_index_int_value_is_set(index, "num_process_rows") != 1) {
                return 0;
        }

        const int process_rows = elpa_index_get_int_value(index, "num_process_rows", NULL);

        // TODO: 0 is both error code and legal value?
        return (i == 0) ? 0 : process_rows - 1;
}

/*
 * Validity callback for the Cannon buffer size: accepts values in
 * [0, num_process_rows). Rejects everything when `index` is NULL or
 * "num_process_rows" is unset.
 */
static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value) {
        if (index == NULL || elpa_index_int_value_is_set(index, "num_process_rows") != 1) {
                return 0;
        }

        const int process_rows = elpa_index_get_int_value(index, "num_process_rows", NULL);

        if (new_value < 0) {
                return 0;
        }
        return new_value < process_rows;
}

1071
elpa_index_t elpa_index_instance() {
1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
1089
}
1090

1091 1092 1093 1094 1095 1096 1097
/*
 * 1 when integer option `i` takes part in autotuning at the given level and
 * domain (non-zero autotune_level not above `autotune_level`, domain bits
 * overlapping `autotune_domain`) but has been set explicitly in `index`;
 * 0 otherwise.
 */
static int is_tunable_but_overriden(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        const int entry_level = int_entries[i].autotune_level;

        if (entry_level == 0 || entry_level > autotune_level) {
                return 0;
        }
        if (!(int_entries[i].autotune_domain & autotune_domain)) {
                return 0;
        }
        return index->int_options.is_set[i] != 0;
}

1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109
/*
 * 1 when integer option `i` takes part in autotuning at the given level and
 * domain (non-zero autotune_level not above `autotune_level`, domain bits
 * overlapping `autotune_domain`) and has NOT been set explicitly in `index`;
 * 0 otherwise.
 */
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        const int entry_level = int_entries[i].autotune_level;

        if (entry_level == 0 || entry_level > autotune_level) {
                return 0;
        }
        if (!(int_entries[i].autotune_domain & autotune_domain)) {
                return 0;
        }
        return !index->int_options.is_set[i];
}

/*
 * Size of the autotuning search space: the product of the cardinalities of
 * all integer options for which is_tunable() holds at the given level and
 * domain. Returns 1 when no option is tunable.
 */
int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int N = 1;

        /* The stray line-continuation backslash after the loop header was removed. */
        for (int i = 0; i < nelements(int_entries); i++) {
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
                        N *= int_entries[i].cardinality(index);
                }
        }
        return N;
}

1116 1117
/*
 * Format integer option `i` of `index` into `buff` as either
 * "<name> = <value> -> <string>\n" (when the entry has a to_string callback)
 * or "<name> = <value>\n".
 *
 * `buff` must be large enough for the whole line; its size is not known here.
 *
 * The value part is written directly behind the name part. The previous
 * implementation passed `buff` to sprintf() as both destination and "%s"
 * argument, which is undefined behaviour for overlapping objects.
 */
void elpa_index_print_int_parameter(elpa_index_t index, char* buff, int i)
{
        int value = index->int_options.values[i];
        int prefix_len = sprintf(buff, "%s = ", int_entries[i].base.name);

        if (prefix_len < 0) {
                /* Formatting the name failed: start the value at the beginning. */
                prefix_len = 0;
        }

        if (int_entries[i].to_string) {
                sprintf(buff + prefix_len, "%d -> %s\n", value, int_entries[i].to_string(value));
        } else {
                sprintf(buff + prefix_len, "%d\n", value);
        }
}

1127 1128
/*
 * Apply the parameter combination number `current` to all tunable integer
 * options of `index`.
 *
 * `current` is decoded as a mixed-radix number: for each tunable option (in
 * table order) the digit `current % cardinality` selects the value through the
 * option's enumerate callback, and `current` is then divided by that
 * cardinality for the next option.
 *
 * Returns 1 when every selected value passed the option's valid callback and
 * was stored, 0 as soon as one value is rejected (options handled before the
 * rejection keep their new values). An option whose cardinality is not
 * positive also yields 0, since no digit can be decoded for it; this used to
 * be a division by zero.
 *
 * When the "debug" option is 1 and this is process 0, the chosen combination
 * is printed to stderr.
 */
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int current) {
        int current_cpy = current;
        int debug = elpa_index_get_int_value(index, "debug", NULL);
        int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);

        for (int i = 0; i < nelements(int_entries); i++) {
                if (!is_tunable(index, i, autotune_level, autotune_domain)) {
                        continue;
                }

                const int card = int_entries[i].cardinality(index);
                if (card <= 0) {
                        return 0;
                }

                int value = int_entries[i].enumerate(index, current_cpy % card);

                /* Try to set option i to that value */
                if (!int_entries[i].valid(index, i, value)) {
                        return 0;
                }
                index->int_options.values[i] = value;
                current_cpy /= card;
        }

        if (debug == 1 && is_process_id_zero) {
                fprintf(stderr, "\n*** AUTOTUNING: setting a new combination of parameters, idx %d ***\n", current);
                elpa_index_print_autotune_parameters(index, autotune_level, autotune_domain);
                fprintf(stderr, "***\n\n");
        }

        /* Could set all values */
        return 1;
}
1157

1158
/*
 * Print every tunable integer option of `index` (as selected by is_tunable()
 * for the given level and domain) to stderr, one line per option.
 * Only the process whose "is_process_id_zero" option is non-zero prints.
 * Always returns 1.
 */
int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain) {
        char line[100];

        if (!elpa_index_get_int_value(index, "is_process_id_zero", NULL)) {
                return 1;
        }

        for (int i = 0; i < nelements(int_entries); i++) {
                if (!is_tunable(index, i, autotune_level, autotune_domain)) {
                        continue;
                }
                elpa_index_print_int_parameter(index, line, i);
                fputs(line, stderr);
        }

        return 1;
}
Pavel Kus's avatar
Pavel Kus committed
1171

1172
/*
 * Write the current autotuning state to `file_name`, or to stdout when
 * `file_name` is NULL or empty.
 *
 * The report contains the autotune level and domain, the search-space
 * cardinality, the current and best indices and the best time. If a best
 * index exists (min_loc > -1) it is followed by the tunable options decoded
 * from min_loc and by the options that would be tuned but were set explicitly.
 * Only the process whose "is_process_id_zero" option is non-zero writes.
 *
 * Returns 1 on success and 0 when the scratch index cannot be created or the
 * output file cannot be opened. The scratch index is now released on the
 * fopen failure path as well (it used to leak there).
 */
int elpa_index_print_autotune_state(elpa_index_t index, int autotune_level, int autotune_domain, int min_loc,
                                    double min_val, int current, int cardinality, char* file_name) {
        char buff[100];
        elpa_index_t index_best;
        int min_loc_cpy = min_loc;
        const int have_best = (min_loc > -1);
        FILE *f;

        // get index with the currently best parameters
        index_best = elpa_index_instance();
        if (index_best == NULL) {
                return 0;
        }

        if (have_best) {
                for (int i = 0; i < nelements(int_entries); i++) {
                        if (!is_tunable(index, i, autotune_level, autotune_domain)) {
                                continue;
                        }

                        const int card = int_entries[i].cardinality(index);
                        if (card <= 0) {
                                /* No digit can be decoded for this option; keep its default. */
                                continue;
                        }

                        /* we are setting the value for output only, we do not need to check consistency */
                        index_best->int_options.values[i] = int_entries[i].enumerate(index, min_loc_cpy % card);
                        min_loc_cpy /= card;
                }
        }

        int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);
        if (is_process_id_zero) {
                int output_to_file = (file_name != NULL) && (strlen(file_name) > 0);
                if (output_to_file) {
                        f = fopen(file_name, "w");
                        if (f == NULL) {
                                fprintf(stderr, "Cannot open file %s in elpa_index_print_autotune_state\n", file_name);
                                elpa_index_free(index_best);
                                return 0;
                        }
                } else {
                        f = stdout;
                }

                if (!output_to_file) {
                        fprintf(f, "\n");
                }
                fprintf(f, "*** AUTOTUNING STATE ***\n");
                fprintf(f, "** This is the state of the autotuning object\n");
                fprintf(f, "autotune_level = %d -> %s\n", autotune_level, elpa_autotune_level_name(autotune_level));
                fprintf(f, "autotune_domain = %d -> %s\n", autotune_domain, elpa_autotune_domain_name(autotune_domain));
                fprintf(f, "autotune_cardinality = %d\n", cardinality);
                fprintf(f, "current_idx = %d\n", current);
                fprintf(f, "best_idx = %d\n", min_loc);
                fprintf(f, "best_time = %g\n", min_val);
                if (have_best) {
                        fprintf(f, "** The following parameters are autotuned with so far the best values\n");
                        for (int i = 0; i < nelements(int_entries); i++) {
                                if (is_tunable(index, i, autotune_level, autotune_domain)) {
                                        elpa_index_print_int_parameter(index_best, buff, i);
                                        fprintf(f, "%s", buff);
                                }
                        }
                        fprintf(f, "** The following parameters would be autotuned on the selected autotuning level, but were overridden by the set() method\n");
                        for (int i = 0; i < nelements(int_entries); i++) {
                                if (is_tunable_but_overriden(index, i, autotune_level, autotune_domain)) {
                                        elpa_index_print_int_parameter(index, buff, i);
                                        fprintf(f, "%s", buff);
                                }
                        }
                } else {
                        fprintf(f, "** No output after first step\n");
                }
                fprintf(f, "*** END OF AUTOTUNING STATE ***\n");

                if (output_to_file) {
                        fclose(f);
                }
        }
        elpa_index_free(index_best);

        return 1;
}

1245 1246
const int LEN =1000;

/*
 * Generates, for every type listed by FOR_ALL_TYPES, a function
 *
 *     static int load_<TYPE>_line(FILE *f, const char *expected, TYPE *val)
 *
 * that reads one line of the form "<key> = <value>" from `f`, checks that
 * <key> equals `expected` and stores <value> in *val.
 *
 * Returns 1 on success. Returns 0, after printing a diagnostic to stderr,
 * when no line can be read, when the line does not contain both a key and a
 * value, or when the key differs from `expected`; *val is untouched then.
 *
 * The sscanf() result is checked so that neither the key buffer nor the
 * parsed value is used uninitialised on a malformed line.
 */
#define IMPLEMENT_LOAD_LINE(TYPE, PRINTF_SPEC, SCANF_SPEC, ...) \
        static int load_##TYPE##_line(FILE* f, const char* expected, TYPE* val) { \
                char line[LEN], s[LEN]; \
                int error = 0; \
                TYPE n; \
                s[0] = '\0'; \
                if(fgets(line, LEN, f) == NULL){ \
                        fprintf(stderr, "Loading autotuning state error: line is not there\n"); \
                        error = 1; \
                } else if(sscanf(line, "%s = " SCANF_SPEC "\n", s, &n) != 2){ \
                        fprintf(stderr, "Loading autotuning state error: could not parse line for %s\n", expected); \
                        error = 1; \
                } else if(strcmp(s, expected) != 0){ \
                        fprintf(stderr, "Loading autotuning state error: expected %s, got %s\n", expected, s); \
                        error = 1;\
                } else{ \
                        *val = n; \
                } \
                if(error){ \
                        fprintf(stderr, "Autotuning state file corrupted\n"); \
                        return 0; \
                } \
                return 1; \
        }
FOR_ALL_TYPES(IMPLEMENT_LOAD_LINE)

/*
 * Read an autotuning state from `file_name`.
 *
 * The first two lines of the file are skipped; the following six lines must
 * carry, in this order, the keys autotune_level, autotune_domain,
 * autotune_cardinality, current_idx, best_idx and best_time, whose values are
 * stored through the corresponding output pointers.
 *
 * Returns 1 when all fields were read and 0 otherwise (file cannot be opened,
 * file too short, or a line does not match). Outputs read before a failure
 * keep their new values. The file is now closed on every path; the early
 * returns used to leak the FILE handle. `index` is not used.
 */
int elpa_index_load_autotune_state(elpa_index_t index, int* autotune_level, int* autotune_domain, int* min_loc,
                                    double* min_val, int* current, int* cardinality, char* file_name) {
        char line[LEN];
        FILE *f;
        int loaded = 0;

        //TODO: should be broadcasted, instead of read on all ranks
        f = fopen(file_name, "r");

        if (f == NULL) {
                fprintf(stderr, "Cannont open file %s\n", file_name);
                return(0);
        }

        /* && short-circuits, so reading stops at the first failing step. */
        if (fgets(line, LEN, f) != NULL &&
            fgets(line, LEN, f) != NULL &&
            load_int_line(f, "autotune_level", autotune_level) &&
            load_int_line(f, "autotune_domain", autotune_domain) &&
            load_int_line(f, "autotune_cardinality", cardinality) &&
            load_int_line(f, "current_idx", current) &&
            load_int_line(f, "best_idx", min_loc) &&
            load_double_line(f, "best_time", min_val)) {
                loaded = 1;
        }

        fclose(f);

        return loaded;
}

Pavel Kus's avatar
Pavel Kus committed
1301 1302 1303 1304
/*
 * Section headers of the settings file. elpa_index_print_settings() writes
 * them, and elpa_index_load_settings() compares whole input lines against
 * EXPLICIT_PARAMETERS and DEFAULT_PARAMETERS, so the exact text (including
 * the trailing newline) is part of the file format.
 */
const char STRUCTURE_PARAMETERS[] = "* Parameters describing structure of the computation:\n";
const char EXPLICIT_PARAMETERS[] = "* Parameters explicitly set by the user:\n";
const char DEFAULT_PARAMETERS[] = "* Parameters with default or environment value:\n";

1305
/*
 * Append `text` to the NUL-terminated string in `section`, which has room for
 * `capacity` bytes in total. Text that does not fit is truncated.
 */
static void settings_section_append(char *section, size_t capacity, const char *text) {
        size_t used = strlen(section);
        size_t room = capacity - 1 - used;
        size_t len = strlen(text);

        if (len > room) {
                len = room;
        }
        sprintf(section + used, "%.*s", (int) len, text);
}

/*
 * Write all integer options of `index` to `file_name`, or to stdout when
 * `file_name` is NULL or empty.
 *
 * The output is grouped into three sections: options flagged
 * PRINT_STRUCTURE, options flagged PRINT_YES that were set explicitly, and
 * options flagged PRINT_YES that still carry their default. Options with any
 * other print flag are omitted. Only the process whose "is_process_id_zero"
 * option is non-zero writes.
 *
 * Returns 1 on success and 0 when the output file cannot be opened.
 *
 * Sections are built by bounded appends; the previous implementation used
 * sprintf() with the destination buffer as one of its own arguments, which is
 * undefined behaviour, and could overrun the section buffers.
 */
int elpa_index_print_settings(elpa_index_t index, char *file_name) {
        enum { SECTION_LEN = 10000, PARAM_LEN = 100 };
        enum { SEC_STRUCTURE, SEC_SET, SEC_DEFAULTS, SEC_COUNT };
        const char *headers[SEC_COUNT] = { STRUCTURE_PARAMETERS, EXPLICIT_PARAMETERS, DEFAULT_PARAMETERS };
        char sections[SEC_COUNT][SECTION_LEN];
        char buff[PARAM_LEN];
        FILE *f;

        for (int s = 0; s < SEC_COUNT; s++) {
                sections[s][0] = '\0';
                settings_section_append(sections[s], SECTION_LEN, headers[s]);
        }

        int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);
        if (is_process_id_zero) {
                for (int i = 0; i < nelements(int_entries); i++) {
                        int target;

                        if (int_entries[i].base.print_flag == PRINT_STRUCTURE) {
                                target = SEC_STRUCTURE;
                        } else if (int_entries[i].base.print_flag == PRINT_YES) {
                                target = index->int_options.is_set[i] ? SEC_SET : SEC_DEFAULTS;
                        } else {
                                /* Not to be printed. */
                                continue;
                        }

                        elpa_index_print_int_parameter(index, buff, i);
                        settings_section_append(sections[target], SECTION_LEN, buff);
                }

                int output_to_file = (file_name != NULL) && (strlen(file_name) > 0);
                if (output_to_file) {
                        f = fopen(file_name, "w");
                        if (f == NULL) {
                                fprintf(stderr, "Cannot open file %s in elpa_index_print_settings\n", file_name);
                                return 0;
                        }
                } else {
                        f = stdout;
                }

                fprintf(f, "*** ELPA STATE ***\n");
                fprintf(f, "%s%s%s", sections[SEC_STRUCTURE], sections[SEC_SET], sections[SEC_DEFAULTS]);
                fprintf(f, "*** END OF ELPA STATE ***\n");
                if (output_to_file) {
                        fclose(f);
                }
        }

        return 1;
}
Pavel Kus's avatar
Pavel Kus committed
1350

1351
/*
 * Load integer options from a settings file into `index`.
 *
 * Lines are ignored until one of the section headers EXPLICIT_PARAMETERS or
 * DEFAULT_PARAMETERS has been seen. From then on every line that is neither
 * empty nor starts with '*' is parsed as "<name> = <int>" and handed to
 * elpa_index_set_from_load_int_value() together with a flag telling whether
 * it came from the "explicit" section. The result of that call is ignored,
 * as before.
 *
 * A line that cannot be parsed is reported on stderr and skipped; previously
 * the unchecked sscanf() result meant stale or uninitialised data could be
 * passed on.
 *
 * Returns 1 when the file could be opened and 0 otherwise.
 */
int elpa_index_load_settings(elpa_index_t index, char *file_name) {
        enum { LINE_LEN = 1000 };
        char line[LINE_LEN], s[LINE_LEN];
        int n;
        FILE *f;
        int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);
        int skip = 1;
        int explicit = 0;

        //TODO: should be broadcasted, instead of read on all ranks
        (void) is_process_id_zero;

        f = fopen(file_name, "r");

        if (f == NULL) {
                fprintf(stderr, "Cannont open file %s\n", file_name);
                return(0);
        }

        while (fgets(line, LINE_LEN, f) != NULL) {
                if (strcmp(line, EXPLICIT_PARAMETERS) == 0) {
                        skip = 0;
                        explicit = 1;
                }
                if (strcmp(line, DEFAULT_PARAMETERS) == 0) {
                        skip = 0;
                        explicit = 0;
                }

                /* Blank lines and header/marker lines carry no option. */
                if (line[0] == '\n' || line[0] == '*') {
                        continue;
                }
                if (skip) {
                        continue;
                }

                if (sscanf(line, "%s = %d\n", s, &n) != 2) {
                        fprintf(stderr, "Skipping malformed line in settings file %s\n", file_name);
                        continue;
                }

                (void) elpa_index_set_from_load_int_value(index, s, n, explicit);
        }
        fclose(f);

        return 1;
}