elpa_index.c 47.8 KB
Newer Older
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
//    This file is part of ELPA.
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
//    Authors: L. Huedepohl and A. Marek, MPCDF
Pavel Kus's avatar
Pavel Kus committed
46
#include <assert.h>
47
#include <elpa/elpa.h>
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
48
#include "elpa_index.h"
49

50 51
#include <execinfo.h>

Andreas Marek's avatar
Andreas Marek committed
52 53 54 55 56 57 58 59 60
#include "config.h"

#ifdef WITH_OPENMP
#include <omp.h>
#endif

int max_threads_glob;
int set_max_threads_glob=0;

61 62
static int enumerate_identity(elpa_index_t index, int i);
static int cardinality_bool(elpa_index_t index);
63 64
static int valid_bool(elpa_index_t index, int n, int new_value);

65 66
static int number_of_solvers(elpa_index_t index);
static int solver_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
67
static int solver_is_valid(elpa_index_t index, int n, int new_value);
68 69
static const char* elpa_solver_name(int solver);

70 71
static int number_of_real_kernels(elpa_index_t index);
static int real_kernel_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
72 73
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *real_kernel_name(int kernel);
74

75 76
static int number_of_complex_kernels(elpa_index_t index);
static int complex_kernel_enumerate(elpa_index_t index, int i);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
77 78
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
static const char *complex_kernel_name(int kernel);
79

80 81
static int band_to_full_cardinality(elpa_index_t index);
static int band_to_full_enumerate(elpa_index_t index, int i);
82 83
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);

Pavel Kus's avatar
Pavel Kus committed
84 85 86
static int omp_threads_cardinality(elpa_index_t index);
static int omp_threads_enumerate(elpa_index_t index, int i);
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value);
Andreas Marek's avatar
Andreas Marek committed
87

88 89 90 91
static int max_stored_rows_cardinality(elpa_index_t index);
static int max_stored_rows_enumerate(elpa_index_t index, int i);
static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value);

92
static int min_tile_size_cardinality(elpa_index_t index);
93 94 95 96 97 98
static int min_tile_size_enumerate(elpa_index_t index, int i);
static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value);

static int valid_with_gpu(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value);
static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value);
99

100
static int intermediate_bandwidth_cardinality(elpa_index_t index);
101 102
static int intermediate_bandwidth_enumerate(elpa_index_t index, int i);
static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value);
103

104 105 106 107
static int cannon_buffer_size_cardinality(elpa_index_t index);
static int cannon_buffer_size_enumerate(elpa_index_t index, int i);
static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
108
static int na_is_valid(elpa_index_t index, int n, int new_value);
109
static int nev_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
110
static int bw_is_valid(elpa_index_t index, int n, int new_value);
111
static int gpu_is_valid(elpa_index_t index, int n, int new_value);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
112

113 114
static int is_positive(elpa_index_t index, int n, int new_value);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
115 116
static int elpa_double_string_to_value(char *name, char *string, double *value);
static int elpa_double_value_to_string(char *name, double value, const char **string);
117

Pavel Kus's avatar
Pavel Kus committed
118
#define BASE_ENTRY(option_name, option_description, once_value, readonly_value, print_flag_value) \
119 120 121 122 123 124 125
                .base = { \
                        .name = option_name, \
                        .description = option_description, \
                        .once = once_value, \
                        .readonly = readonly_value, \
                        .env_default = "ELPA_DEFAULT_" option_name, \
                        .env_force = "ELPA_FORCE_" option_name, \
Pavel Kus's avatar
Pavel Kus committed
126
                        .print_flag = print_flag_value, \
127
                }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
128

Pavel Kus's avatar
Pavel Kus committed
129
#define INT_PARAMETER_ENTRY(option_name, option_description, valid_func, print_flag) \
130
        { \
Pavel Kus's avatar
Pavel Kus committed
131
                BASE_ENTRY(option_name, option_description, 1, 0, print_flag), \
132
                .valid = valid_func, \
133
        }
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
134

Pavel Kus's avatar
Pavel Kus committed
135
#define BOOL_ENTRY(option_name, option_description, default, tune_level, tune_domain, print_flag) \
136
        { \
Pavel Kus's avatar
Pavel Kus committed
137
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
138
                .default_value = default, \
139 140
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
141 142 143
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
144 145
        }

Pavel Kus's avatar
Pavel Kus committed
146
#define INT_ENTRY(option_name, option_description, default, tune_level, tune_domain, card_func, enumerate_func, valid_func, to_string_func, print_flag) \
147
        { \
Pavel Kus's avatar
Pavel Kus committed
148
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
149
                .default_value = default, \
150 151
                .autotune_level = tune_level, \
                .autotune_domain = tune_domain, \
152 153 154 155
                .cardinality = card_func, \
                .enumerate = enumerate_func, \
                .valid = valid_func, \
                .to_string = to_string_func, \
156 157
        }

Pavel Kus's avatar
Pavel Kus committed
158
#define INT_ANY_ENTRY(option_name, option_description, print_flag) \
159
        { \
Pavel Kus's avatar
Pavel Kus committed
160
                BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
161 162
        }

163 164
/* The order here is important! Tunable options that are dependent on other
 * tunable options must appear later in the list than their prerequisites */
165
static const elpa_index_int_entry_t int_entries[] = {
Pavel Kus's avatar
Pavel Kus committed
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
        INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("process_id", "Process rank", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("is_process_id_zero", "Is it a process with rank zero?", NULL, PRINT_NO),
        INT_PARAMETER_ENTRY("num_process_rows", "Number of process row number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_process_cols", "Number of process column number in the 2D domain decomposition", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("num_processes", "Total number of processes", NULL, PRINT_STRUCTURE),
        INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid, PRINT_YES),
        INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication", PRINT_NO),
        INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator", PRINT_NO),
        INT_ANY_ENTRY("blacs_context", "BLACS context", PRINT_NO),
183
        INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
Pavel Kus's avatar
Pavel Kus committed
184
                        number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES),
185
        INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
186
                        cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
187 188
        //default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
        //by the parameter gpu and presence of the device
189
        INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
190
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
191
        INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
192
                        cardinality_bool, enumerate_identity, valid_with_gpu, NULL, PRINT_YES),
193
        INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
194
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa1, NULL, PRINT_YES),
195
        INT_ENTRY("gpu_bandred", "Use GPU acceleration for ELPA2 band reduction", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
196
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
197
        INT_ENTRY("gpu_tridiag_band", "Use GPU acceleration for ELPA2 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
198
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
199
        INT_ENTRY("gpu_trans_ev_tridi_to_band", "Use GPU acceleration for ELPA2 trans_ev_tridi_to_band", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
200
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
201
        INT_ENTRY("gpu_trans_ev_band_to_full", "Use GPU acceleration for ELPA2 trans_ev_band_to_full", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
202
                        cardinality_bool, enumerate_identity, valid_with_gpu_elpa2, NULL, PRINT_YES),
203
        INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
Pavel Kus's avatar
Pavel Kus committed
204
                        number_of_real_kernels, real_kernel_enumerate, real_kernel_is_valid, real_kernel_name, PRINT_YES),
205
        INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
Pavel Kus's avatar
Pavel Kus committed
206
                        number_of_complex_kernels, complex_kernel_enumerate, complex_kernel_is_valid, complex_kernel_name, PRINT_YES),
207

208
        INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
209
                        min_tile_size_cardinality, min_tile_size_enumerate, min_tile_size_is_valid, NULL, PRINT_YES),
210
        INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
211
                        intermediate_bandwidth_cardinality, intermediate_bandwidth_enumerate, intermediate_bandwidth_is_valid, NULL, PRINT_YES),
212

213
        INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
214
                        band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL, PRINT_YES),
215
        INT_ENTRY("max_stored_rows", "Maximum number of stored rows used in ELPA 1 backtransformation, default 63", 63, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
216
                        max_stored_rows_cardinality, max_stored_rows_enumerate, max_stored_rows_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
217
#ifdef WITH_OPENMP
218
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
219
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
220
#else
221
        INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
222
                        omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL, PRINT_YES),
Andreas Marek's avatar
Andreas Marek committed
223
#endif
224
        INT_ENTRY("cannon_buffer_size", "Increasing the buffer size might make it faster, but costs memory", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
Pavel Kus's avatar
Pavel Kus committed
225
                        cannon_buffer_size_cardinality, cannon_buffer_size_enumerate, cannon_buffer_size_is_valid, NULL, PRINT_YES),
226
        //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
Pavel Kus's avatar
Pavel Kus committed
227 228 229 230 231 232
        BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL, PRINT_YES),
        BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
        BOOL_ENTRY("cannon_for_generalized", "Whether to use Cannons algorithm for the generalized EVP", 1, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
233 234 235
};

#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
236
        { \
237
                BASE_ENTRY(option_name, option_description, 0, 1, 0) \
238 239 240
        }

static const elpa_index_double_entry_t double_entries[] = {
241
        /* Empty for now */
242
};
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
243

244
void elpa_index_free(elpa_index_t index) {
245 246 247 248 249 250 251
#define FREE_OPTION(TYPE, ...) \
        free(index->TYPE##_options.values); \
        free(index->TYPE##_options.is_set); \
        free(index->TYPE##_options.notified);

        FOR_ALL_TYPES(FREE_OPTION);

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
252 253 254 255 256
        free(index);
}

static int compar(const void *key, const void *member) {
        const char *name = (const char *) key;
257
        elpa_index_int_entry_t *entry = (elpa_index_int_entry_t *) member;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
258

259
        int l1 = strlen(entry->base.name);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
260 261 262 263
        int l2 = strlen(name);
        if (l1 != l2) {
                return 1;
        }
264
        if (strncmp(name, entry->base.name, l1) == 0) {
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
265 266 267 268 269 270
                return 0;
        } else {
                return 1;
        }
}

271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
#define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
        static int find_##TYPE##_entry(char *name) { \
                elpa_index_##TYPE##_entry_t *entry; \
                size_t nmembers = nelements(TYPE##_entries); \
                entry = lfind((const void*) name, (const void *) TYPE##_entries, &nmembers, sizeof(elpa_index_##TYPE##_entry_t), compar); \
                if (entry) { \
                        return (entry - &TYPE##_entries[0]); \
                } else { \
                        return -1; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)


#define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
        static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
                int err; \
288
                int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL); \
289 290
                char *env_value = getenv(env_variable); \
                if (env_value) { \
291
                        err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
292 293 294 295 296
                        if (err != ELPA_OK) { \
                                fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
                                                TYPE##_entries[n].base.name, env_value, elpa_strerr(err)); \
                        } else {\
                                const char *value_string = NULL; \
297
                                if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
298
                                        if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
299
                                                if (is_process_id_zero == 1) { \
300 301 302
                                                        fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
                                                                      error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
                                                } \
303 304 305
                                                index->TYPE##_options.notified[n] |= notify_flag; \
                                        } \
                                } else { \
306
                                        if (is_process_id_zero == 1) { \
307
                                                fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
308
                                                        error_string, TYPE##_entries[n].base.name, *value, env_variable);\
309
                                        } \
310 311 312 313 314 315 316 317 318 319 320 321
                                } \
                                return 1; \
                        } \
                } \
                return 0; \
        }
FOR_ALL_TYPES(IMPLEMENT_GETENV)


#define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, ERROR_VALUE) \
        TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
                TYPE ret; \
322 323 324
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        int from_env = 0; \
                        if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                                from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
                        } \
                        if (!from_env) { \
                                ret = index->TYPE##_options.values[n]; \
                        } \
                        if (error != NULL) { \
                                *error = ELPA_OK; \
                        } \
                        return ret; \
                } else { \
                        if (error != NULL) { \
                                *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
                        } \
                        return ERROR_VALUE; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)


#define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
        TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
350 351 352
                if (sizeof(TYPE##_entries) == 0) { \
                        return NULL; \
                } \
353 354 355 356 357 358 359 360 361 362
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        return &index->TYPE##_options.values[n]; \
                } else { \
                        return NULL; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)


363
#define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
364
        int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
365 366 367
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
368 369 370 371 372 373
                int n = find_##TYPE##_entry(name); \
                if (n < 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                }; \
                if (TYPE##_entries[n].valid != NULL) { \
                        if(!TYPE##_entries[n].valid(index, n, value)) { \
374
                                return ELPA_ERROR_ENTRY_INVALID_VALUE; \
375 376 377
                        }; \
                } \
                if (TYPE##_entries[n].base.once & index->TYPE##_options.is_set[n]) { \
378 379
                        return ELPA_ERROR_ENTRY_ALREADY_SET; \
                } \
380
                if (TYPE##_entries[n].base.readonly) { \
381
                        return ELPA_ERROR_ENTRY_READONLY; \
382 383 384 385 386 387 388 389 390 391
                } \
                index->TYPE##_options.values[n] = value; \
                index->TYPE##_options.is_set[n] = 1; \
                return ELPA_OK; \
        }
FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)


#define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
        int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
392 393 394
                if (sizeof(TYPE##_entries) == 0) { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415
                int n = find_##TYPE##_entry(name); \
                if (n >= 0) { \
                        if (index->TYPE##_options.is_set[n]) { \
                                return 1; \
                        } else { \
                                return 0; \
                        } \
                } else { \
                        return ELPA_ERROR_ENTRY_NOT_FOUND; \
                } \
        }
FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)


int elpa_index_value_is_set(elpa_index_t index, char *name) {
        int res = ELPA_ERROR;

#define RET_IF_SET(TYPE, ...) \
        res = elpa_index_##TYPE##_value_is_set(index, name); \
        if (res >= 0) { \
                return res; \
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
416
        }
417 418 419

        FOR_ALL_TYPES(RET_IF_SET)

420 421 422 423
        fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
        return res;
}

424 425 426 427 428
int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
        int n = find_int_entry(name); \
        if (n >= 0) { \
                if (int_entries[n].valid == NULL) {
                        return ELPA_OK;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
429
                } else {
430
                        return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
431 432
                }
        }
433
        return ELPA_ERROR_ENTRY_NOT_FOUND;
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
434 435
}

436
int elpa_int_value_to_string(char *name, int value, const char **string) {
437 438 439
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
440
        }
441
        if (int_entries[n].to_string == NULL) {
442
                return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
443 444 445
        }
        *string = int_entries[n].to_string(value);
        return ELPA_OK;
446 447
}

448 449

int elpa_int_value_to_strlen(char *name, int value) {
450
        const char *string = NULL;
451
        elpa_int_value_to_string(name, value, &string);
452
        if (string == NULL) {
453 454 455
                return 0;
        } else {
                return strlen(string);
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
456 457
        }
}
458

459 460 461 462 463 464

int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
        int n = find_int_entry(name);
        if (n < 0) {
                return 0;
        }
465
        return elpa_int_value_to_strlen(name, index->int_options.values[n]);
466 467 468 469
}


int elpa_int_string_to_value(char *name, char *string, int *value) {
470 471 472 473 474 475 476 477
        int n = find_int_entry(name);
        if (n < 0) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }

        if (int_entries[n].to_string == NULL) {
                int val, ret;
                ret = sscanf(string, "%d", &val);
478
                if (ret == 1) {
479
                        *value = val;
480 481
                        return ELPA_OK;
                } else {
482
                        return ELPA_ERROR_ENTRY_INVALID_VALUE;
483 484 485
                }
        }

486 487
        for (int i = 0; i < int_entries[n].cardinality(NULL); i++) {
                int candidate = int_entries[n].enumerate(NULL, i);
488 489 490
                if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
                        *value = candidate;
                        return ELPA_OK;
491
                }
492
        }
493
        return ELPA_ERROR_ENTRY_INVALID_VALUE;
494 495
}

496
int elpa_double_string_to_value(char *name, char *string, double *value) {
497 498
        double val;
        int ret = sscanf(string, "%lf", &val);
499
        if (ret == 1) {
500 501
                *value = val;
                return ELPA_OK;
502
        } else {
503 504
                /* \todo: remove */
                fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
505
                return ELPA_ERROR_ENTRY_INVALID_VALUE;
506 507 508
        }
}

509
int elpa_double_value_to_string(char *name, double value, const char **string) {
510
        return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
511
}
512

513
int elpa_option_cardinality(char *name) {
514 515 516 517
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].cardinality) {
                return ELPA_ERROR_ENTRY_NOT_FOUND;
        }
518
        return int_entries[n].cardinality(NULL);
519
}
520

521
int elpa_option_enumerate(char *name, int i) {
522 523 524
        int n = find_int_entry(name);
        if (n < 0 || !int_entries[n].enumerate) {
                return 0;
525
        }
526
        return int_entries[n].enumerate(NULL, i);
527 528
}

529

530
/* Helper functions for simple int entries */
531
static int cardinality_bool(elpa_index_t index) {
532 533
        return 2;
}
534

535 536
static int valid_bool(elpa_index_t index, int n, int new_value) {
        return (0 <= new_value) && (new_value < 2);
537 538
}

539
static int enumerate_identity(elpa_index_t index, int i) {
540 541 542
        return i;
}

543 544 545 546 547 548 549 550 551 552
/* Helper functions for specific options */

#define NAME_CASE(name, value, ...) \
        case value: \
                return #name;

#define VALID_CASE(name, value) \
        case value: \
                return 1;

553
#define VALID_CASE_3(name, value, available, other_checks) \
554
        case value: \
555
                return available && (other_checks(value));
556 557 558 559 560 561

static const char* elpa_solver_name(int solver) {
        switch(solver) {
                ELPA_FOR_ALL_SOLVERS(NAME_CASE)
                default:
                        return "(Invalid solver)";
562 563 564
        }
}

565
static int number_of_solvers(elpa_index_t index) {
566
        return ELPA_NUMBER_OF_SOLVERS;
567 568
}

569
static int solver_enumerate(elpa_index_t index, int i) {
570
#define OPTION_RANK(name, value, ...) \
571
        +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588

#define EMPTY()
#define DEFER1(m) m EMPTY()
#define EVAL(...) __VA_ARGS__

#define ENUMERATE_CASE(name, value, ...) \
        { const int array_of_size_value[value]; \
        case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
                return value; }

        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
                EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
589 590 591
}


Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
592
static int solver_is_valid(elpa_index_t index, int n, int new_value) {
593 594 595 596 597
        switch(new_value) {
                ELPA_FOR_ALL_SOLVERS(VALID_CASE)
                default:
                        return 0;
        }
598 599
}

600
static int number_of_real_kernels(elpa_index_t index) {
601 602
        return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
}
603

604
static int real_kernel_enumerate(elpa_index_t index,int i) {
605 606 607 608 609 610 611 612
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}
613

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
614
static const char *real_kernel_name(int kernel) {
615 616 617 618
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
                default:
                        return "(Invalid real kernel)";
619
        }
620
}
621

622 623 624
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
625
static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
626 627 628 629
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_REAL_DEFAULT;
        }
630
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
631
        switch(new_value) {
632
                ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
633 634
                default:
                        return 0;
635
        }
636
}
637

638
static int number_of_complex_kernels(elpa_index_t index) {
639 640
        return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
}
641

642

643
static int complex_kernel_enumerate(elpa_index_t index,int i) {
644 645 646 647 648 649 650 651 652
        switch(i) {
#define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
                EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
#undef INNER_ITERATOR
                default:
                        return 0;
        }
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
653
static const char *complex_kernel_name(int kernel) {
654 655 656 657
        switch(kernel) {
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
                default:
                        return "(Invalid complex kernel)";
658
        }
659
}
660

661 662 663
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel_number) \
        kernel_number == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
664
static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
665 666 667 668
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
        }
669
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
670
        switch(new_value) {
671
                ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
672 673 674 675
                default:
                        return 0;
        }
}
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
676 677 678 679 680

static int na_is_valid(elpa_index_t index, int n, int new_value) {
        return new_value > 0;
}

681 682 683 684
static int nev_is_valid(elpa_index_t index, int n, int new_value) {
        if (!elpa_index_int_value_is_set(index, "na")) {
                return 0;
        }
685
        return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
686 687 688 689 690 691
}

static int is_positive(elpa_index_t index, int n, int new_value) {
        return new_value > 0;
}

Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
692 693 694 695 696 697 698 699 700
static int bw_is_valid(elpa_index_t index, int n, int new_value) {
        int na;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }

        na = elpa_index_get_int_value(index, "na", NULL);
        return (0 <= new_value) && (new_value < na);
}
701

702 703 704 705
static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
        return new_value == 0 || new_value == 1;
}

706
static int band_to_full_cardinality(elpa_index_t index) {
707
	return 10;
708
}
709
static int band_to_full_enumerate(elpa_index_t index, int i) {
710
	return i+1;
711 712
}

Pavel Kus's avatar
Pavel Kus committed
713
// TODO shouldnt it be only for ELPA2??
714
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
715 716
	int max_block=10;
        return (1 <= new_value) && (new_value <= max_block);
717 718
}

Pavel Kus's avatar
Pavel Kus committed
719
static int omp_threads_cardinality(elpa_index_t index) {
Andreas Marek's avatar
Andreas Marek committed
720 721 722 723 724 725 726 727 728 729 730 731 732 733
	int max_threads;
#ifdef WITH_OPENMP
	if (set_max_threads_glob == 0) {
		max_threads_glob = omp_get_max_threads();
		set_max_threads_glob = 1;
	}
#else
	max_threads_glob = 1;
	set_max_threads_glob = 1;
#endif
	max_threads = max_threads_glob;
	return max_threads;
}

Pavel Kus's avatar
Pavel Kus committed
734
static int omp_threads_enumerate(elpa_index_t index, int i) {
Andreas Marek's avatar
Andreas Marek committed
735 736 737 738 739 740
        return i + 1;
}

static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
        int max_threads;
#ifdef WITH_OPENMP
741 742 743 744
	if (set_max_threads_glob == 0) {
		max_threads_glob = omp_get_max_threads();
		set_max_threads_glob = 1;
	}
Andreas Marek's avatar
Andreas Marek committed
745
#else
746 747
	max_threads_glob = 1;
	set_max_threads_glob = 1;
Andreas Marek's avatar
Andreas Marek committed
748
#endif
749
	max_threads = max_threads_glob;
Andreas Marek's avatar
Andreas Marek committed
750
        return (1 <= new_value) && (new_value <= max_threads);
Andreas Marek's avatar
Andreas Marek committed
751 752
}

753

754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785
static int valid_with_gpu(elpa_index_t index, int n, int new_value) {
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        if (gpu_is_active == 1) {
                return ((new_value == 0 ) || (new_value == 1));
        }
        else {
                return new_value == 0;
        }
}

static int valid_with_gpu_elpa1(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        if ((solver == ELPA_SOLVER_1STAGE) && (gpu_is_active == 1)) {
                return ((new_value == 0 ) || (new_value == 1));
        }
        else {
                return new_value == 0;
        }
}

static int valid_with_gpu_elpa2(elpa_index_t index, int n, int new_value) {
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
        if ((solver == ELPA_SOLVER_2STAGE) && (gpu_is_active == 1)) {
                return ((new_value == 0 ) || (new_value == 1));
        }
        else {
                return new_value == 0;
        }
}

786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
static int max_stored_rows_cardinality(elpa_index_t index) {
	return 8;
}

static int max_stored_rows_enumerate(elpa_index_t index, int i) {
	switch(i) {
	  case 0:
	    return 15;
	  case 1:
	    return 31;
	  case 2:
	    return 47;
	  case 3:
	    return 63;
	  case 4:
	    return 79;
	  case 5:
	    return 95;
	  case 6:
	    return 111;
	  case 7:
	    return 127;
	}
}

static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value) {
812 813 814 815 816 817
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_2STAGE) {
                return new_value == 15;
        } else {
                return (15 <= new_value) && (new_value <= 127);
        }
818 819 820
}


821 822 823
// TODO: this shoudl definitely be improved (too many options to test in autotuning)
static const int TILE_SIZE_STEP = 128;

824
static int min_tile_size_cardinality(elpa_index_t index) {
825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840
        int na;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);
        return na/TILE_SIZE_STEP;
}

static int min_tile_size_enumerate(elpa_index_t index, int i) {
        return (i+1) * TILE_SIZE_STEP;
}

static int min_tile_size_is_valid(elpa_index_t index, int n, int new_value) {
       return new_value % TILE_SIZE_STEP == 0;
841
}
842

843
static int intermediate_bandwidth_cardinality(elpa_index_t index) {
844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883
        int na, nblk;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

        return na/nblk;
}

static int intermediate_bandwidth_enumerate(elpa_index_t index, int i) {
        int nblk;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

        return (i+1) * nblk;
}

static int intermediate_bandwidth_is_valid(elpa_index_t index, int n, int new_value) {
        int na, nblk;
        if (elpa_index_int_value_is_set(index, "na") != 1) {
                return 0;
        }
        na = elpa_index_get_int_value(index, "na", NULL);

        if (elpa_index_int_value_is_set(index, "nblk") != 1) {
                return 0;
        }
        nblk = elpa_index_get_int_value(index, "nblk", NULL);

884 885 886 887 888 889 890 891 892 893
        int solver = elpa_index_get_int_value(index, "solver", NULL);
        if (solver == ELPA_SOLVER_1STAGE) {
                return new_value == nblk;
        } else {
                if((new_value <= 1 ) || (new_value > na ))
                  return 0;
                if(new_value % nblk != 0) {
                  fprintf(stderr, "intermediate bandwidth has to be multiple of nblk\n");
                  return 0;
                }
894
        }
895 896
}

897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928
static int cannon_buffer_size_cardinality(elpa_index_t index) {
        return 2;
}

static int cannon_buffer_size_enumerate(elpa_index_t index, int i) {
        int np_rows;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "num_process_rows") != 1) {
                return 0;
        }
        np_rows = elpa_index_get_int_value(index, "num_process_rows", NULL);

        // TODO: 0 is both error code and legal value?
        if(i == 0)
          return 0;
        else
          return np_rows - 1;
}

static int cannon_buffer_size_is_valid(elpa_index_t index, int n, int new_value) {
        int np_rows;
        if(index == NULL)
                return 0;
        if (elpa_index_int_value_is_set(index, "num_process_rows") != 1) {
                return 0;
        }
        np_rows = elpa_index_get_int_value(index, "num_process_rows", NULL);

        return ((new_value >= 0) && (new_value < np_rows));
}

929
elpa_index_t elpa_index_instance() {
930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
        elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));

#define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
        index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
        index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
        for (int n = 0; n < nelements(TYPE##_entries); n++) { \
                TYPE default_value = TYPE##_entries[n].default_value; \
                if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
                        getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
                } \
                index->TYPE##_options.values[n] = default_value; \
        }

        FOR_ALL_TYPES(ALLOCATE)

        return index;
947
}
948

Pavel Kus's avatar
Pavel Kus committed
949 950 951 952 953 954 955
static int is_tunable_but_overriden(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        return (int_entries[i].autotune_level != 0) &&
               (int_entries[i].autotune_level <= autotune_level) &&
               (int_entries[i].autotune_domain & autotune_domain) &&
               (index->int_options.is_set[i]);
}

956 957 958 959 960 961 962 963 964 965 966 967
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
        return (int_entries[i].autotune_level != 0) &&
               (int_entries[i].autotune_level <= autotune_level) &&
               (int_entries[i].autotune_domain & autotune_domain) &&
               (!index->int_options.is_set[i]);
}

int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
        int N = 1;

        for (int i = 0; i < nelements(int_entries); i++) { \
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
968
                        N *= int_entries[i].cardinality(index);
969 970 971 972 973
                }
        }
        return N;
}

974 975 976 977 978 979 980 981 982 983
void elpa_index_print_int_parameter(elpa_index_t index, char* buff, int i)
{
        sprintf(buff, "%s = ", int_entries[i].base.name);
        if (int_entries[i].to_string) {
                sprintf(buff, "%s%s\n", buff, int_entries[i].to_string(index->int_options.values[i]));
        } else {
                sprintf(buff, "%s%d\n", buff, index->int_options.values[i]);
        }
}

Pavel Kus's avatar
Pavel Kus committed
984 985
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int current) {
        int current_cpy = current;
986
        char buff[100];
987
        int debug = elpa_index_get_int_value(index, "debug", NULL);
Pavel Kus's avatar
Pavel Kus committed
988 989 990
        int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);

        //if(is_process_id_zero) fprintf(stderr, "***Trying a new autotuning index %d\n", current);
991 992
        for (int i = 0; i < nelements(int_entries); i++) {
                if (is_tunable(index, i, autotune_level, autotune_domain)) {
Pavel Kus's avatar
Pavel Kus committed
993
                        int value = int_entries[i].enumerate(index, current_cpy % int_entries[i].cardinality(index));
Pavel Kus's avatar
Pavel Kus committed
994
                        //if(is_process_id_zero) fprintf(stderr, "  * val[%d] = %d -> %d\n", i, current_cpy % int_entries[i].cardinality(index), value);
995 996 997 998
                        /* Try to set option i to that value */
                        if (int_entries[i].valid(index, i, value)) {
                                index->int_options.values[i] = value;
                        } else {
Pavel Kus's avatar
Pavel Kus committed
999
                                //if(is_process_id_zero) fprintf(stderr, "  *NOT VALID becaluse of i %d (%s) and value %d translated to %d\n", i, int_entries[i].base.name, current_cpy % int_entries[i].cardinality(index), value);
1000 1001
                                return 0;
                        }
Pavel Kus's avatar
Pavel Kus committed
1002
                        current_cpy /= int_entries[i].cardinality(index);
1003 1004
                }
        }
1005
        if (debug == 1 && is_process_id_zero) {
Pavel Kus's avatar
Pavel Kus committed
1006
                fprintf(stderr, "\n*** AUTOTUNING: setting a new combination of parameters, idx %d ***\n", current);
1007
                elpa_index_print_autotune_parameters(index, autotune_level, autotune_domain);
1008
                fprintf(stderr, "***\n\n");
1009 1010 1011 1012 1013
        }

        /* Could set all values */
        return 1;
}
1014

1015
int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain) {
1016
        char buff[100];
1017 1018 1019 1020 1021 1022 1023
        int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);
        if (is_process_id_zero) {
                for (int i = 0; i < nelements(int_entries); i++) {
                        if (is_tunable(index, i, autotune_level, autotune_domain)) {
                                elpa_index_print_int_parameter(index, buff, i);
                                fprintf(stderr, "%s", buff);
                        }
1024 1025 1026 1027
                }
        }
        return 1;
}
Pavel Kus's avatar
Pavel Kus committed
1028

Pavel Kus's avatar
Pavel Kus committed
1029
int elpa_index_print_autotune_state(elpa_index_t index, int autotune_level, int autotune_domain, int min_loc,
1030
                                    double min_val, int current, int cardinality, char* file_name) {
Pavel Kus's avatar
Pavel Kus committed
1031 1032 1033
        char buff[100];
        elpa_index_t index_best;
        int min_loc_cpy = min_loc;
1034 1035
        FILE *f;

Pavel Kus's avatar
Pavel Kus committed
1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051
        // get index with the currently best parameters
        index_best = elpa_index_instance();

        if(min_loc_cpy > -1){
                for (int i = 0; i < nelements(int_entries); i++) {
                        if (is_tunable(index, i, autotune_level, autotune_domain)) {

                                int value = int_entries[i].enumerate(index, min_loc_cpy % int_entries[i].cardinality(index));
                                /* we are setting the value for output only, we do not need to check consistency */
                                index_best->int_options.values[i] = value;
                                min_loc_cpy /= int_entries[i].cardinality(index);
                        }
                }
        }
        int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);
        if (is_process_id_zero) {
Pavel Kus's avatar
Pavel Kus committed
1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064
                int output_to_file = (strlen(file_name) > 0);
                if(output_to_file) {
                        f = fopen(file_name, "w");
                        if(f == NULL){
                                fprintf(stderr, "Cannot open file %s in elpa_index_print_autotune_state\n", file_name);
                                return 0;
                        }
                }
                else {
                        f = stdout;
                }

                if(!output_to_file)
Pavel Kus's avatar
Pavel Kus committed
1065
                        fprintf(f, "\n");
1066 1067 1068 1069 1070 1071 1072 1073
                fprintf(f, "*** AUTOTUNING STATE ***\n");
                fprintf(f, "** This is the state of the autotuning object\n");
                fprintf(f, "autotune level = %d\n", autotune_level);
                fprintf(f, "autotune domain = %d\n", autotune_domain);
                fprintf(f, "autotune cardinality = %d\n", cardinality);
                fprintf(f, "current idx = %d\n", current);
                fprintf(f, "best idx = %d\n", min_loc);
                fprintf(f, "best time = %lf\n", min_val);
Pavel Kus's avatar
Pavel Kus committed
1074
                if(min_loc_cpy > -1) {
1075
                        fprintf(f, "** The following parameters are autotuned with so far the best values\n");
Pavel Kus's avatar
Pavel Kus committed
1076 1077 1078
                        for (int i = 0; i < nelements(int_entries); i++) {
                                if (is_tunable(index, i, autotune_level, autotune_domain)) {
                                        elpa_index_print_int_parameter(index_best, buff, i);
1079
                                        fprintf(f, "%s", buff);
Pavel Kus's avatar
Pavel Kus committed
1080 1081
                                }
                        }
1082
                        fprintf(f, "** The following parameters would be autotuned on the selected autotuning level, but were overridden by the set() method\n");
Pavel Kus's avatar
Pavel Kus committed
1083 1084
                        for (int i = 0; i < nelements(int_entries); i++) {
                                if (is_tunable_but_overriden(index, i, autotune_level, autotune_domain)) {
Pavel Kus's avatar
Pavel Kus committed
1085
                                        elpa_index_print_int_parameter(index, buff, i);