Commit 60a5cfff authored by Andreas Marek's avatar Andreas Marek

Autotunable OpenMP

parent 9513796c
......@@ -122,7 +122,9 @@ function elpa_solve_evp_&
&")
#ifdef WITH_OPENMP
nrThreads = omp_get_max_threads()
!nrThreads = omp_get_max_threads()
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#else
nrThreads = 1
#endif
......
......@@ -80,7 +80,9 @@
&")
#ifdef WITH_OPENMP
nrThreads=omp_get_max_threads()
!nrThreads=omp_get_max_threads()
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#else
nrThreads=1
#endif
......
......@@ -121,9 +121,9 @@ subroutine elpa_reduce_add_vectors_&
aux1(:) = 0
aux2(:) = 0
#ifdef WITH_OPENMP
call omp_set_num_threads(nrThreads)
!call omp_set_num_threads(nrThreads)
!$omp parallel private(ips, ipt, auxstride, lc, i, k, ns, nl)
!$omp parallel private(ips, ipt, auxstride, lc, i, k, ns, nl) num_threads(nrThreads)
#endif
do n = 0, lcm_s_t-1
......
......@@ -81,7 +81,7 @@
logical :: success
integer :: debug, error
integer :: max_threads
integer :: nrThreads
call obj%timer%start("elpa_solve_tridi_public_&
&MATH_DATATYPE&
......@@ -95,9 +95,10 @@
matrixCols = obj%local_ncols
#ifdef WITH_OPENMP
max_threads=omp_get_max_threads()
!nrThreads=omp_get_max_threads()
call obj%get("omp_threads",nrThreads,error)
#else
max_threads=1
nrThreads=1
#endif
call obj%get("mpi_comm_rows", mpi_comm_rows,error)
......@@ -127,7 +128,7 @@
&PRECISION&
&_private_impl(obj, na, nev, d, e, q, ldq, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols,.false., wantDebug, success, &
max_threads)
nrThreads)
call obj%timer%stop("elpa_solve_tridi_public_&
&MATH_DATATYPE&
......
......@@ -150,7 +150,9 @@
#ifdef WITH_OPENMP
nrThreads = omp_get_max_threads()
!nrThreads = omp_get_max_threads()
call obj%get("omp_threads",nrThreads,error)
call omp_set_num_threads(nrThreads)
#else
nrThreads = 1
#endif
......
......@@ -48,6 +48,15 @@
#include <execinfo.h>
#include "config.h"
#ifdef WITH_OPENMP
#include <omp.h>
#endif
/* Cached OpenMP thread count; filled in by omp_threads_cardinality(). */
int max_threads_glob;
/* Starts at 0; set to 1 by omp_threads_cardinality() once max_threads_glob holds a value. */
int set_max_threads_glob=0;
static int enumerate_identity(int i);
static int cardinality_bool(void);
static int valid_bool(elpa_index_t index, int n, int new_value);
......@@ -71,9 +80,9 @@ static int band_to_full_cardinality();
static int band_to_full_enumerate(int i);
static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);
static int elpa_omp_threads_cardinality();
static int elpa_omp_threads_enumerate(int i);
static int elpa_omp_threads_is_valid(elpa_index_t index, int n, int new_value);
static int omp_threads_cardinality();
static int omp_threads_enumerate(int i);
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value);
static int min_tile_size_cardinality();
......@@ -178,7 +187,11 @@ static const elpa_index_int_entry_t int_entries[] = {
//INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
INT_ENTRY("ELPA_OMP_THREADS", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, elpa_omp_threads_cardinality, elpa_omp_threads_enumerate, elpa_omp_threads_is_valid, NULL),
#ifdef WITH_OPENMP
INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL),
#else
INT_ENTRY("omp_threads", "OpenMP threads used in ELPA, default 1", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, omp_threads_cardinality, omp_threads_enumerate, omp_threads_is_valid, NULL),
#endif
//BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL),
BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
......@@ -671,22 +684,33 @@ static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
abort();
}
static int elpa_omp_threads_cardinality() {
/* TODO: unimplemented stub; prints this source location to stderr and aborts, so it never returns. */
fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
abort();
}
static int elpa_omp_threads_enumerate(int i) {
/* TODO: unimplemented stub; ignores i, prints this source location to stderr and aborts. */
fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
abort();
}
static int elpa_omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
/* TODO */
fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
abort();
static int omp_threads_cardinality() {
int max_threads;
#ifdef WITH_OPENMP
if (set_max_threads_glob == 0) {
max_threads_glob = omp_get_max_threads();
set_max_threads_glob = 1;
}
#else
max_threads_glob = 1;
set_max_threads_glob = 1;
#endif
max_threads = max_threads_glob;
return max_threads;
}
/*
 * Translate the zero-based enumeration index i into a thread count:
 * index 0 yields 1, index 1 yields 2, and so on.
 */
static int omp_threads_enumerate(int i) {
	const int thread_count = i + 1;
	return thread_count;
}
/*
 * Validity check for the "omp_threads" option.
 *
 * index     - index object the option belongs to; not used by this check
 * n         - not used by this check
 * new_value - requested number of OpenMP threads
 *
 * Returns non-zero when 1 <= new_value <= upper bound, zero otherwise.
 *
 * With OpenMP, the upper bound is the thread count recorded in
 * max_threads_glob, i.e. the same value omp_threads_cardinality() reports.
 * Calling omp_get_max_threads() on every check is not reliable here: it
 * returns whatever the most recent omp_set_num_threads() call installed,
 * so after a lower thread count has been applied the accepted range would
 * shrink and stop matching the cardinality of the option.
 * Without OpenMP the upper bound is 1.
 */
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
	int max_threads;
#ifdef WITH_OPENMP
	/* Record the runtime maximum once, exactly as omp_threads_cardinality() does. */
	if (set_max_threads_glob == 0) {
		max_threads_glob = omp_get_max_threads();
		set_max_threads_glob = 1;
	}
	max_threads = max_threads_glob;
#else
	max_threads = 1;
#endif
	return (1 <= new_value) && (new_value <= max_threads);
}
static int min_tile_size_cardinality() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment