Commit 12b5a9b6 authored by Pavel Kus

intermediate bandwidth in full->banded->tridi tunable

parent ef171c92
......@@ -429,6 +429,8 @@
endif
if (obj%is_set("bandwidth") == 1) then
! bandwidth is set. That means that the input matrix is actually banded and thus the
! first step of ELPA2 should be skipped
call obj%get("bandwidth",nbw,error)
if (nbw == 0) then
if (wantDebug) then
......@@ -463,22 +465,40 @@
do_solve_tridi = .true. ! we also have to solve something :-)
do_trans_to_band = .true. ! and still we have to backsub to banded
do_trans_to_full = .false. ! but not to full since we have a banded matrix
else ! bandwidth is not set
! Choose bandwidth, must be a multiple of nblk, set to a value >= 32
! On older systems (IBM Bluegene/P, Intel Nehalem) a value of 32 was optimal.
! For Intel(R) Xeon(R) E5 v2 and v3, better use 64 instead of 32!
! For IBM Bluegene/Q this is not clear at the moment. We have to keep an eye
! on this and maybe allow a run-time optimization here
if (do_useGPU) then
nbw = nblk
else
else ! matrix is not banded, determine the intermediate bandwidth for full->banded->tridi
!first check if the intermediate bandwidth was set by the user
call obj%get("intermediate_bandwidth", nbw, error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
if(nbw == 0) then
! intermediate bandwidth was not specified, select one of the defaults
! Choose bandwidth, must be a multiple of nblk, set to a value >= 32
! On older systems (IBM Bluegene/P, Intel Nehalem) a value of 32 was optimal.
! For Intel(R) Xeon(R) E5 v2 and v3, better use 64 instead of 32!
! For IBM Bluegene/Q this is not clear at the moment. We have to keep an eye
! on this and maybe allow a run-time optimization here
if (do_useGPU) then
nbw = nblk
else
#if REALCASE == 1
nbw = (63/nblk+1)*nblk
nbw = (63/nblk+1)*nblk
#elif COMPLEXCASE == 1
nbw = (31/nblk+1)*nblk
nbw = (31/nblk+1)*nblk
#endif
endif
endif
else
! intermediate bandwidth has been specified by the user; check that it is valid
if (mod(nbw, nblk) .ne. 0) then
print *, "Specified bandwidth ",nbw," has to be mutiple of the blocksize ", nblk, ". Aborting..."
success = .false.
return
endif
endif !nbw == 0
num_blocks = (na-1)/nbw + 1
......@@ -496,7 +516,7 @@
do_solve_tridi = .true.
do_trans_to_band = .true.
do_trans_to_full = .true.
end if ! matrix not already banded on input
endif ! matrix not already banded on input
! start the computations in 5 steps
......
......@@ -85,6 +85,7 @@ static int omp_threads_enumerate(int i);
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value);
static int min_tile_size_cardinality();
static int intermediate_bandwidth_cardinality();
static int na_is_valid(elpa_index_t index, int n, int new_value);
static int nev_is_valid(elpa_index_t index, int n, int new_value);
......@@ -184,6 +185,8 @@ static const elpa_index_int_entry_t int_entries[] = {
INT_ENTRY("min_tile_size", "Minimal tile size used internally in elpa1_tridiag and elpa2_bandred", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
min_tile_size_cardinality, NULL, NULL, NULL),
INT_ENTRY("intermediate_bandwidth", "Specifies the intermediate bandwidth in ELPA2 full->banded step. Must be a multiple of nblk", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
intermediate_bandwidth_cardinality, NULL, NULL, NULL),
INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
#ifdef WITH_OPENMP
......@@ -715,6 +718,13 @@ static int min_tile_size_cardinality() {
fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
abort();
}
static int intermediate_bandwidth_cardinality() {
        /* Placeholder (TODO): no cardinality is defined yet; report the source location and abort. */
        const int todo_line = __LINE__;
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, todo_line);
        abort();
}
elpa_index_t elpa_index_instance() {
elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment