Commit 98d0fd81 authored by Andreas Marek
Browse files

Blocking factor in trans_ev_band_to_full tunable

The blocking factor can now be set with the set method of the
new interface. The default is 3.

Autotuning still has to be implemented.

This closes issue #44
parent 6a8db1f1
......@@ -143,11 +143,16 @@
&PRECISION&
&_&
&MATH_DATATYPE
integer :: blocking_factor
call obj%timer%start("trans_ev_band_to_full_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX &
)
#ifdef BAND_TO_FULL_BLOCKING
call obj%get("blocking_in_band_to_full",blocking_factor)
print *,"Blocking factor: ", blocking_factor
#endif
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
......@@ -432,7 +437,7 @@
#ifdef BAND_TO_FULL_BLOCKING
! t_blocking was formerly 2; 3 is a better choice
t_blocking = 3 ! number of matrices T (tmat) which are aggregated into a new (larger) T matrix (tmat_complete) and applied at once
t_blocking = blocking_factor ! number of matrices T (tmat) which are aggregated into a new (larger) T matrix (tmat_complete) and applied at once
! we only use the t_blocking if we could call it fully; this might be better but needs to be benchmarked.
! if ( na >= ((t_blocking+1)*nbw) ) then
......
......@@ -132,7 +132,6 @@ static const elpa_index_int_entry_t int_entries[] = {
INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL),
INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL),
INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid),
INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication"),
INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication"),
INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator"),
......@@ -144,6 +143,8 @@ static const elpa_index_int_entry_t int_entries[] = {
INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, \
number_of_complex_kernels, complex_kernel_enumerate, \
complex_kernel_is_valid, complex_kernel_name),
INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, NULL, NULL, NULL, NULL),
BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0),
BOOL_ENTRY("gpu", "Use GPU acceleration", 0),
BOOL_ENTRY("timings", "Enable time measurement", 0),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment