Commit cb4464ff authored by Pavel Kus's avatar Pavel Kus

output of the autotuning state

parent 51f01bab
......@@ -160,6 +160,7 @@ module elpa_api
procedure(elpa_autotune_step_i), deferred, public :: autotune_step !< method to do an autotuning step
procedure(elpa_autotune_set_best_i), deferred, public :: autotune_set_best !< method to set the best options
procedure(elpa_autotune_print_best_i), deferred, public :: autotune_print_best !< method to print the best options
procedure(elpa_autotune_print_state_i), deferred, public :: autotune_print_state !< method to print the state of the autotuning
#endif
!> \brief These methods have to be public in order to be overridable in the extension types
......@@ -331,6 +332,21 @@ module elpa_api
class(elpa_autotune_t), intent(in), target :: tune_state
end subroutine
end interface
!> \brief abstract definition of the autotune print state method
!> \details
!> Prints the current state of the autotuning
!> Parameters
!> \param   self        class(elpa_t): the ELPA object, which should be tuned
!> \param   tune_state  class(elpa_autotune_t): the autotuning object
abstract interface
  subroutine elpa_autotune_print_state_i(self, tune_state)
    import :: elpa_t, elpa_autotune_t
    implicit none
    class(elpa_t), intent(inout)               :: self
    class(elpa_autotune_t), intent(in), target :: tune_state
  end subroutine elpa_autotune_print_state_i
end interface
#endif
!> \brief abstract definition of set method for integer values
......
......@@ -62,6 +62,7 @@ module elpa_impl
use elpa_autotune_impl
#endif
use, intrinsic :: iso_c_binding
use iso_fortran_env
implicit none
private
......@@ -160,6 +161,7 @@ module elpa_impl
procedure, public :: autotune_step => elpa_autotune_step
procedure, public :: autotune_set_best => elpa_autotune_set_best
procedure, public :: autotune_print_best => elpa_autotune_print_best
procedure, public :: autotune_print_state => elpa_autotune_print_state
#endif
procedure, private :: construct_scalapack_descriptor => elpa_construct_scalapack_descriptor
end type elpa_impl_t
......@@ -1071,6 +1073,7 @@ module elpa_impl
print *, "The following settings were found to be best:"
print *, "Best, i = ", ts_impl%min_loc, "best time = ", ts_impl%min_val
flush(output_unit)
if (elpa_index_print_autotune_parameters_c(self%index, ts_impl%level, ts_impl%domain) /= 1) then
stop "This should not happen (in elpa_autotune_print_best())"
endif
......@@ -1091,6 +1094,30 @@ module elpa_impl
end subroutine
!> \brief subroutine to print the current state of the autotuning
!> \details
!> Forwards the fields of the autotuning object (level, domain, best index,
!> best time, current index, cardinality) to the C routine
!> elpa_index_print_autotune_state_c, which does the actual printing.
!> Stops the program if the autotuning object is not of the implementation
!> type, or if the C routine does not report success.
!> Parameters
!> \param   self        class(elpa_impl_t) the allocated ELPA object
!> \param   tune_state  class(elpa_autotune_t): the autotuning object
subroutine elpa_autotune_print_state(self, tune_state)
  implicit none
  class(elpa_impl_t), intent(inout)          :: self
  class(elpa_autotune_t), intent(in), target :: tune_state
  type(elpa_autotune_impl_t), pointer        :: ts_impl

  ! The fields read below exist only on the concrete implementation type
  select type(tune_state)
    type is (elpa_autotune_impl_t)
      ts_impl => tune_state
    class default
      ! ts_impl is still undefined here; abort instead of falling through
      ! and dereferencing it in the call below
      stop "This should not happen (in elpa_autotune_print_state())"
  end select

  if (elpa_index_print_autotune_state_c(self%index, ts_impl%level, ts_impl%domain, ts_impl%min_loc, &
                                        ts_impl%min_val, ts_impl%current, ts_impl%cardinality) /= 1) then
    stop "This should not happen (in elpa_autotune_print_state())"
  endif
end subroutine
!c> /*! \brief C interface for the implementation of the elpa_autotune_set_best method
!c> *
......
......@@ -43,6 +43,7 @@
// the original distribution, the GNU Lesser General Public License.
//
// Authors: L. Huedepohl and A. Marek, MPCDF
#include <assert.h>
#include <elpa/elpa.h>
#include "elpa_index.h"
......@@ -945,6 +946,13 @@ elpa_index_t elpa_index_instance() {
return index;
}
/*
 * Returns 1 when integer option i takes part in autotuning for the given
 * level and domain but has been explicitly set on this index; 0 otherwise.
 */
static int is_tunable_but_overriden(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
  /* Level 0 marks an option that is never autotuned */
  if (int_entries[i].autotune_level == 0) {
    return 0;
  }
  /* The option is only tuned at a higher level than the one requested */
  if (int_entries[i].autotune_level > autotune_level) {
    return 0;
  }
  /* The option's domain mask does not overlap the requested domain */
  if (!(int_entries[i].autotune_domain & autotune_domain)) {
    return 0;
  }
  /* Tunable in principle: report whether an explicit value is in place */
  return index->int_options.is_set[i] ? 1 : 0;
}
static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
return (int_entries[i].autotune_level != 0) &&
(int_entries[i].autotune_level <= autotune_level) &&
......@@ -973,25 +981,25 @@ void elpa_index_print_int_parameter(elpa_index_t index, char* buff, int i)
}
}
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
int n_original = n;
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int current) {
int current_cpy = current;
char buff[100];
int debug = elpa_index_get_int_value(index, "debug", NULL);
for (int i = 0; i < nelements(int_entries); i++) {
if (is_tunable(index, i, autotune_level, autotune_domain)) {
int value = int_entries[i].enumerate(index, n % int_entries[i].cardinality(index));
int value = int_entries[i].enumerate(index, current_cpy % int_entries[i].cardinality(index));
/* Try to set option i to that value */
if (int_entries[i].valid(index, i, value)) {
index->int_options.values[i] = value;
} else {
return 0;
}
n /= int_entries[i].cardinality(index);
current_cpy /= int_entries[i].cardinality(index);
}
}
int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);
if (debug == 1 && is_process_id_zero) {
fprintf(stderr, "\n*** AUTOTUNING: setting a new combination of parameters, idx %d ***\n", n_original);
fprintf(stderr, "\n*** AUTOTUNING: setting a new combination of parameters, idx %d ***\n", current);
elpa_index_print_autotune_parameters(index, autotune_level, autotune_domain);
fprintf(stderr, "***\n\n");
}
......@@ -1014,6 +1022,60 @@ int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level,
return 1;
}
/*
 * Print the current state of the autotuning to stderr.
 *
 * index            index holding the options of the ELPA object being tuned
 * autotune_level   autotuning level in use
 * autotune_domain  autotuning domain in use
 * min_loc          index of the best parameter combination found so far;
 *                  a negative value means no best combination exists yet
 * min_val          time measured for the best combination
 * current          index of the combination currently being tried
 * cardinality      total number of combinations
 *
 * Output is produced only when the "is_process_id_zero" option of index is
 * non-zero. Always returns 1.
 */
int elpa_index_print_autotune_state(elpa_index_t index, int autotune_level, int autotune_domain, int min_loc,
                                    double min_val, int current, int cardinality) {
  char buff[100];
  elpa_index_t index_best;
  /* Decided once from the caller's value; the working copy below is consumed by the decoding */
  const int have_best = (min_loc > -1);

  /* Scratch index that receives the currently best parameters, for output only */
  /* NOTE(review): assumes elpa_index_instance() never returns NULL - confirm */
  index_best = elpa_index_instance();

  if (have_best) {
    /* Decode min_loc digit by digit, one tunable option per digit (mixed radix) */
    int remaining = min_loc;
    for (int i = 0; i < nelements(int_entries); i++) {
      if (is_tunable(index, i, autotune_level, autotune_domain)) {
        int n_choices = int_entries[i].cardinality(index);
        int value = int_entries[i].enumerate(index, remaining % n_choices);
        /* we are setting the value for output only, we do not need to check consistency */
        index_best->int_options.values[i] = value;
        remaining /= n_choices;
      }
    }
  }

  int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);
  if (is_process_id_zero) {
    fprintf(stderr, "\n*** AUTOTUNING STATE ***\n");
    fprintf(stderr, "** This is the state of the autotuning object\n");
    fprintf(stderr, "autotune level = %d\n", autotune_level);
    fprintf(stderr, "autotune domain = %d\n", autotune_domain);
    fprintf(stderr, "autotune cardinality = %d\n", cardinality);
    fprintf(stderr, "current idx = %d\n", current);
    fprintf(stderr, "best idx = %d\n", min_loc);
    fprintf(stderr, "best time = %lf\n", min_val);
    if (have_best) {
      fprintf(stderr, "** The following parameters are autotuned with so far the best values\n");
      for (int i = 0; i < nelements(int_entries); i++) {
        if (is_tunable(index, i, autotune_level, autotune_domain)) {
          elpa_index_print_int_parameter(index_best, buff, i);
          fprintf(stderr, "%s", buff);
        }
      }
      fprintf(stderr, "** The following parameters would be autotuned on the selected autotuning level, but were overridden by the set() method\n");
      for (int i = 0; i < nelements(int_entries); i++) {
        if (is_tunable_but_overriden(index, i, autotune_level, autotune_domain)) {
          /* The overriding values live in the caller's index (that is where is_set
             is checked); the scratch index_best never receives them */
          elpa_index_print_int_parameter(index, buff, i);
          fprintf(stderr, "%s", buff);
        }
      }
    } else {
      fprintf(stderr, "** No output after first step\n");
    }
    fprintf(stderr, "*** END OF AUTOTUNING STATE ***\n");
  }

  elpa_index_free(index_best);

  return 1;
}
int elpa_index_print_all_parameters(elpa_index_t index) {
const int LEN =10000;
char out_structure[LEN], out_set[LEN], out_defaults[LEN], out_nowhere[LEN], buff[100];
......
......@@ -447,3 +447,20 @@ int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level,
!f>
*/
int elpa_index_print_all_parameters(elpa_index_t index);
/*
!f> interface
!f> function elpa_index_print_autotune_state_c(index, autotune_level, autotune_domain, min_loc, &
!f> min_val, current, cardinality) result(success) &
!f> bind(C, name="elpa_index_print_autotune_state")
!f> import c_int, c_ptr, c_char, c_double
!f> type(c_ptr), intent(in), value :: index
!f> integer(kind=c_int), intent(in), value :: autotune_level, autotune_domain, min_loc, current, cardinality
!f> real(kind=c_double), intent(in), value :: min_val
!f> integer(kind=c_int) :: success
!f> end function
!f> end interface
!f>
*/
/*
 * Print the current state of the autotuning: level, domain, cardinality,
 * current index, best index and best time, followed by the parameter values
 * of the best combination when min_loc is non-negative.
 */
int elpa_index_print_autotune_state(elpa_index_t index, int autotune_level, int autotune_domain, int min_loc,
double min_val, int current, int cardinality);
......@@ -201,13 +201,16 @@ program test
assert_elpa_ok(error)
#endif
call e%set("timings",1, error)
!call e%set("debug",1)
call e%set("debug",1)
call e%set("gpu", 0)
call e%set("max_stored_rows", 30)
assert_elpa_ok(e%setup())
if (myid == 0) print *, ""
tune_state => e%autotune_setup(ELPA_AUTOTUNE_FAST, AUTOTUNE_DOMAIN, error)
tune_state => e%autotune_setup(ELPA_AUTOTUNE_MEDIUM, AUTOTUNE_DOMAIN, error)
assert_elpa_ok(error)
iter=0
......@@ -226,6 +229,7 @@ program test
status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
.true., .true., print_times=.false.)
a(:,:) = as(:,:)
call e%autotune_print_state(tune_state)
end do
! set and print the autotuned-settings
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment