From 10e8908b6f278eaec23557c8097a4b46e35f81ef Mon Sep 17 00:00:00 2001
From: Pavel Kus
Date: Tue, 21 Aug 2018 14:49:42 +0200
Subject: [PATCH] basic load autotuning state implementation

---
 src/elpa_api.F90                    | 22 ++++++++-
 src/elpa_impl.F90                   | 30 ++++++++++++
 src/elpa_index.c                    | 74 ++++++++++++++++++++++++++---
 src/elpa_index.h                    | 18 +++++++
 test/Fortran/test_multiple_objs.F90 |  8 +++-
 5 files changed, 142 insertions(+), 10 deletions(-)

diff --git a/src/elpa_api.F90 b/src/elpa_api.F90
index c7139b82..eebc73ab 100644
--- a/src/elpa_api.F90
+++ b/src/elpa_api.F90
@@ -164,6 +164,7 @@ module elpa_api
       procedure(elpa_autotune_print_best_i), deferred, public :: autotune_print_best !< method to print the best options
       procedure(elpa_autotune_print_state_i), deferred, public :: autotune_print_state !< method to print the state
       procedure(elpa_autotune_save_state_i), deferred, public :: autotune_save_state !< method to save the state
+      procedure(elpa_autotune_load_state_i), deferred, public :: autotune_load_state !< method to load the state
 #endif
 
       !> \brief These method have to be public, in order to be overrideable in the extension types
@@ -371,7 +372,7 @@ module elpa_api
   !> \details
   !> \param self class(elpa_t): the ELPA object, which should be tuned
   !> \param tune_state class(elpa_autotune_t): the autotuning object
-  !> Prints the best combination of ELPA options
+  !> Prints the autotuning state
   abstract interface
     subroutine elpa_autotune_print_state_i(self, tune_state)
       import elpa_t, elpa_autotune_t
@@ -387,7 +388,7 @@ module elpa_api
   !> \param self class(elpa_t): the ELPA object, which should be tuned
   !> \param tune_state class(elpa_autotune_t): the autotuning object
   !> \param file_name string, the name of the file where to save the state
-  !> Prints the best combination of ELPA options
+  !> Saves the autotuning state
   abstract interface
     subroutine elpa_autotune_save_state_i(self, tune_state, file_name)
       import elpa_t, elpa_autotune_t
@@ -397,6 +398,23 @@ module elpa_api
       character(*), intent(in) :: file_name
     end subroutine
   end interface
+
+  !> \brief abstract definition of the autotune load state method
+  !> Parameters
+  !> \details
+  !> \param self class(elpa_t): the ELPA object, which is being tuned
+  !> \param tune_state class(elpa_autotune_t): the autotuning object
+  !> \param file_name string, the name of the file from which to load the autotuning state
+  !> Loads the autotuning state from the given file
+  abstract interface
+    subroutine elpa_autotune_load_state_i(self, tune_state, file_name)
+      import elpa_t, elpa_autotune_t
+      implicit none
+      class(elpa_t), intent(inout) :: self
+      class(elpa_autotune_t), intent(in), target :: tune_state
+      character(*), intent(in) :: file_name
+    end subroutine
+  end interface
 #endif
 
   !> \brief abstract definition of set method for integer values
diff --git a/src/elpa_impl.F90 b/src/elpa_impl.F90
index dc0299ed..1875c10f 100644
--- a/src/elpa_impl.F90
+++ b/src/elpa_impl.F90
@@ -165,6 +165,7 @@ module elpa_impl
      procedure, public :: autotune_print_best => elpa_autotune_print_best
      procedure, public :: autotune_print_state => elpa_autotune_print_state
      procedure, public :: autotune_save_state => elpa_autotune_save_state
+     procedure, public :: autotune_load_state => elpa_autotune_load_state
 #endif
      procedure, private :: construct_scalapack_descriptor => elpa_construct_scalapack_descriptor
    end type elpa_impl_t
@@ -1182,6 +1183,35 @@ module elpa_impl
     end subroutine
 
 
+
+    !> \brief function to load the state of the autotuning
+    !> Parameters
+    !> \param self class(elpa_impl_t) the allocated ELPA object
+    !> \param tune_state class(elpa_autotune_t): the autotuning object
+    !> \param file_name string, the name of the file from which to load the state
+    !> Stops the program if tune_state has an unexpected type or the state cannot be loaded
+    subroutine elpa_autotune_load_state(self, tune_state, file_name)
+      implicit none
+      class(elpa_impl_t), intent(inout) :: self
+      class(elpa_autotune_t), intent(in), target :: tune_state
+      type(elpa_autotune_impl_t), pointer :: ts_impl
+      character(*), intent(in) :: file_name
+      select type(tune_state)
+        type is (elpa_autotune_impl_t)
+          ts_impl => tune_state
+        class default
+          stop "This should not happen (in elpa_autotune_load_state()): unexpected type of tune_state"
+      end select
+
+      ! the C routine overwrites the fields of ts_impl in place with the values read from file_name
+      ! NOTE(review): tune_state is intent(in) but is modified through ts_impl; consider intent(inout)
+      if (elpa_index_load_autotune_state_c(self%index, ts_impl%level, ts_impl%domain, ts_impl%min_loc, &
+                  ts_impl%min_val, ts_impl%current, ts_impl%cardinality, file_name // c_null_char) /= 1) then
+        stop "This should not happen (in elpa_autotune_load_state())"
+      endif
+    end subroutine
+
+
     !c> /*! \brief C interface for the implementation of the elpa_autotune_set_best method
     !c> *
     !c> * \param elpa_t handle: of the ELPA object which should be tuned
diff --git a/src/elpa_index.c b/src/elpa_index.c
index 80b068a8..bb34c752 100644
--- a/src/elpa_index.c
+++ b/src/elpa_index.c
@@ -1084,12 +1084,12 @@ int elpa_index_print_autotune_state(elpa_index_t index, int autotune_level, int
                 fprintf(f, "\n");
                 fprintf(f, "*** AUTOTUNING STATE ***\n");
                 fprintf(f, "** This is the state of the autotuning object\n");
-                fprintf(f, "autotune level = %d\n", autotune_level);
-                fprintf(f, "autotune domain = %d\n", autotune_domain);
-                fprintf(f, "autotune cardinality = %d\n", cardinality);
-                fprintf(f, "current idx = %d\n", current);
-                fprintf(f, "best idx = %d\n", min_loc);
-                fprintf(f, "best time = %lf\n", min_val);
+                fprintf(f, "autotune_level = %d\n", autotune_level);
+                fprintf(f, "autotune_domain = %d\n", autotune_domain);
+                fprintf(f, "autotune_cardinality = %d\n", cardinality);
+                fprintf(f, "current_idx = %d\n", current);
+                fprintf(f, "best_idx = %d\n", min_loc);
+                fprintf(f, "best_time = %g\n", min_val);
                 if(min_loc_cpy > -1) {
                         fprintf(f, "** The following parameters are autotuned with so far the best values\n");
                         for (int i = 0; i < nelements(int_entries); i++) {
@@ -1118,6 +1118,65 @@ int elpa_index_print_autotune_state(elpa_index_t index, int autotune_level, int
         return 1;
 }
 
+const int LEN =1000;
+/* load_<TYPE>_line(): reads one "<key> = <value>" line from f, requires <key> == expected, stores <value> in *val; returns 1 on success, 0 otherwise. NOTE(review): PRINTF_SPEC doubles as the sscanf conversion - confirm it is scanf-valid for every TYPE */
+#define IMPLEMENT_LOAD_LINE(TYPE, PRINTF_SPEC, ...) \
+        static int load_##TYPE##_line(FILE* f, const char* expected, TYPE* val) { \
+                char line[LEN], s[LEN]; \
+                int error = 0; \
+                TYPE n; \
+                if(fgets(line, LEN, f) == NULL){ \
+                        fprintf(stderr, "Loading autotuning state error: line is not there\n"); \
+                        error = 1; \
+                } else{ \
+                        s[0] = '\0'; /* keeps the error message below well defined when nothing could be parsed */ \
+                        /* line holds at most LEN-1 characters, so the %s token always fits into s; both key and value must convert */ \
+                        if(sscanf(line, "%s = " PRINTF_SPEC "\n", s, &n) != 2 || strcmp(s, expected) != 0){ \
+                                fprintf(stderr, "Loading autotuning state error: expected %s, got %s\n", expected, s); \
+                                error = 1;\
+                        } else{ \
+                                *val = n; \
+                        } \
+                } \
+                if(error){ \
+                        fprintf(stderr, "Autotuning state file corrupted\n"); \
+                        return 0; \
+                } \
+                return 1; \
+        }
+FOR_ALL_TYPES(IMPLEMENT_LOAD_LINE)
+
+int elpa_index_load_autotune_state(elpa_index_t index, int* autotune_level, int* autotune_domain, int* min_loc,
+                    double* min_val, int* current, int* cardinality, char* file_name) {
+        char line[LEN];
+        FILE *f;
+        int success;
+
+        //TODO: should be broadcasted, instead of read on all ranks
+        //if(is_process_id_zero){
+                f = fopen(file_name, "r");
+
+                if (f == NULL) {
+                        fprintf(stderr, "Cannot open file %s\n", file_name);
+                        return(0);
+                }
+
+                // skip two header lines, then read the key/value lines in the order they are written (NOTE(review): verify the header line count against elpa_index_print_autotune_state)
+                success = fgets(line, LEN, f) != NULL
+                       && fgets(line, LEN, f) != NULL
+                       && load_int_line(f, "autotune_level", autotune_level)
+                       && load_int_line(f, "autotune_domain", autotune_domain)
+                       && load_int_line(f, "autotune_cardinality", cardinality)
+                       && load_int_line(f, "current_idx", current)
+                       && load_int_line(f, "best_idx", min_loc)
+                       && load_double_line(f, "best_time", min_val);
+                // single exit: the file is closed on the failure paths as well
+                fclose(f);
+        // }
+
+        return success;
+}
+
 const char STRUCTURE_PARAMETERS[] = "* Parameters describing structure of the computation:\n";
 const char EXPLICIT_PARAMETERS[] = "* Parameters explicitly set by the user:\n";
 const char DEFAULT_PARAMETERS[] = "* Parameters with default or environment value:\n";
@@ -1159,7 +1218,7 @@ int elpa_index_print_all_parameters(elpa_index_t index, char *file_name) {
                 }
 
                 fprintf(f, "*** ELPA STATE ***\n");
-                fprintf(f, "%s\n%s\n%s", out_structure, out_set, out_defaults);
+                fprintf(f, "%s%s%s", out_structure, out_set, out_defaults);
                 fprintf(f, "*** END OF ELPA STATE ***\n");
                 if(output_to_file)
                         fclose(f);
@@ -1176,6 +1235,7 @@ int elpa_index_load_all_parameters(elpa_index_t index, char *file_name) {
         int is_process_id_zero = elpa_index_get_int_value(index, "is_process_id_zero", NULL);
         int skip, explicit;
 
+        //TODO: should be broadcasted, instead of read on all ranks
         //if(is_process_id_zero){
                 f = fopen(file_name, "r");
 
diff --git a/src/elpa_index.h b/src/elpa_index.h
index c941b836..1b7903bc 100644
--- a/src/elpa_index.h
+++ b/src/elpa_index.h
@@ -480,3 +480,21 @@ int elpa_index_load_all_parameters(elpa_index_t index, char* filename);
  */
 int elpa_index_print_autotune_state(elpa_index_t index, int autotune_level, int autotune_domain, int min_loc,
                     double min_val, int current, int cardinality, char* filename);
+
+/*
+ !f> interface
+ !f>   function elpa_index_load_autotune_state_c(index, autotune_level, autotune_domain, min_loc, &
+ !f>                                             min_val, current, cardinality, file_name) result(success) &
+ !f>       bind(C, name="elpa_index_load_autotune_state")
+ !f>     import c_int, c_ptr, c_char, c_double
+ !f>     type(c_ptr), intent(in), value :: index
+ !f>     integer(kind=c_int), intent(inout) :: autotune_level, autotune_domain, min_loc, current, cardinality
+ !f>     real(kind=c_double), intent(inout) :: min_val
+ !f>     character(kind=c_char), intent(in) :: file_name(*)
+ !f>     integer(kind=c_int) :: success
+ !f>   end function
+ !f> end interface
+ !f>
+ */
+int elpa_index_load_autotune_state(elpa_index_t index, int* autotune_level, int* autotune_domain, int* min_loc,
+                    double* min_val, int* current, int* cardinality, char* filename);
diff --git a/test/Fortran/test_multiple_objs.F90 b/test/Fortran/test_multiple_objs.F90
index 93429ee0..6eae198d 100644
--- a/test/Fortran/test_multiple_objs.F90
+++ b/test/Fortran/test_multiple_objs.F90
@@ -106,7 +106,7 @@ program test
    integer :: na_cols, na_rows ! local matrix size
    integer :: np_cols, np_rows ! number of MPI processes per column/row
    integer :: my_prow, my_pcol ! local MPI task position (my_prow, my_pcol) in the grid (0..np_cols -1, 0..np_rows -1)
-   integer :: mpierr
+   integer :: mpierr, ierr
 
    ! blacs
    character(len=1) :: layout
@@ -209,12 +209,14 @@ program test
    tune_state => e_ptr%autotune_setup(ELPA_AUTOTUNE_MEDIUM, AUTOTUNE_DOMAIN, error)
    assert_elpa_ok(error)
 
+
    iter=0
    do while (e_ptr%autotune_step(tune_state))
      iter=iter+1
      write(iter_string,'(I5.5)') iter
      call e_ptr%print_all_parameters()
      call e_ptr%save_all_parameters("saved_parameters_"//trim(iter_string)//".txt")
+
      call e_ptr%timer_start("eigenvectors: iteration "//trim(iter_string))
      call e_ptr%eigenvectors(a, ev, z, error)
      call e_ptr%timer_stop("eigenvectors: iteration "//trim(iter_string))
@@ -229,6 +231,10 @@ program test
      a(:,:) = as(:,:)
      call e_ptr%autotune_print_state(tune_state)
      call e_ptr%autotune_save_state(tune_state, "saved_state_"//trim(iter_string)//".txt")
+#ifdef WITH_MPI
+     call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+#endif
+     call e_ptr%autotune_load_state(tune_state, "saved_state_"//trim(iter_string)//".txt")
    end do
 
    ! set and print the autotuned-settings
-- 
GitLab