From 2bc48d717df64662f84c100df29533cad79f13cf Mon Sep 17 00:00:00 2001
From: Andreas Marek
Date: Fri, 11 May 2018 12:48:44 +0200
Subject: [PATCH] Function to print best autotune

---
 src/elpa1/elpa1_template.F90   |  1 -
 src/elpa_api.F90               | 17 ++++++++++++
 src/elpa_impl.F90              | 49 ++++++++++++++++++++++++++++++++++
 src/elpa_index.c               | 35 +++++++++++++++++++++++-
 src/elpa_index.h               | 14 ++++++++++
 test/Fortran/test_autotune.F90 | 19 ++++++++-----
 6 files changed, 126 insertions(+), 9 deletions(-)

diff --git a/src/elpa1/elpa1_template.F90 b/src/elpa1/elpa1_template.F90
index 468ace18..21651744 100644
--- a/src/elpa1/elpa1_template.F90
+++ b/src/elpa1/elpa1_template.F90
@@ -315,7 +315,6 @@ function elpa_solve_evp_&
    if (obj%eigenvalues_only) then
      do_trans_ev = .true.
    endif
-   print *,"ELPA 1 ",nrThreads
    if (do_bandred) then
      call obj%timer%start("forward")
      call tridiag_&
diff --git a/src/elpa_api.F90 b/src/elpa_api.F90
index 776c1556..146c9f65 100644
--- a/src/elpa_api.F90
+++ b/src/elpa_api.F90
@@ -158,6 +158,7 @@ module elpa_api
       procedure(elpa_autotune_setup_i), deferred, public :: autotune_setup !< method to prepare the ELPA autotuning
       procedure(elpa_autotune_step_i), deferred, public :: autotune_step !< method to do an autotuning step
       procedure(elpa_autotune_set_best_i), deferred, public :: autotune_set_best !< method to set the best options
+      procedure(elpa_autotune_print_best_i), deferred, public :: autotune_print_best !< method to print the best options
 #endif
 
       !> \brief These method have to be public, in order to be overrideable in the extension types
@@ -300,6 +301,22 @@ module elpa_api
       class(elpa_autotune_t), intent(in), target :: tune_state
     end subroutine
   end interface
+
+
+  !> \brief abstract definition of the autotune print best method
+  !> Parameters
+  !> \details
+  !> \param   self        class(elpa_t): the ELPA object, which should be tuned
+  !> \param   tune_state  class(elpa_autotune_t): the autotuning object
+  !> Prints the best combination of ELPA options
+  abstract interface
+    subroutine elpa_autotune_print_best_i(self, tune_state)
+      import elpa_t, elpa_autotune_t
+      implicit none
+      class(elpa_t), intent(inout) :: self
+      class(elpa_autotune_t), intent(in), target :: tune_state
+    end subroutine
+  end interface
 #endif
 
   !> \brief abstract definition of set method for integer values
diff --git a/src/elpa_impl.F90 b/src/elpa_impl.F90
index 6b87d7ab..4a1d7596 100644
--- a/src/elpa_impl.F90
+++ b/src/elpa_impl.F90
@@ -160,6 +160,7 @@ module elpa_impl
      procedure, public :: autotune_setup => elpa_autotune_setup
      procedure, public :: autotune_step => elpa_autotune_step
      procedure, public :: autotune_set_best => elpa_autotune_set_best
+     procedure, public :: autotune_print_best => elpa_autotune_print_best
 #endif
      procedure, private :: construct_scalapack_descriptor => elpa_construct_scalapack_descriptor
    end type elpa_impl_t
@@ -927,6 +928,32 @@ module elpa_impl
 
 
 
+    !> \brief function to print the up-to-now best options of the autotuning
+    !> Parameters
+    !> \param   self            class(elpa_impl_t) the allocated ELPA object
+    !> \param   tune_state      class(elpa_autotune_t): the autotuning object
+    subroutine elpa_autotune_print_best(self, tune_state)
+      implicit none
+      class(elpa_impl_t), intent(inout) :: self
+      class(elpa_autotune_t), intent(in), target :: tune_state
+      type(elpa_autotune_impl_t), pointer :: ts_impl
+
+      select type(tune_state)
+        type is (elpa_autotune_impl_t)
+          ts_impl => tune_state
+        class default
+          stop "Unexpected type of tune_state (in elpa_autotune_print_best())"
+      end select
+
+      print *, "The following settings were found to be best:"
+      print *, "Best, i = ", ts_impl%min_loc, "best time = ", ts_impl%min_val
+      if (elpa_index_print_autotune_parameters_c(self%index, ts_impl%level, ts_impl%domain, ts_impl%min_loc) /= 1) then
+        stop "This should not happen (in elpa_autotune_print_best())"
+      endif
+    end subroutine
+
+
+
     !c> /*! \brief C interface for the implementation of the elpa_autotune_set_best method
     !c> *
     !c> *  \param  elpa_t           handle: of the ELPA object which should be tuned
@@ -946,6 +973,28 @@ module elpa_impl
 
       call self%autotune_set_best(tune_state)
     end subroutine
+
+
+
+    !c> /*! \brief C interface for the implementation of the elpa_autotune_print_best method
+    !c> *
+    !c> *  \param  elpa_t           handle: of the ELPA object which should be tuned
+    !c> *  \param  elpa_autotune_t  autotune_handle: the autotuning object
+    !c> *  \result none
+    !c> */
+    !c> void elpa_autotune_print_best(elpa_t handle, elpa_autotune_t autotune_handle);
+    subroutine elpa_autotune_print_best_c(handle, autotune_handle) bind(C, name="elpa_autotune_print_best")
+      type(c_ptr), intent(in), value         :: handle
+      type(c_ptr), intent(in), value         :: autotune_handle
+      type(elpa_impl_t), pointer             :: self
+      type(elpa_autotune_impl_t), pointer    :: tune_state
+
+      call c_f_pointer(handle, self)
+      call c_f_pointer(autotune_handle, tune_state)
+
+      call self%autotune_print_best(tune_state)
+
+    end subroutine
 #endif
 
 
diff --git a/src/elpa_index.c b/src/elpa_index.c
index 3179b7da..d6e26901 100644
--- a/src/elpa_index.c
+++ b/src/elpa_index.c
@@ -690,12 +690,14 @@ static int omp_threads_cardinality() {
         if (set_max_threads_glob == 0) {
                 max_threads_glob = omp_get_max_threads();
                 set_max_threads_glob = 1;
+                /* the OpenMP runtime is queried only once; the result is cached */
         }
 #else
         max_threads_glob = 1;
         set_max_threads_glob = 1;
 #endif
         max_threads = max_threads_glob;
+        /* at this point max_threads_glob has been set in both build variants */
         return max_threads;
 }
 
@@ -706,10 +708,11 @@ static int omp_threads_enumerate(int i) {
 static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
         int max_threads;
 #ifdef WITH_OPENMP
-        max_threads = omp_get_max_threads();
+        /* go through the accessor: it fills the cached limit if nobody did so far */
+        max_threads = omp_threads_cardinality();
 #else
         max_threads = 1;
 #endif
         return (1 <= new_value) && (new_value <= max_threads);
 }
 
@@ -787,3 +790,33 @@ int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, i
         /* Could set all values */
         return 1;
 }
+
+/*
+ * Print every integer option that is tunable for the given autotune level
+ * and domain as "name = value" to stderr, one option per line.
+ *
+ * The values are read from the current state of `index`; the combination
+ * number `n` is not decoded here.  To print the best combination found by
+ * the autotuning, store it in `index` first by calling
+ * elpa_index_set_autotune_parameters() with the same arguments.
+ *
+ * Entries with a to_string callback are printed symbolically, all others
+ * as plain integers.  Always returns 1.
+ */
+int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
+        (void) n; /* accepted for symmetry with the _set_ variant, see above */
+        for (int i = 0; i < nelements(int_entries); i++) {
+                if (is_tunable(index, i, autotune_level, autotune_domain)) {
+                        fprintf(stderr, " %s = ", int_entries[i].base.name);
+                        if (int_entries[i].to_string) {
+                                fprintf(stderr, " %s\n", int_entries[i].to_string(index->int_options.values[i]));
+                        } else {
+                                fprintf(stderr, " %d\n", index->int_options.values[i]);
+                        }
+                }
+        }
+        fprintf(stderr, "\n");
+
+        /* All tunable values were printed */
+        return 1;
+}
diff --git a/src/elpa_index.h b/src/elpa_index.h
index a9bb6929..af0b1524 100644
--- a/src/elpa_index.h
+++ b/src/elpa_index.h
@@ -413,3 +413,17 @@ int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int
  !f>
  */
 int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n);
+
+/*
+ !f> interface
+ !f>   function elpa_index_print_autotune_parameters_c(index, autotune_level, autotune_domain, n) result(success) &
+ !f>       bind(C, name="elpa_index_print_autotune_parameters")
+ !f>     import c_int, c_ptr, c_char
+ !f>     type(c_ptr), intent(in), value :: index
+ !f>     integer(kind=c_int), intent(in), value :: autotune_level, autotune_domain, n
+ !f>     integer(kind=c_int) :: success
+ !f>   end function
+ !f> end interface
+ !f>
+ */
+int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n);
diff --git a/test/Fortran/test_autotune.F90 b/test/Fortran/test_autotune.F90
index a4176876..bb53e81d 100644
--- a/test/Fortran/test_autotune.F90
+++ b/test/Fortran/test_autotune.F90
@@ -127,13 +127,12 @@ program test
    integer                     :: iter
    character(len=5)            :: iter_string
 
-
    call read_input_parameters(na, nev, nblk, write_to_file)
    call setup_mpi(myid, nprocs)
 #ifdef HAVE_REDIRECT
 #ifdef WITH_MPI
-   call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
-   call redirect_stdout(myid)
+     call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
+     call redirect_stdout(myid)
 #endif
 #endif
 
@@ -176,7 +175,7 @@ program test
    z(:,:) = 0.0
    ev(:) = 0.0
 
-   call prepare_matrix_analytic(na, a, nblk, myid, np_rows, np_cols, my_prow, my_pcol)
+   call prepare_matrix_analytic(na, a, nblk, myid, np_rows, np_cols, my_prow, my_pcol, print_times=.false.)
    as(:,:) = a(:,:)
 
    e => elpa_allocate()
@@ -219,7 +218,8 @@ program test
      call e%timer_stop("eigenvectors: iteration "//trim(iter_string))
 
      assert_elpa_ok(error)
-     status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, .true., .true.)
+     status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
+                                         .true., .true., print_times=.false.)
      a(:,:) = as(:,:)
      if (myid .eq. 0) then
        print *, ""
@@ -227,13 +227,18 @@ program test
      endif
    end do
 
-   ! de-allocate autotune object
+   ! set and print the autotuned-settings
    call e%autotune_set_best(tune_state)
+   if (myid .eq. 0) then
+     call e%autotune_print_best(tune_state)
+   endif
+   ! de-allocate autotune object
    call elpa_autotune_deallocate(tune_state)
 
    call e%eigenvectors(a, ev, z, error)
    assert_elpa_ok(error)
-   status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, .true., .true.)
+   status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
+                                       .true., .true., print_times=.false.)
 
    call elpa_deallocate(e)
 
-- 
GitLab