Commit 2bc48d71 authored by Andreas Marek

Function to print best autotune

parent 74c5b3d2
......@@ -315,7 +315,6 @@ function elpa_solve_evp_&
if (obj%eigenvalues_only) then
do_trans_ev = .true.
endif
print *,"ELPA 1 ",nrThreads
if (do_bandred) then
call obj%timer%start("forward")
call tridiag_&
......
......@@ -158,6 +158,7 @@ module elpa_api
procedure(elpa_autotune_setup_i), deferred, public :: autotune_setup !< method to prepare the ELPA autotuning
procedure(elpa_autotune_step_i), deferred, public :: autotune_step !< method to do an autotuning step
procedure(elpa_autotune_set_best_i), deferred, public :: autotune_set_best !< method to set the best options
procedure(elpa_autotune_print_best_i), deferred, public :: autotune_print_best !< method to print the best options
#endif
!> \brief These methods have to be public, in order to be overridable in the extension types
......@@ -300,6 +301,22 @@ module elpa_api
class(elpa_autotune_t), intent(in), target :: tune_state
end subroutine
end interface
!> \brief abstract definition of the autotune print_best method
!> \details
!> Prints the best combination of ELPA options
!> Parameters
!> \param   self        class(elpa_t): the ELPA object, which should be tuned
!> \param   tune_state  class(elpa_autotune_t): the autotuning object
abstract interface
  subroutine elpa_autotune_print_best_i(self, tune_state)
    import :: elpa_t, elpa_autotune_t
    implicit none
    class(elpa_t), intent(inout)               :: self
    class(elpa_autotune_t), intent(in), target :: tune_state
  end subroutine elpa_autotune_print_best_i
end interface
#endif
!> \brief abstract definition of set method for integer values
......
......@@ -160,6 +160,7 @@ module elpa_impl
procedure, public :: autotune_setup => elpa_autotune_setup
procedure, public :: autotune_step => elpa_autotune_step
procedure, public :: autotune_set_best => elpa_autotune_set_best
procedure, public :: autotune_print_best => elpa_autotune_print_best
#endif
procedure, private :: construct_scalapack_descriptor => elpa_construct_scalapack_descriptor
end type elpa_impl_t
......@@ -927,6 +928,32 @@ module elpa_impl
!> \brief subroutine to print the up-to-now best options of the autotuning
!> \details
!> Prints the index of the best autotuning step and its time, then lets
!> elpa_index_print_autotune_parameters_c print the tunable parameters.
!> Stops the program if tune_state is not of type elpa_autotune_impl_t or if
!> the C routine does not return 1.
!> Parameters
!> \param   self        class(elpa_impl_t) the allocated ELPA object
!> \param   tune_state  class(elpa_autotune_t): the autotuning object
subroutine elpa_autotune_print_best(self, tune_state)
  implicit none
  class(elpa_impl_t), intent(inout)          :: self
  class(elpa_autotune_t), intent(in), target :: tune_state
  type(elpa_autotune_impl_t), pointer        :: ts_impl

  ! Downcast to the concrete autotune type in order to reach its components
  select type(tune_state)
    type is (elpa_autotune_impl_t)
      ts_impl => tune_state
    class default
      ! ts_impl would remain undefined here and must not be dereferenced
      ! below, so abort instead of merely printing a warning
      stop "This should not happen (in elpa_autotune_print_best())"
  end select

  print *, "The following settings were found to be best:"
  print *, "Best, i = ", ts_impl%min_loc, "best time = ", ts_impl%min_val

  ! the C routine reports success with the return value 1
  if (elpa_index_print_autotune_parameters_c(self%index, ts_impl%level, &
                                             ts_impl%domain, ts_impl%min_loc) /= 1) then
    stop "This should not happen (in elpa_autotune_print_best())"
  endif
end subroutine
!c> /*! \brief C interface for the implementation of the elpa_autotune_set_best method
!c> *
!c> * \param elpa_t handle: of the ELPA object which should be tuned
......@@ -946,6 +973,28 @@ module elpa_impl
call self%autotune_set_best(tune_state)
end subroutine
!c> /*! \brief C interface for the implementation of the elpa_autotune_print_best method
!c> *
!c> * \param elpa_t handle: of the ELPA object which should be tuned
!c> * \param elpa_autotune_t autotune_handle: the autotuning object
!c> * \result none
!c> */
!c> void elpa_autotune_print_best(elpa_t handle, elpa_autotune_t autotune_handle);
! Fortran side of the C binding "elpa_autotune_print_best": converts both C
! handles into Fortran pointers and forwards to the type-bound
! autotune_print_best method of the ELPA object.
subroutine elpa_autotune_print_best_c(handle, autotune_handle) bind(C, name="elpa_autotune_print_best")
  type(c_ptr), intent(in), value      :: handle
  type(c_ptr), intent(in), value      :: autotune_handle
  type(elpa_impl_t), pointer          :: elpa_obj
  type(elpa_autotune_impl_t), pointer :: autotune_obj

  ! associate Fortran pointers with the objects behind the C handles
  call c_f_pointer(autotune_handle, autotune_obj)
  call c_f_pointer(handle, elpa_obj)

  call elpa_obj%autotune_print_best(autotune_obj)
end subroutine elpa_autotune_print_best_c
#endif
......
......@@ -690,12 +690,14 @@ static int omp_threads_cardinality() {
if (set_max_threads_glob == 0) {
max_threads_glob = omp_get_max_threads();
set_max_threads_glob = 1;
//printf("Setting global max threads to %d \n",max_threads_glob);
}
#else
max_threads_glob = 1;
set_max_threads_glob = 1;
#endif
max_threads = max_threads_glob;
//printf("Setting max threads to %d \n",max_threads);
return max_threads;
}
......@@ -706,10 +708,11 @@ static int omp_threads_enumerate(int i) {
/*
 * Check whether new_value is an acceptable thread count.
 *
 * The upper bound is max_threads_glob when built WITH_OPENMP and 1 otherwise.
 * Returns non-zero iff 1 <= new_value <= upper bound.
 * `index` and `n` are not read here.
 */
static int omp_threads_is_valid(elpa_index_t index, int n, int new_value) {
        int max_threads;
#ifdef WITH_OPENMP
        /* use the cached global maximum; a previous query of
         * omp_get_max_threads() at this place was overwritten right away */
        max_threads = max_threads_glob;
#else
        max_threads = 1;
#endif
        return (1 <= new_value) && (new_value <= max_threads);
}
......@@ -787,3 +790,33 @@ int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, i
/* Could set all values */
return 1;
}
/*
 * Print all tunable integer options to stderr.
 *
 * For every entry of int_entries that is_tunable() accepts for the given
 * autotune_level / autotune_domain, one line " <name> =  <value>" is written,
 * where <value> is the value currently stored in index->int_options.values
 * (rendered via the entry's to_string callback if it has one, as a plain
 * integer otherwise). A blank line terminates the list.
 *
 * `n` is accepted for symmetry with elpa_index_set_autotune_parameters() but
 * is not evaluated: the values printed are those currently held by `index`.
 *
 * Always returns 1.
 */
int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
        (void) n; /* intentionally unused, see above */

        for (int i = 0; i < nelements(int_entries); i++) {
                if (!is_tunable(index, i, autotune_level, autotune_domain)) {
                        continue;
                }
                fprintf(stderr, " %s = ", int_entries[i].base.name);
                if (int_entries[i].to_string) {
                        fprintf(stderr, " %s\n", int_entries[i].to_string(index->int_options.values[i]));
                } else {
                        fprintf(stderr, " %d\n", index->int_options.values[i]);
                }
        }
        fprintf(stderr, "\n");

        /* Nothing above can fail; 1 tells the caller that printing succeeded */
        return 1;
}
......@@ -413,3 +413,17 @@ int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int
!f>
*/
int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n);
/*
!f> interface
!f> function elpa_index_print_autotune_parameters_c(index, autotune_level, autotune_domain, n) result(success) &
!f> bind(C, name="elpa_index_print_autotune_parameters")
!f> import c_int, c_ptr, c_char
!f> type(c_ptr), intent(in), value :: index
!f> integer(kind=c_int), intent(in), value :: autotune_level, autotune_domain, n
!f> integer(kind=c_int) :: success
!f> end function
!f> end interface
!f>
*/
/* Prints name and value of every integer option that is tunable for the given
 * autotune level and domain to stderr; returns 1. */
int elpa_index_print_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n);
......@@ -127,13 +127,12 @@ program test
integer :: iter
character(len=5) :: iter_string
call read_input_parameters(na, nev, nblk, write_to_file)
call setup_mpi(myid, nprocs)
#ifdef HAVE_REDIRECT
#ifdef WITH_MPI
call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
call redirect_stdout(myid)
call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
call redirect_stdout(myid)
#endif
#endif
......@@ -176,7 +175,7 @@ program test
z(:,:) = 0.0
ev(:) = 0.0
call prepare_matrix_analytic(na, a, nblk, myid, np_rows, np_cols, my_prow, my_pcol)
call prepare_matrix_analytic(na, a, nblk, myid, np_rows, np_cols, my_prow, my_pcol, print_times=.false.)
as(:,:) = a(:,:)
e => elpa_allocate()
......@@ -219,7 +218,8 @@ program test
call e%timer_stop("eigenvectors: iteration "//trim(iter_string))
assert_elpa_ok(error)
status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, .true., .true.)
status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
.true., .true., print_times=.false.)
a(:,:) = as(:,:)
if (myid .eq. 0) then
print *, ""
......@@ -227,13 +227,18 @@ program test
endif
end do
! de-allocate autotune object
! set and print the autotuned-settings
call e%autotune_set_best(tune_state)
if (myid .eq. 0) then
call e%autotune_print_best(tune_state)
endif
! de-allocate autotune object
call elpa_autotune_deallocate(tune_state)
call e%eigenvectors(a, ev, z, error)
assert_elpa_ok(error)
status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, .true., .true.)
status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, &
.true., .true., print_times=.false.)
call elpa_deallocate(e)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment