Commit 0df899d0 authored by Andreas Marek
Browse files

Reproduce old timing functionality of ELPA2 with the new timer objects

parent b2ca1a60
...@@ -108,11 +108,11 @@ ...@@ -108,11 +108,11 @@
integer(kind=ik) :: na, nev, lda, ldq, nblk, matrixCols, & integer(kind=ik) :: na, nev, lda, ldq, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, mpi_comm_all mpi_comm_rows, mpi_comm_cols, mpi_comm_all
call obj%timer%start("solve_evp_& call obj%timer%start("elpa_solve_evp_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_2stage" // & &_2stage_&
&PRECISION_SUFFIX & &PRECISION&
) &")
na = obj%na na = obj%na
nev = obj%nev nev = obj%nev
...@@ -243,6 +243,7 @@ ...@@ -243,6 +243,7 @@
do_useGPU_trans_ev_tridi = .true. do_useGPU_trans_ev_tridi = .true.
endif endif
endif endif
call obj%timer%start("bandred")
if (obj%is_set("bandwidth") == 1) then if (obj%is_set("bandwidth") == 1) then
nbw=obj%get("bandwidth") nbw=obj%get("bandwidth")
...@@ -286,7 +287,6 @@ ...@@ -286,7 +287,6 @@
endif endif
! Reduction full -> band ! Reduction full -> band
call obj%timer%start("bandred")
call bandred_& call bandred_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_& &_&
...@@ -298,9 +298,10 @@ ...@@ -298,9 +298,10 @@
, useQRActual & , useQRActual &
#endif #endif
) )
call obj%timer%stop("bandred")
if (.not.(success)) return if (.not.(success)) return
end if ! matrix not already banded on input end if ! matrix not already banded on input
call obj%timer%stop("bandred")
! Reduction band -> tridiagonal ! Reduction band -> tridiagonal
...@@ -401,6 +402,7 @@ ...@@ -401,6 +402,7 @@
stop 1 stop 1
endif endif
call obj%timer%start("trans_ev_to_full")
if(obj%is_set("bandwidth") .ne. 1) then if(obj%is_set("bandwidth") .ne. 1) then
if ( (do_useGPU) .and. .not.(do_useGPU_trans_ev_tridi) ) then if ( (do_useGPU) .and. .not.(do_useGPU_trans_ev_tridi) ) then
! copy to device if we want to continue on GPU ! copy to device if we want to continue on GPU
...@@ -410,7 +412,6 @@ ...@@ -410,7 +412,6 @@
endif endif
! Backtransform stage 2 ! Backtransform stage 2
call obj%timer%start("trans_ev_to_full")
call trans_ev_band_to_full_& call trans_ev_band_to_full_&
&MATH_DATATYPE& &MATH_DATATYPE&
...@@ -424,7 +425,7 @@ ...@@ -424,7 +425,7 @@
, useQRActual & , useQRActual &
#endif #endif
) )
call obj%timer%stop("trans_ev_to_full")
deallocate(tmat, stat=istat, errmsg=errorMessage) deallocate(tmat, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then if (istat .ne. 0) then
...@@ -435,12 +436,13 @@ ...@@ -435,12 +436,13 @@
stop 1 stop 1
endif endif
endif endif
call obj%timer%stop("trans_ev_to_full")
call obj%timer%stop("solve_evp_& call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_2stage" // & &_2stage_&
&PRECISION_SUFFIX & &PRECISION&
) &")
1 format(a,f10.3) 1 format(a,f10.3)
end function elpa_solve_evp_& end function elpa_solve_evp_&
......
...@@ -63,11 +63,6 @@ ...@@ -63,11 +63,6 @@
#endif #endif
useGPU) result(success) useGPU) result(success)
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use iso_c_binding use iso_c_binding
use elpa use elpa
use elpa_mpi use elpa_mpi
...@@ -97,12 +92,6 @@ ...@@ -97,12 +92,6 @@
integer(kind=c_int) :: successInternal integer(kind=c_int) :: successInternal
class(elpa_t), pointer :: e class(elpa_t), pointer :: e
call timer%start("solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&_legacy_interface")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr) call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr) call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
...@@ -204,6 +193,7 @@ ...@@ -204,6 +193,7 @@
success = .false. success = .false.
return return
endif endif
call e%set("timings", 1)
call e%solve(a(1:lda,1:matrixCols), ev, q(1:ldq,1:matrixCols), successInternal) call e%solve(a(1:lda,1:matrixCols), ev, q(1:ldq,1:matrixCols), successInternal)
if (successInternal .ne. ELPA_OK) then if (successInternal .ne. ELPA_OK) then
...@@ -212,20 +202,65 @@ ...@@ -212,20 +202,65 @@
return return
endif endif
time_evp_fwd = e%get_double("time_evp_fwd") time_evp_fwd = e%get_time("elpa_solve_evp_&
time_evp_solve = e%get_double("time_evp_solve") &MATH_DATATYPE&
time_evp_back = e%get_double("time_evp_back") &_2stage_&
&PRECISION&
&","bandred")
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time bandred_real :', time_evp_fwd
time_evp_fwd = time_evp_fwd + e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","tridiag")
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time tridiag_band_real :',e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","tridiag")
time_evp_solve = e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","solve")
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time solve_tridi :',time_evp_solve
time_evp_back = e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","trans_ev_to_band")
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time trans_ev_tridi_to_band_real:',time_evp_back
time_evp_back = time_evp_back + &
e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","trans_ev_to_full")
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time trans_ev_band_to_full_real :',e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","trans_ev_to_full")
call elpa_deallocate(e) call elpa_deallocate(e)
call elpa_uninit() call elpa_uninit()
call timer%stop("solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&_legacy_interface")
end function end function
! vim: syntax=fortran ! vim: syntax=fortran
...@@ -97,9 +97,6 @@ program test_complex2_double_precision ...@@ -97,9 +97,6 @@ program test_complex2_double_precision
use redirect use redirect
#endif #endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types use output_types
implicit none implicit none
...@@ -156,33 +153,6 @@ program test_complex2_double_precision ...@@ -156,33 +153,6 @@ program test_complex2_double_precision
#include "../../elpa_print_headers.X90" #include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)
#endif
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
print_flop_count=.true., &
print_flop_rate=.true., &
#endif
print_allocated_memory = .true. , &
print_virtual_memory=.true., &
print_max_allocated_memory=.true.)
call timer%enable()
call timer%start("program: test_complex2_double_precision")
#endif
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! Selection of number of processor rows/columns ! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible ! We try to set up the grid square-like, i.e. start the search for possible
...@@ -244,9 +214,6 @@ program test_complex2_double_precision ...@@ -244,9 +214,6 @@ program test_complex2_double_precision
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem ! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("set up matrix")
#endif
allocate(a (na_rows,na_cols)) allocate(a (na_rows,na_cols))
allocate(z (na_rows,na_cols)) allocate(z (na_rows,na_cols))
allocate(as(na_rows,na_cols)) allocate(as(na_rows,na_cols))
...@@ -255,9 +222,6 @@ program test_complex2_double_precision ...@@ -255,9 +222,6 @@ program test_complex2_double_precision
call prepare_matrix(na, myid, sc_desc, a, z, as) call prepare_matrix(na, myid, sc_desc, a, z, as)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("set up matrix")
#endif
! set print flag in elpa1 ! set print flag in elpa1
elpa_print_times = .true. elpa_print_times = .true.
...@@ -314,14 +278,6 @@ program test_complex2_double_precision ...@@ -314,14 +278,6 @@ program test_complex2_double_precision
deallocate(z) deallocate(z)
deallocate(ev) deallocate(ev)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("program: test_complex2_double_precision")
print *," "
print *,"Timings program: test_complex2_double_precision"
call timer%print("program: test_complex2_double_precision")
print *," "
print *,"End timings program: test_complex2_double_precision"
#endif
#ifdef WITH_MPI #ifdef WITH_MPI
call blacs_gridexit(my_blacs_ctxt) call blacs_gridexit(my_blacs_ctxt)
call mpi_finalize(mpierr) call mpi_finalize(mpierr)
......
...@@ -99,9 +99,6 @@ program test_complex2_choose_kernel_with_api_double_precision ...@@ -99,9 +99,6 @@ program test_complex2_choose_kernel_with_api_double_precision
use redirect use redirect
#endif #endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types use output_types
implicit none implicit none
...@@ -156,33 +153,6 @@ program test_complex2_choose_kernel_with_api_double_precision ...@@ -156,33 +153,6 @@ program test_complex2_choose_kernel_with_api_double_precision
#define COMPLEXCASE #define COMPLEXCASE
#include "../../elpa_print_headers.X90" #include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)
#endif
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
print_flop_count=.true., &
print_flop_rate=.true., &
#endif
print_allocated_memory = .true. , &
print_virtual_memory=.true., &
print_max_allocated_memory=.true.)
call timer%enable()
call timer%start("program: test_complex2_choose_kernel_with_api_double_precision")
#endif
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! Selection of number of processor rows/columns ! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible ! We try to set up the grid square-like, i.e. start the search for possible
...@@ -314,9 +284,6 @@ program test_complex2_choose_kernel_with_api_double_precision ...@@ -314,9 +284,6 @@ program test_complex2_choose_kernel_with_api_double_precision
end if end if
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem ! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("set up matrix")
#endif
allocate(a (na_rows,na_cols)) allocate(a (na_rows,na_cols))
allocate(z (na_rows,na_cols)) allocate(z (na_rows,na_cols))
allocate(as(na_rows,na_cols)) allocate(as(na_rows,na_cols))
...@@ -325,10 +292,6 @@ program test_complex2_choose_kernel_with_api_double_precision ...@@ -325,10 +292,6 @@ program test_complex2_choose_kernel_with_api_double_precision
call prepare_matrix(na, myid, sc_desc, a, z, as) call prepare_matrix(na, myid, sc_desc, a, z, as)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("set up matrix")
#endif
! set print flag in elpa1 ! set print flag in elpa1
elpa_print_times = .true. elpa_print_times = .true.
...@@ -447,14 +410,6 @@ program test_complex2_choose_kernel_with_api_double_precision ...@@ -447,14 +410,6 @@ program test_complex2_choose_kernel_with_api_double_precision
deallocate(z) deallocate(z)
deallocate(ev) deallocate(ev)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("program: test_complex2_choose_kernel_with_api_double_precision")
print *," "
print *,"Timings program: test_complex2_choose_kernel_with_api_double_precision"
call timer%print("program: test_complex2_choose_kernel_with_api_double_precision")
print *," "
print *,"End timings program: test_complex2_choose_kernel_with_api_double_precision"
#endif
#ifdef WITH_MPI #ifdef WITH_MPI
call blacs_gridexit(my_blacs_ctxt) call blacs_gridexit(my_blacs_ctxt)
call mpi_finalize(mpierr) call mpi_finalize(mpierr)
......
...@@ -99,9 +99,6 @@ program test_complex2_default_kernel_double_precision ...@@ -99,9 +99,6 @@ program test_complex2_default_kernel_double_precision
use redirect use redirect
#endif #endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types use output_types
implicit none implicit none
...@@ -156,33 +153,6 @@ program test_complex2_default_kernel_double_precision ...@@ -156,33 +153,6 @@ program test_complex2_default_kernel_double_precision
#define COMPLEXCASE #define COMPLEXCASE
#include "../../elpa_print_headers.X90" #include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)
#endif
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
print_flop_count=.true., &
print_flop_rate=.true., &
#endif
print_allocated_memory = .true. , &
print_virtual_memory=.true., &
print_max_allocated_memory=.true.)
call timer%enable()
call timer%start("program: test_complex2_default_kernel_double_precision")
#endif
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! Selection of number of processor rows/columns ! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible ! We try to set up the grid square-like, i.e. start the search for possible
...@@ -260,9 +230,6 @@ program test_complex2_default_kernel_double_precision ...@@ -260,9 +230,6 @@ program test_complex2_default_kernel_double_precision
end if end if
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem ! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("set up matrix")
#endif
allocate(a (na_rows,na_cols)) allocate(a (na_rows,na_cols))
allocate(z (na_rows,na_cols)) allocate(z (na_rows,na_cols))
allocate(as(na_rows,na_cols)) allocate(as(na_rows,na_cols))
...@@ -271,9 +238,6 @@ program test_complex2_default_kernel_double_precision ...@@ -271,9 +238,6 @@ program test_complex2_default_kernel_double_precision
call prepare_matrix(na, myid, sc_desc, a, z, as) call prepare_matrix(na, myid, sc_desc, a, z, as)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("set up matrix")
#endif
! set print flag in elpa1 ! set print flag in elpa1
elpa_print_times = .true. elpa_print_times = .true.
...@@ -342,14 +306,6 @@ program test_complex2_default_kernel_double_precision ...@@ -342,14 +306,6 @@ program test_complex2_default_kernel_double_precision
deallocate(z) deallocate(z)
deallocate(ev) deallocate(ev)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("program: test_complex2_default_kernel_double_precision")
print *," "
print *,"Timings program: test_complex2_default_kernel_double_precision"
call timer%print("program: test_complex2_default_kernel_double_precision")
print *," "
print *,"End timings program: test_complex2_default_kernel_double_precision"
#endif
#ifdef WITH_MPI #ifdef WITH_MPI
call blacs_gridexit(my_blacs_ctxt) call blacs_gridexit(my_blacs_ctxt)
call mpi_finalize(mpierr) call mpi_finalize(mpierr)
......
...@@ -99,9 +99,6 @@ program test_complex2_gpu_version_double_precision ...@@ -99,9 +99,6 @@ program test_complex2_gpu_version_double_precision
use redirect use redirect
#endif #endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types use output_types
implicit none implicit none
...@@ -157,32 +154,6 @@ program test_complex2_gpu_version_double_precision ...@@ -157,32 +154,6 @@ program test_complex2_gpu_version_double_precision
#define COMPLEXCASE #define COMPLEXCASE
#include "../../elpa_print_headers.X90" #include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)
#endif
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
#ifdef HAVE_LIBPAPI