Commit 0df899d0 authored by Andreas Marek
Browse files

Reproduce old timing functionality of ELPA2 with the new timer objects

parent b2ca1a60
......@@ -108,11 +108,11 @@
integer(kind=ik) :: na, nev, lda, ldq, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, mpi_comm_all
call obj%timer%start("solve_evp_&
call obj%timer%start("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage" // &
&PRECISION_SUFFIX &
)
&_2stage_&
&PRECISION&
&")
na = obj%na
nev = obj%nev
......@@ -243,6 +243,7 @@
do_useGPU_trans_ev_tridi = .true.
endif
endif
call obj%timer%start("bandred")
if (obj%is_set("bandwidth") == 1) then
nbw=obj%get("bandwidth")
......@@ -286,7 +287,6 @@
endif
! Reduction full -> band
call obj%timer%start("bandred")
call bandred_&
&MATH_DATATYPE&
&_&
......@@ -298,9 +298,10 @@
, useQRActual &
#endif
)
call obj%timer%stop("bandred")
if (.not.(success)) return
end if ! matrix not already banded on input
call obj%timer%stop("bandred")
! Reduction band -> tridiagonal
......@@ -401,6 +402,7 @@
stop 1
endif
call obj%timer%start("trans_ev_to_full")
if(obj%is_set("bandwidth") .ne. 1) then
if ( (do_useGPU) .and. .not.(do_useGPU_trans_ev_tridi) ) then
! copy to device if we want to continue on GPU
......@@ -410,7 +412,6 @@
endif
! Backtransform stage 2
call obj%timer%start("trans_ev_to_full")
call trans_ev_band_to_full_&
&MATH_DATATYPE&
......@@ -424,7 +425,7 @@
, useQRActual &
#endif
)
call obj%timer%stop("trans_ev_to_full")
deallocate(tmat, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
......@@ -435,12 +436,13 @@
stop 1
endif
endif
call obj%timer%stop("trans_ev_to_full")
call obj%timer%stop("solve_evp_&
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage" // &
&PRECISION_SUFFIX &
)
&_2stage_&
&PRECISION&
&")
1 format(a,f10.3)
end function elpa_solve_evp_&
......
......@@ -63,11 +63,6 @@
#endif
useGPU) result(success)
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use iso_c_binding
use elpa
use elpa_mpi
......@@ -97,12 +92,6 @@
integer(kind=c_int) :: successInternal
class(elpa_t), pointer :: e
call timer%start("solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&_legacy_interface")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
......@@ -204,6 +193,7 @@
success = .false.
return
endif
call e%set("timings", 1)
call e%solve(a(1:lda,1:matrixCols), ev, q(1:ldq,1:matrixCols), successInternal)
if (successInternal .ne. ELPA_OK) then
......@@ -212,20 +202,65 @@
return
endif
time_evp_fwd = e%get_double("time_evp_fwd")
time_evp_solve = e%get_double("time_evp_solve")
time_evp_back = e%get_double("time_evp_back")
time_evp_fwd = e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","bandred")
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time bandred_real :', time_evp_fwd
time_evp_fwd = time_evp_fwd + e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","tridiag")
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time tridiag_band_real :',e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","tridiag")
time_evp_solve = e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","solve")
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time solve_tridi :',time_evp_solve
time_evp_back = e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","trans_ev_to_band")
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time trans_ev_tridi_to_band_real:',time_evp_back
time_evp_back = time_evp_back + &
e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","trans_ev_to_full")
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time trans_ev_band_to_full_real :',e%get_time("elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&","trans_ev_to_full")
call elpa_deallocate(e)
call elpa_uninit()
call timer%stop("solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&_legacy_interface")
end function
! vim: syntax=fortran
......@@ -97,9 +97,6 @@ program test_complex2_double_precision
use redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types
implicit none
......@@ -156,33 +153,6 @@ program test_complex2_double_precision
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)
#endif
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
print_flop_count=.true., &
print_flop_rate=.true., &
#endif
print_allocated_memory = .true. , &
print_virtual_memory=.true., &
print_max_allocated_memory=.true.)
call timer%enable()
call timer%start("program: test_complex2_double_precision")
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
......@@ -244,9 +214,6 @@ program test_complex2_double_precision
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("set up matrix")
#endif
allocate(a (na_rows,na_cols))
allocate(z (na_rows,na_cols))
allocate(as(na_rows,na_cols))
......@@ -255,9 +222,6 @@ program test_complex2_double_precision
call prepare_matrix(na, myid, sc_desc, a, z, as)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("set up matrix")
#endif
! set print flag in elpa1
elpa_print_times = .true.
......@@ -314,14 +278,6 @@ program test_complex2_double_precision
deallocate(z)
deallocate(ev)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("program: test_complex2_double_precision")
print *," "
print *,"Timings program: test_complex2_double_precision"
call timer%print("program: test_complex2_double_precision")
print *," "
print *,"End timings program: test_complex2_double_precision"
#endif
#ifdef WITH_MPI
call blacs_gridexit(my_blacs_ctxt)
call mpi_finalize(mpierr)
......
......@@ -99,9 +99,6 @@ program test_complex2_choose_kernel_with_api_double_precision
use redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types
implicit none
......@@ -156,33 +153,6 @@ program test_complex2_choose_kernel_with_api_double_precision
#define COMPLEXCASE
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)
#endif
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
print_flop_count=.true., &
print_flop_rate=.true., &
#endif
print_allocated_memory = .true. , &
print_virtual_memory=.true., &
print_max_allocated_memory=.true.)
call timer%enable()
call timer%start("program: test_complex2_choose_kernel_with_api_double_precision")
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
......@@ -314,9 +284,6 @@ program test_complex2_choose_kernel_with_api_double_precision
end if
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("set up matrix")
#endif
allocate(a (na_rows,na_cols))
allocate(z (na_rows,na_cols))
allocate(as(na_rows,na_cols))
......@@ -325,10 +292,6 @@ program test_complex2_choose_kernel_with_api_double_precision
call prepare_matrix(na, myid, sc_desc, a, z, as)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("set up matrix")
#endif
! set print flag in elpa1
elpa_print_times = .true.
......@@ -447,14 +410,6 @@ program test_complex2_choose_kernel_with_api_double_precision
deallocate(z)
deallocate(ev)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("program: test_complex2_choose_kernel_with_api_double_precision")
print *," "
print *,"Timings program: test_complex2_choose_kernel_with_api_double_precision"
call timer%print("program: test_complex2_choose_kernel_with_api_double_precision")
print *," "
print *,"End timings program: test_complex2_choose_kernel_with_api_double_precision"
#endif
#ifdef WITH_MPI
call blacs_gridexit(my_blacs_ctxt)
call mpi_finalize(mpierr)
......
......@@ -99,9 +99,6 @@ program test_complex2_default_kernel_double_precision
use redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types
implicit none
......@@ -156,33 +153,6 @@ program test_complex2_default_kernel_double_precision
#define COMPLEXCASE
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)
#endif
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
print_flop_count=.true., &
print_flop_rate=.true., &
#endif
print_allocated_memory = .true. , &
print_virtual_memory=.true., &
print_max_allocated_memory=.true.)
call timer%enable()
call timer%start("program: test_complex2_default_kernel_double_precision")
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
......@@ -260,9 +230,6 @@ program test_complex2_default_kernel_double_precision
end if
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("set up matrix")
#endif
allocate(a (na_rows,na_cols))
allocate(z (na_rows,na_cols))
allocate(as(na_rows,na_cols))
......@@ -271,9 +238,6 @@ program test_complex2_default_kernel_double_precision
call prepare_matrix(na, myid, sc_desc, a, z, as)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("set up matrix")
#endif
! set print flag in elpa1
elpa_print_times = .true.
......@@ -342,14 +306,6 @@ program test_complex2_default_kernel_double_precision
deallocate(z)
deallocate(ev)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("program: test_complex2_default_kernel_double_precision")
print *," "
print *,"Timings program: test_complex2_default_kernel_double_precision"
call timer%print("program: test_complex2_default_kernel_double_precision")
print *," "
print *,"End timings program: test_complex2_default_kernel_double_precision"
#endif
#ifdef WITH_MPI
call blacs_gridexit(my_blacs_ctxt)
call mpi_finalize(mpierr)
......
......@@ -99,9 +99,6 @@ program test_complex2_gpu_version_double_precision
use redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types
implicit none
......@@ -157,32 +154,6 @@ program test_complex2_gpu_version_double_precision
#define COMPLEXCASE
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)
#endif
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
print_flop_count=.true., &
print_flop_rate=.true., &
#endif
print_allocated_memory = .true. , &
print_virtual_memory=.true., &
print_max_allocated_memory=.true.)
call timer%enable()
call timer%start("program: test_complex2_gpu_version_double_precision")
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
......@@ -256,9 +227,6 @@ program test_complex2_gpu_version_double_precision
end if
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("set up matrix")
#endif
allocate(a (na_rows,na_cols))
allocate(z (na_rows,na_cols))
allocate(as(na_rows,na_cols))
......@@ -267,9 +235,6 @@ program test_complex2_gpu_version_double_precision
call prepare_matrix(na, myid, sc_desc, a, z, as)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("set up matrix")
#endif
! set print flag in elpa1
elpa_print_times = .true.
......@@ -339,14 +304,6 @@ program test_complex2_gpu_version_double_precision
deallocate(z)
deallocate(ev)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("program: test_complex2_gpu_version_double_precision")
print *," "
print *,"Timings program: test_complex2_gpu_version_double_precision"
call timer%print("program: test_complex2_gpu_version_double_precision")
print *," "
print *,"End timings program: test_complex2_gpu_version_double_precision"
#endif
#ifdef WITH_MPI
call blacs_gridexit(my_blacs_ctxt)
call mpi_finalize(mpierr)
......
......@@ -96,9 +96,6 @@ program test_real2_double_precision
use elpa_mpi
#ifdef HAVE_REDIRECT
use redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types
implicit none
......@@ -151,33 +148,6 @@ program test_real2_double_precision
#define REALCASE
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)
#endif
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
print_flop_count=.true., &
print_flop_rate=.true., &
#endif
print_allocated_memory = .true. , &
print_virtual_memory=.true., &
print_max_allocated_memory=.true.)
call timer%enable()
call timer%start("program: test_real2_double_precision")
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
......@@ -236,9 +206,6 @@ program test_real2_double_precision
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("set up matrix")
#endif
allocate(a (na_rows,na_cols))
allocate(z (na_rows,na_cols))
allocate(as(na_rows,na_cols))
......@@ -247,9 +214,6 @@ program test_real2_double_precision
call prepare_matrix(na, myid, sc_desc, a, z, as)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("set up matrix")
#endif
! set print flag in elpa1
elpa_print_times = .true.
......@@ -319,14 +283,6 @@ program test_real2_double_precision
deallocate(z)
deallocate(ev)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("program: test_real2_double_precision")
print *," "
print *,"Timings program: test_real2_double_precision"
call timer%print("program: test_real2_double_precision")
print *," "
print *,"End timings program: test_real2_double_precision"
#endif
#ifdef WITH_MPI
call blacs_gridexit(my_blacs_ctxt)
call mpi_finalize(mpierr)
......
......@@ -100,9 +100,6 @@ program test_real2_choose_kernel_with_api_double_precision
use redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types
implicit none
......@@ -154,32 +151,6 @@ program test_real2_choose_kernel_with_api_double_precision
#define REALCASE
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)