Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
0df899d0
Commit
0df899d0
authored
May 24, 2017
by
Andreas Marek
Browse files
Reproduce old timing functionality of ELPA2 with the new timer objects
parent
b2ca1a60
Changes
20
Hide whitespace changes
Inline
Side-by-side
src/elpa2/elpa2_template.X90
View file @
0df899d0
...
...
@@ -108,11 +108,11 @@
integer
(
kind
=
ik
)
::
na
,
nev
,
lda
,
ldq
,
nblk
,
matrixCols
,
&
mpi_comm_rows
,
mpi_comm_cols
,
mpi_comm_all
call
obj
%
timer
%
start
(
"solve_evp_&
call
obj
%
timer
%
start
(
"
elpa_
solve_evp_&
&MATH_DATATYPE&
&_2stage
"
//
&
&
PRECISION
_SUFFIX
&
)
&_2stage
_
&
&PRECISION&
&"
)
na
=
obj
%
na
nev
=
obj
%
nev
...
...
@@ -243,6 +243,7 @@
do_useGPU_trans_ev_tridi
=
.true.
endif
endif
call
obj
%
timer
%
start
(
"bandred"
)
if
(
obj
%
is_set
(
"bandwidth"
)
==
1
)
then
nbw
=
obj
%
get
(
"bandwidth"
)
...
...
@@ -286,7 +287,6 @@
endif
! Reduction full -> band
call
obj
%
timer
%
start
(
"bandred"
)
call
bandred_
&
&
MATH_DATATYPE
&
&
_
&
...
...
@@ -298,9 +298,10 @@
,
useQRActual
&
#endif
)
call
obj
%
timer
%
stop
(
"bandred"
)
if
(
.not.
(
success
))
return
end
if
! matrix not already banded on input
call
obj
%
timer
%
stop
(
"bandred"
)
! Reduction band -> tridiagonal
...
...
@@ -401,6 +402,7 @@
stop
1
endif
call
obj
%
timer
%
start
(
"trans_ev_to_full"
)
if
(
obj
%
is_set
(
"bandwidth"
)
.ne.
1
)
then
if
(
(
do_useGPU
)
.and.
.not.
(
do_useGPU_trans_ev_tridi
)
)
then
! copy to device if we want to continue on GPU
...
...
@@ -410,7 +412,6 @@
endif
! Backtransform stage 2
call
obj
%
timer
%
start
(
"trans_ev_to_full"
)
call
trans_ev_band_to_full_
&
&
MATH_DATATYPE
&
...
...
@@ -424,7 +425,7 @@
,
useQRActual
&
#endif
)
call
obj
%
timer
%
stop
(
"trans_ev_to_full"
)
deallocate
(
tmat
,
stat
=
istat
,
errmsg
=
errorMessage
)
if
(
istat
.ne.
0
)
then
...
...
@@ -435,12 +436,13 @@
stop
1
endif
endif
call
obj
%
timer
%
stop
(
"trans_ev_to_full"
)
call
obj
%
timer
%
stop
(
"solve_evp_&
call
obj
%
timer
%
stop
(
"
elpa_
solve_evp_&
&MATH_DATATYPE&
&_2stage
"
//
&
&
PRECISION
_SUFFIX
&
)
&_2stage
_
&
&PRECISION&
&"
)
1
format
(
a
,
f10.3
)
end
function
elpa_solve_evp_
&
...
...
src/elpa2/legacy_interface/elpa2_template.X90
View file @
0df899d0
...
...
@@ -63,11 +63,6 @@
#endif
useGPU
)
result
(
success
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#else
use
timings_dummy
#endif
use
iso_c_binding
use
elpa
use
elpa_mpi
...
...
@@ -97,12 +92,6 @@
integer
(
kind
=
c_int
)
::
successInternal
class
(
elpa_t
),
pointer
::
e
call
timer
%
start
(
"solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&_legacy_interface"
)
call
mpi_comm_rank
(
mpi_comm_rows
,
my_prow
,
mpierr
)
call
mpi_comm_rank
(
mpi_comm_cols
,
my_pcol
,
mpierr
)
...
...
@@ -204,6 +193,7 @@
success
=
.false.
return
endif
call
e
%
set
(
"timings"
,
1
)
call
e
%
solve
(
a
(
1
:
lda
,
1
:
matrixCols
),
ev
,
q
(
1
:
ldq
,
1
:
matrixCols
),
successInternal
)
if
(
successInternal
.ne.
ELPA_OK
)
then
...
...
@@ -212,20 +202,65 @@
return
endif
time_evp_fwd
=
e
%
get_double
(
"time_evp_fwd"
)
time_evp_solve
=
e
%
get_double
(
"time_evp_solve"
)
time_evp_back
=
e
%
get_double
(
"time_evp_back"
)
time_evp_fwd
=
e
%
get_time
(
"elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&"
,
"bandred"
)
if
(
my_prow
==
0
.and.
my_pcol
==
0
.and.
elpa_print_times
)
&
write
(
error_unit
,
*
)
'Time bandred_real :'
,
time_evp_fwd
time_evp_fwd
=
time_evp_fwd
+
e
%
get_time
(
"elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&"
,
"tridiag"
)
if
(
my_prow
==
0
.and.
my_pcol
==
0
.and.
elpa_print_times
)
&
write
(
error_unit
,
*
)
'Time tridiag_band_real :'
,
e
%
get_time
(
"elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&"
,
"tridiag"
)
time_evp_solve
=
e
%
get_time
(
"elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&"
,
"solve"
)
if
(
my_prow
==
0
.and.
my_pcol
==
0
.and.
elpa_print_times
)
&
write
(
error_unit
,
*
)
'Time solve_tridi :'
,
time_evp_solve
time_evp_back
=
e
%
get_time
(
"elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&"
,
"trans_ev_to_band"
)
if
(
my_prow
==
0
.and.
my_pcol
==
0
.and.
elpa_print_times
)
&
write
(
error_unit
,
*
)
'Time trans_ev_tridi_to_band_real:'
,
time_evp_back
time_evp_back
=
time_evp_back
+
&
e
%
get_time
(
"elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&"
,
"trans_ev_to_full"
)
if
(
my_prow
==
0
.and.
my_pcol
==
0
.and.
elpa_print_times
)
&
write
(
error_unit
,
*
)
'Time trans_ev_band_to_full_real :'
,
e
%
get_time
(
"elpa_solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&"
,
"trans_ev_to_full"
)
call
elpa_deallocate
(
e
)
call
elpa_uninit
()
call
timer
%
stop
(
"solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&_legacy_interface"
)
end
function
! vim: syntax=fortran
test/Fortran/elpa2/legacy_interface/legacy_complex.F90
View file @
0df899d0
...
...
@@ -97,9 +97,6 @@ program test_complex2_double_precision
use
redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
output_types
implicit
none
...
...
@@ -156,33 +153,6 @@ program test_complex2_double_precision
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call
timer
%
measure_flops
(
.true.
)
#endif
call
timer
%
measure_allocated_memory
(
.true.
)
call
timer
%
measure_virtual_memory
(
.true.
)
call
timer
%
measure_max_allocated_memory
(
.true.
)
call
timer
%
set_print_options
(&
#ifdef HAVE_LIBPAPI
print_flop_count
=
.true.
,
&
print_flop_rate
=
.true.
,
&
#endif
print_allocated_memory
=
.true.
,
&
print_virtual_memory
=
.true.
,
&
print_max_allocated_memory
=
.true.
)
call
timer
%
enable
()
call
timer
%
start
(
"program: test_complex2_double_precision"
)
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
...
...
@@ -244,9 +214,6 @@ program test_complex2_double_precision
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"set up matrix"
)
#endif
allocate
(
a
(
na_rows
,
na_cols
))
allocate
(
z
(
na_rows
,
na_cols
))
allocate
(
as
(
na_rows
,
na_cols
))
...
...
@@ -255,9 +222,6 @@ program test_complex2_double_precision
call
prepare_matrix
(
na
,
myid
,
sc_desc
,
a
,
z
,
as
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"set up matrix"
)
#endif
! set print flag in elpa1
elpa_print_times
=
.true.
...
...
@@ -314,14 +278,6 @@ program test_complex2_double_precision
deallocate
(
z
)
deallocate
(
ev
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"program: test_complex2_double_precision"
)
print
*
,
" "
print
*
,
"Timings program: test_complex2_double_precision"
call
timer
%
print
(
"program: test_complex2_double_precision"
)
print
*
,
" "
print
*
,
"End timings program: test_complex2_double_precision"
#endif
#ifdef WITH_MPI
call
blacs_gridexit
(
my_blacs_ctxt
)
call
mpi_finalize
(
mpierr
)
...
...
test/Fortran/elpa2/legacy_interface/legacy_complex_api.F90
View file @
0df899d0
...
...
@@ -99,9 +99,6 @@ program test_complex2_choose_kernel_with_api_double_precision
use
redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
output_types
implicit
none
...
...
@@ -156,33 +153,6 @@ program test_complex2_choose_kernel_with_api_double_precision
#define COMPLEXCASE
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call
timer
%
measure_flops
(
.true.
)
#endif
call
timer
%
measure_allocated_memory
(
.true.
)
call
timer
%
measure_virtual_memory
(
.true.
)
call
timer
%
measure_max_allocated_memory
(
.true.
)
call
timer
%
set_print_options
(&
#ifdef HAVE_LIBPAPI
print_flop_count
=
.true.
,
&
print_flop_rate
=
.true.
,
&
#endif
print_allocated_memory
=
.true.
,
&
print_virtual_memory
=
.true.
,
&
print_max_allocated_memory
=
.true.
)
call
timer
%
enable
()
call
timer
%
start
(
"program: test_complex2_choose_kernel_with_api_double_precision"
)
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
...
...
@@ -314,9 +284,6 @@ program test_complex2_choose_kernel_with_api_double_precision
end
if
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"set up matrix"
)
#endif
allocate
(
a
(
na_rows
,
na_cols
))
allocate
(
z
(
na_rows
,
na_cols
))
allocate
(
as
(
na_rows
,
na_cols
))
...
...
@@ -325,10 +292,6 @@ program test_complex2_choose_kernel_with_api_double_precision
call
prepare_matrix
(
na
,
myid
,
sc_desc
,
a
,
z
,
as
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"set up matrix"
)
#endif
! set print flag in elpa1
elpa_print_times
=
.true.
...
...
@@ -447,14 +410,6 @@ program test_complex2_choose_kernel_with_api_double_precision
deallocate
(
z
)
deallocate
(
ev
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"program: test_complex2_choose_kernel_with_api_double_precision"
)
print
*
,
" "
print
*
,
"Timings program: test_complex2_choose_kernel_with_api_double_precision"
call
timer
%
print
(
"program: test_complex2_choose_kernel_with_api_double_precision"
)
print
*
,
" "
print
*
,
"End timings program: test_complex2_choose_kernel_with_api_double_precision"
#endif
#ifdef WITH_MPI
call
blacs_gridexit
(
my_blacs_ctxt
)
call
mpi_finalize
(
mpierr
)
...
...
test/Fortran/elpa2/legacy_interface/legacy_complex_default_kernel.F90
View file @
0df899d0
...
...
@@ -99,9 +99,6 @@ program test_complex2_default_kernel_double_precision
use
redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
output_types
implicit
none
...
...
@@ -156,33 +153,6 @@ program test_complex2_default_kernel_double_precision
#define COMPLEXCASE
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call
timer
%
measure_flops
(
.true.
)
#endif
call
timer
%
measure_allocated_memory
(
.true.
)
call
timer
%
measure_virtual_memory
(
.true.
)
call
timer
%
measure_max_allocated_memory
(
.true.
)
call
timer
%
set_print_options
(&
#ifdef HAVE_LIBPAPI
print_flop_count
=
.true.
,
&
print_flop_rate
=
.true.
,
&
#endif
print_allocated_memory
=
.true.
,
&
print_virtual_memory
=
.true.
,
&
print_max_allocated_memory
=
.true.
)
call
timer
%
enable
()
call
timer
%
start
(
"program: test_complex2_default_kernel_double_precision"
)
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
...
...
@@ -260,9 +230,6 @@ program test_complex2_default_kernel_double_precision
end
if
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"set up matrix"
)
#endif
allocate
(
a
(
na_rows
,
na_cols
))
allocate
(
z
(
na_rows
,
na_cols
))
allocate
(
as
(
na_rows
,
na_cols
))
...
...
@@ -271,9 +238,6 @@ program test_complex2_default_kernel_double_precision
call
prepare_matrix
(
na
,
myid
,
sc_desc
,
a
,
z
,
as
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"set up matrix"
)
#endif
! set print flag in elpa1
elpa_print_times
=
.true.
...
...
@@ -342,14 +306,6 @@ program test_complex2_default_kernel_double_precision
deallocate
(
z
)
deallocate
(
ev
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"program: test_complex2_default_kernel_double_precision"
)
print
*
,
" "
print
*
,
"Timings program: test_complex2_default_kernel_double_precision"
call
timer
%
print
(
"program: test_complex2_default_kernel_double_precision"
)
print
*
,
" "
print
*
,
"End timings program: test_complex2_default_kernel_double_precision"
#endif
#ifdef WITH_MPI
call
blacs_gridexit
(
my_blacs_ctxt
)
call
mpi_finalize
(
mpierr
)
...
...
test/Fortran/elpa2/legacy_interface/legacy_complex_gpu.F90
View file @
0df899d0
...
...
@@ -99,9 +99,6 @@ program test_complex2_gpu_version_double_precision
use
redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
output_types
implicit
none
...
...
@@ -157,32 +154,6 @@ program test_complex2_gpu_version_double_precision
#define COMPLEXCASE
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call
timer
%
measure_flops
(
.true.
)
#endif
call
timer
%
measure_allocated_memory
(
.true.
)
call
timer
%
measure_virtual_memory
(
.true.
)
call
timer
%
measure_max_allocated_memory
(
.true.
)
call
timer
%
set_print_options
(&
#ifdef HAVE_LIBPAPI
print_flop_count
=
.true.
,
&
print_flop_rate
=
.true.
,
&
#endif
print_allocated_memory
=
.true.
,
&
print_virtual_memory
=
.true.
,
&
print_max_allocated_memory
=
.true.
)
call
timer
%
enable
()
call
timer
%
start
(
"program: test_complex2_gpu_version_double_precision"
)
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
...
...
@@ -256,9 +227,6 @@ program test_complex2_gpu_version_double_precision
end
if
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"set up matrix"
)
#endif
allocate
(
a
(
na_rows
,
na_cols
))
allocate
(
z
(
na_rows
,
na_cols
))
allocate
(
as
(
na_rows
,
na_cols
))
...
...
@@ -267,9 +235,6 @@ program test_complex2_gpu_version_double_precision
call
prepare_matrix
(
na
,
myid
,
sc_desc
,
a
,
z
,
as
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"set up matrix"
)
#endif
! set print flag in elpa1
elpa_print_times
=
.true.
...
...
@@ -339,14 +304,6 @@ program test_complex2_gpu_version_double_precision
deallocate
(
z
)
deallocate
(
ev
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"program: test_complex2_gpu_version_double_precision"
)
print
*
,
" "
print
*
,
"Timings program: test_complex2_gpu_version_double_precision"
call
timer
%
print
(
"program: test_complex2_gpu_version_double_precision"
)
print
*
,
" "
print
*
,
"End timings program: test_complex2_gpu_version_double_precision"
#endif
#ifdef WITH_MPI
call
blacs_gridexit
(
my_blacs_ctxt
)
call
mpi_finalize
(
mpierr
)
...
...
test/Fortran/elpa2/legacy_interface/legacy_real.F90
View file @
0df899d0
...
...
@@ -96,9 +96,6 @@ program test_real2_double_precision
use
elpa_mpi
#ifdef HAVE_REDIRECT
use
redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
output_types
implicit
none
...
...
@@ -151,33 +148,6 @@ program test_real2_double_precision
#define REALCASE
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call
timer
%
measure_flops
(
.true.
)
#endif
call
timer
%
measure_allocated_memory
(
.true.
)
call
timer
%
measure_virtual_memory
(
.true.
)
call
timer
%
measure_max_allocated_memory
(
.true.
)
call
timer
%
set_print_options
(&
#ifdef HAVE_LIBPAPI
print_flop_count
=
.true.
,
&
print_flop_rate
=
.true.
,
&
#endif
print_allocated_memory
=
.true.
,
&
print_virtual_memory
=
.true.
,
&
print_max_allocated_memory
=
.true.
)
call
timer
%
enable
()
call
timer
%
start
(
"program: test_real2_double_precision"
)
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
...
...
@@ -236,9 +206,6 @@ program test_real2_double_precision
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"set up matrix"
)
#endif
allocate
(
a
(
na_rows
,
na_cols
))
allocate
(
z
(
na_rows
,
na_cols
))
allocate
(
as
(
na_rows
,
na_cols
))
...
...
@@ -247,9 +214,6 @@ program test_real2_double_precision
call
prepare_matrix
(
na
,
myid
,
sc_desc
,
a
,
z
,
as
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"set up matrix"
)
#endif
! set print flag in elpa1
elpa_print_times
=
.true.
...
...
@@ -319,14 +283,6 @@ program test_real2_double_precision
deallocate
(
z
)
deallocate
(
ev
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"program: test_real2_double_precision"
)
print
*
,
" "
print
*
,
"Timings program: test_real2_double_precision"
call
timer
%
print
(
"program: test_real2_double_precision"
)
print
*
,
" "
print
*
,
"End timings program: test_real2_double_precision"
#endif
#ifdef WITH_MPI
call
blacs_gridexit
(
my_blacs_ctxt
)
call
mpi_finalize
(
mpierr
)
...
...
test/Fortran/elpa2/legacy_interface/legacy_real_api.F90
View file @
0df899d0
...
...
@@ -100,9 +100,6 @@ program test_real2_choose_kernel_with_api_double_precision
use
redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
output_types
implicit
none
...
...
@@ -154,32 +151,6 @@ program test_real2_choose_kernel_with_api_double_precision
#define REALCASE
#include "../../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call
timer
%
measure_flops
(
.true.
)
#endif
call
timer
%
measure_allocated_memory
(
.true.
)
call
timer
%
measure_virtual_memory
(
.true.
)