-
Lorenz Huedepohl authored
The type timer_t got two new kind of methods to query information about the resource usage if its children. The method %get_in_children queries the seconds spent in all children of the queried node, and %get_without_children conversely returns the number of seconds spent excluding all children. Additionally, there also version where the node in question is passed directly instead of by a string path (*_node) and versions where the result is a type(value_t) with all measurements (get_value_*).
Lorenz Huedepohl authoredThe type timer_t got two new kind of methods to query information about the resource usage if its children. The method %get_in_children queries the seconds spent in all children of the queried node, and %get_without_children conversely returns the number of seconds spent excluding all children. Additionally, there also version where the node in question is passed directly instead of by a string path (*_node) and versions where the result is a type(value_t) with all measurements (get_value_*).
example.F90 4.65 KiB
! Copyright 2014 Lorenz Hüdepohl
!
! This file is part of ftimings.
!
! ftimings is free software: you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation, either version 3 of the License, or
! (at your option) any later version.
!
! ftimings is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ftimings. If not, see <http://www.gnu.org/licenses/>.
#include "config-f90.h"
program test_timings
use ftimings
use iso_c_binding, only : C_DOUBLE
implicit none
type(timer_t) :: timer
integer :: i, j
call timer%measure_flops(.true.)
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%measure_memory_bandwidth(.true.)
call timer%set_print_options(&
! print_flop_count=.true., &
print_flop_rate=.true., &
! print_max_allocated_memory=.true., &
! print_memory_transferred=.true., &
print_memory_bandwidth=.true., &
print_ai=.true. &
)
call timer%enable()
call timer%start("main-loop")
do i = 1,10
! Test a bit more complex enable/disable decisions
if (i < 5 .or. mod(i,4) == 0) then
call timer%enable()
else
call timer%disable()
endif
write(*,*) "Iteration", i
call timer%start("cycle") ! replace=.true.)
!$ call timer%start("parallel")
!$omp parallel do
do j = 1,5
call a()
call b()
end do
!$omp end parallel do
!$ call timer%stop("parallel")
call timer%stop("cycle")
if (timer%is_enabled()) then
if (i == 15) then
! test run-in-progress report when printing
! entries that are not yet done
call timer%print("main-loop")
else
! usual printing of current subtree
call timer%print("main-loop", "cycle")
endif
write(*,*)
write(*,'(a,f12.6)') " cycle total : ", timer%get("main-loop", "cycle")
write(*,'(a,f12.6)') " cycle '(own)': ", timer%get_without_children("main-loop", "cycle")
write(*,'(a,f12.6)') "cycle children: ", timer%get_in_children("main-loop", "cycle")
write(*,'(a,f12.6)') " in c entries: ", timer%in_entries("c")
write(*,'(a,f8.2,a)') " c part: ", timer%in_entries("c") / timer%get("main-loop", "cycle") * 100, "%"
write(*,'(a,f12.6)') " in b entries: ", timer%in_entries("b")
write(*,'(a,f8.2,a)') " b part: ", timer%in_entries("b") / timer%get("main-loop", "cycle") * 100, "%"
#ifndef _OPENMP
write(*,'(a,f12.6)') " cycle -> a -> b -> c : ", timer%get("main-loop", "cycle", "a", "b", "c")
#else
write(*,'(a,f12.6)') " cycle -> a -> b -> c : ", timer%get("main-loop", "cycle", "parallel", "a", "b", "c")
#endif
write(*,*)
endif
end do
call timer%enable()
call timer%stop("main-loop")
write(*,*)
write(*,*) "Total main-loop:"
call timer%print("main-loop")
write(*,*)
write(*,*) "Sorted:"
call timer%sort()
call timer%print("main-loop")
write(*,*)
write(*,*) "Ignoring entries <0.02s:"
call timer%print("main-loop", threshold=0.02_C_DOUBLE)
write(*,*)
write(*,*) "Whole tree:"
call timer%print()
call timer%free()
contains
subroutine a()
call timer%start("a")
call b
call c
call timer%stop("a")
end subroutine
subroutine b()
integer :: i
call timer%start("b")
call timer%start("empty")
call timer%stop("empty")
!$omp parallel do
do i = 1, 4
call c
end do
!$omp end parallel do
call timer%stop("b")
end subroutine
subroutine c()
interface
subroutine vector_triad() bind(C, name="vector_triad")
end subroutine
end interface
interface
subroutine peak_perf() bind(C, name="peak_perf")
end subroutine
end interface
interface
subroutine fill_100_mebi() bind(C, name="fill_100_mebi")
end subroutine
end interface
call timer%start("c")
call timer%start("10.0 Mflop, AI=0.0625")
call vector_triad()
call timer%stop()
#ifdef HAVE_AVX
call timer%start("10.0 Mflop within registers (AVX)")
#else
call timer%start("10.0 Mflop within registers (SSE)")
#endif
call peak_perf()
call timer%stop()
call timer%start("Fill 100 MiB")
call fill_100_mebi()
call timer%stop()
call timer%stop()
end subroutine
end program