Commit 3fe6c3d1 authored by Lorenz Huedepohl's avatar Lorenz Huedepohl
Browse files

Allow the user to activate FLOPS/RAM measurements

Now, one can select which kind of measurements are taken by calling the
member functions %measure_flops, and %measure_memory of a timer_t
object. For example

  type(timer_t) :: timer

  call timer%measure_flops(.true.)
  call timer%measure_memory(.true.)

  call timer%enable()

An explicit ftimings_init() call is now no longer necessary, PAPI will
be initialized on the first %measure_flops(.true.) call.
parent 1b9bfbb2
......@@ -14,7 +14,6 @@ libftimings_@FTIMINGS_API_VERSION@_@FC@_la_SOURCES = \
ftimings/resident_set_size.c \
ftimings/ftimings_type.F90 \
ftimings/ftimings_value.F90 \
ftimings/ftimings_node.F90 \
ftimings/ftimings.F90
libftimings_@FTIMINGS_API_VERSION@_@FC@_la_LDFLAGS = -version-info $(FTIMINGS_SO_VERSION)
......@@ -29,7 +28,9 @@ pkgconfig_DATA = ftimings-$(FTIMINGS_API_VERSION)-$(FC).pc
bin_PROGRAMS = ftimings_@FC@_test
# test
ftimings_@FC@_test_SOURCES = test/test_timings.F90
ftimings_@FC@_test_SOURCES = \
test/test_timings.F90 \
test/do_flops.c
ftimings_@FC@_test_LDADD = libftimings-@FTIMINGS_API_VERSION@-@FC@.la
ftimings_@FC@_test_LDFLAGS = -static
......
This diff is collapsed.
module ftimings_node
use ftimings_type
use ftimings_value
implicit none
! Private type node_t, representing a graph node
!
type node_t
character(len=name_length) :: name ! Descriptive name, used when printing the timings
integer :: count = 0 ! Number of node_stop calls
type(value_t) :: value ! The actual counter data, see ftimings_values.F90
logical :: is_running = .false. ! .true. if still running
type(node_t), pointer :: firstChild => NULL()
type(node_t), pointer :: lastChild => NULL()
type(node_t), pointer :: parent => NULL()
type(node_t), pointer :: nextSibling => NULL()
contains
procedure, pass :: start => node_start
procedure, pass :: stop => node_stop
procedure, pass :: new_child => node_new_child
procedure, pass :: get_child => node_get_child
procedure, pass :: get_value => node_get_value
procedure, pass :: sum_of_children => node_sum_of_children
procedure, pass :: sum_of_children_with_name => node_sum_of_children_with_name
procedure, pass :: sum_of_children_below => node_sum_of_children_below
procedure, pass :: print => node_print
procedure, pass :: print_graph => node_print_graph
procedure, pass :: sort_children => node_sort_children
end type
character(len=name_length), private, parameter :: own = "(own)"
character(len=name_length), private, parameter :: below = "(below threshold)"
contains
subroutine node_start(self)
class(node_t), intent(inout) :: self
! take the time
self%value = self%value - now()
self%is_running = .true.
end subroutine
subroutine node_stop(self)
class(node_t), intent(inout) :: self
self%count = self%count + 1
! take the time
self%value = self%value + now()
self%is_running = .false.
end subroutine
function node_new_child(self, name) result(new)
class(node_t), intent(inout), target :: self
character(len=*), intent(in) :: name
type(node_t), pointer :: new
if (.not. associated(self%lastChild)) then
allocate(self%lastChild)
new => self%lastChild
self%firstChild => new
else
allocate(self%lastChild%nextSibling)
new => self%lastChild%nextSibling
self%lastChild => new
endif
new%name = name
new%count = 0
select type (self)
type is (node_t)
new%parent => self
class default
stop "node_new_child(): This should not happen"
end select
nullify(new%firstChild)
nullify(new%lastChild)
nullify(new%nextSibling)
end function
function string_eq(str1, str2) result(eq)
character(len=name_length), intent(in) :: str1
character(len=*), intent(in) :: str2
logical :: eq
eq = trim(str1) .eq. str2(1:min(len(trim(str2)), name_length))
end function
function node_get_child(self, name) result(child)
class(node_t), intent(in) :: self
character(len=*), intent(in) :: name
type(node_t), pointer :: child
child => self%firstChild
do while (associated(child))
if (string_eq(child%name, name)) then
return
endif
child => child%nextSibling
enddo
nullify(child)
end function
function node_get_value(self) result(val)
class(node_t), intent(in) :: self
type(value_t) :: val
val = self%value
if (self%is_running) then
! we have not finished, give time up to NOW
val = val + now()
endif
end function
recursive subroutine deallocate_node(entry)
type(node_t), intent(inout), pointer :: entry
type(node_t), pointer :: nextSibling
if (associated(entry%firstChild)) then
call deallocate_node(entry%firstChild)
endif
nextSibling => entry%nextSibling
deallocate(entry)
nullify(entry)
if (associated(nextSibling)) then
call deallocate_node(nextSibling)
endif
end subroutine
function node_sum_of_children(self) result(sum_time)
class(node_t), intent(in) :: self
type(node_t), pointer :: cur_entry
type(value_t) :: sum_time
cur_entry => self%firstChild
do while (associated(cur_entry))
sum_time = sum_time + cur_entry%get_value()
cur_entry => cur_entry%nextSibling
enddo
end function
recursive function node_sum_of_children_with_name(self, name) result(sum_time)
class(node_t), intent(in) :: self
character(len=*), intent(in) :: name
type(node_t), pointer :: cur_entry
type(value_t) :: sum_time
cur_entry => self%firstChild
do while (associated(cur_entry))
if (string_eq(cur_entry%name, name)) then
sum_time = sum_time + cur_entry%value
else
sum_time = sum_time + cur_entry%sum_of_children_with_name(name)
endif
cur_entry => cur_entry%nextSibling
enddo
end function
function node_sum_of_children_below(self, threshold) result(sum_time)
class(node_t), intent(in) :: self
real(kind=rk), intent(in), optional :: threshold
type(node_t), pointer :: cur_entry
type(value_t) :: sum_time, cur_value
if (.not. present(threshold)) then
return
endif
cur_entry => self%firstChild
do while (associated(cur_entry))
cur_value = cur_entry%get_value()
if (cur_value%micros * 1e-6_rk < threshold) then
sum_time = sum_time + cur_value
endif
cur_entry => cur_entry%nextSibling
enddo
end function
subroutine insert_into_sorted_list(head, node)
type(node_t), pointer, intent(inout) :: head
type(node_t), target, intent(inout) :: node
type(node_t), pointer :: cur
if (node%value%micros >= head%value%micros) then
node%nextSibling => head
head => node
return
endif
cur => head
do while (associated(cur%nextSibling))
if (cur%value%micros > node%value%micros .and. node%value%micros >= cur%nextSibling%value%micros) then
node%nextSibling => cur%nextSibling
cur%nextSibling => node
return
endif
cur => cur%nextSibling
end do
! node has to be appended at the end
cur%nextSibling => node
node%nextSibling => NULL()
end subroutine
subroutine remove_from_list(head, node)
type(node_t), pointer, intent(inout) :: head
type(node_t), pointer, intent(in) :: node
type(node_t), pointer :: cur
if (associated(head,node)) then
head => head%nextSibling
return
endif
cur => head
do while (associated(cur%nextSibling))
if (associated(cur%nextSibling,node)) then
cur%nextSibling => cur%nextSibling%nextSibling
return
endif
cur => cur%nextSibling
end do
end subroutine
subroutine node_print(self, indent_level, total, unit)
class(node_t), intent(inout) :: self
integer, intent(in) :: indent_level
type(value_t), intent(in) :: total
type(value_t) :: val
integer, intent(in) :: unit
character(len=name_length) :: name, suffix
if (self%is_running) then
name = trim(self%name) // " (running)"
else
name = self%name
endif
if (self%count > 1) then
write(suffix, '(" (",i0,"x)")') self%count
name = trim(name) // " " // trim(suffix)
endif
if (self%is_running) then
val = self%value + now()
else
val = self%value
endif
call val%print(indent_level, name, total, unit)
end subroutine
recursive subroutine node_print_graph(self, indent_level, threshold, is_sorted, total, unit)
use, intrinsic :: iso_fortran_env, only : output_unit
class(node_t), intent(inout) :: self
integer, intent(in) :: indent_level
real(kind=rk), intent(in), optional :: threshold
logical, intent(in), optional :: is_sorted
type(value_t), intent(in), optional :: total
integer, intent(in), optional :: unit
type(node_t), pointer :: node
integer :: i
type(value_t) :: cur_value, node_value, own_value, below_threshold_value, total_act
type(node_t), pointer :: own_node, threshold_node
real(kind=rk) :: threshold_act
logical :: is_sorted_act, print_own, print_threshold
integer :: unit_act
if (present(threshold)) then
threshold_act = threshold
else
threshold_act = 0
endif
if (present(is_sorted)) then
is_sorted_act = is_sorted
else
is_sorted_act = .false.
endif
cur_value = self%get_value()
if (present(total)) then
total_act = total
else
total_act = cur_value
endif
if (present(unit)) then
unit_act = unit
else
unit_act = output_unit
endif
call self%print(indent_level, total_act, unit_act)
own_value = cur_value - self%sum_of_children()
below_threshold_value = self%sum_of_children_below(threshold)
print_own = associated(self%firstChild)
print_threshold = below_threshold_value%micros > 0
! Deal with "(own)" and "(below threshold)" entries
if (is_sorted_act) then
! sort them in
if (print_own) then
! insert an "(own)" node
allocate(own_node)
own_node%value = own_value
own_node%name = own
call insert_into_sorted_list(self%firstChild, own_node)
endif
if (print_threshold) then
! insert a "(below threshold)" node
allocate(threshold_node)
threshold_node%value = below_threshold_value
threshold_node%name = below
call insert_into_sorted_list(self%firstChild, threshold_node)
endif
else
! print them first
if (print_own) then
call own_value%print(indent_level + 1, own, cur_value, unit_act)
endif
if (print_threshold) then
call below_threshold_value%print(indent_level + 1, below, cur_value, unit_act)
endif
endif
! print children
node => self%firstChild
do while (associated(node))
node_value = node%get_value()
if (node_value%micros * 1e-6_rk >= threshold_act &
.or. associated(node, threshold_node) &
.or. associated(node, own_node)) then
call node%print_graph(indent_level + 1, threshold, is_sorted, cur_value, unit_act)
endif
node => node%nextSibling
end do
if (is_sorted_act) then
! remove inserted dummy nodes again
if (print_own) then
call remove_from_list(self%firstChild, own_node)
deallocate(own_node)
endif
if (print_threshold) then
call remove_from_list(self%firstChild, threshold_node)
deallocate(threshold_node)
endif
endif
end subroutine
! In-place sort a node_t linked list and return the first and last element,
subroutine sort_nodes(head, tail)
type(node_t), pointer, intent(inout) :: head, tail
type(node_t), pointer :: p, q, e
type(value_t) :: p_val, q_val
integer :: insize, nmerges, psize, qsize, i
if (.not. associated(head)) then
nullify(tail)
return
endif
insize = 1
do while (.true.)
p => head
nullify(head)
nullify(tail)
nmerges = 0
do while(associated(p))
nmerges = nmerges + 1
q => p
psize = 0
do i = 1, insize
psize = psize + 1
q => q%nextSibling
if (.not. associated(q)) then
exit
endif
end do
qsize = insize
do while (psize > 0 .or. (qsize > 0 .and. associated(q)))
if (psize == 0) then
e => q
q => q%nextSibling
qsize = qsize - 1
else if (qsize == 0 .or. (.not. associated(q))) then
e => p;
p => p%nextSibling
psize = psize - 1
else
p_val = p%get_value()
q_val = q%get_value()
if (p_val%micros >= q_val%micros) then
e => p
p => p%nextSibling
psize = psize - 1
else
e => q
q => q%nextSibling
qsize = qsize - 1
end if
end if
if (associated(tail)) then
tail%nextSibling => e
else
head => e
endif
tail => e
end do
p => q
end do
nullify(tail%nextSibling)
if (nmerges <= 1) then
return
endif
insize = insize * 2
end do
end subroutine
recursive subroutine node_sort_children(self)
class(node_t), intent(inout) :: self
type(node_t), pointer :: node
call sort_nodes(self%firstChild, self%lastChild)
node => self%firstChild
do while (associated(node))
call node%sort_children()
node => node%nextSibling
enddo
end subroutine
end module
......@@ -7,40 +7,10 @@ module ftimings_value
implicit none
public
interface
function microseconds_since_epoch() result(us) bind(C, name="ftimings_microseconds_since_epoch")
use, intrinsic :: iso_c_binding
implicit none
integer(kind=C_INT64_T) :: us
end function
end interface
#ifdef HAVE_LIBPAPI
logical :: papi_supported = .false.
interface
function current_flop_count() result(cnt) bind(C, name="ftimings_current_flop_count")
use, intrinsic :: iso_c_binding
implicit none
integer(kind=C_LONG_LONG) :: cnt
end function
end interface
#endif
interface
function resident_set_size() result(rsssize) bind(C, name="ftimings_resident_set_size")
use, intrinsic :: iso_c_binding
implicit none
integer(kind=C_LONG) :: rsssize
end function
end interface
type value_t
integer(kind=C_INT64_T) :: micros = 0 ! Cumulative microseconds spent in this node
integer(kind=C_LONG) :: rsssize = 0
#ifdef HAVE_LIBPAPI
integer(kind=C_LONG_LONG) :: flop_count = 0 ! Cumulative floating point operations done in this node
#endif
contains
procedure, pass :: print => print_value
end type
......@@ -54,37 +24,12 @@ module ftimings_value
module procedure value_inverse
end interface
type(value_t), parameter :: null_value = value_t( &
micros = 0, rsssize = 0 &
#ifdef HAVE_LIBPAPI
,flop_count = 0 &
#endif
)
type(value_t), parameter :: null_value = value_t(micros = 0, &
rsssize = 0, &
flop_count = 0)
contains
! This is the function that actually returns the current timestamp and all other counters
function now() result(val)
use, intrinsic :: iso_c_binding
type(value_t) :: val
! current time
val%micros = microseconds_since_epoch()
! current memory
val%rsssize = resident_set_size()
#ifdef HAVE_LIBPAPI
if (papi_supported) then
! flop counter
val%flop_count = current_flop_count()
else
val%flop_count = 0
endif
#endif
end function
pure elemental function value_add(a,b) result(c)
class(value_t), intent(in) :: a, b
type(value_t) :: c
......@@ -115,32 +60,42 @@ module ftimings_value
#endif
end function
subroutine print_value(self, indent_level, label, total, unit)
subroutine print_value(self, indent_level, &
print_memory, print_flop_count, print_flop_rate, &
label, total, unit)
class(value_t), intent(in) :: self
integer, intent(in) :: indent_level
logical, intent(in) :: print_memory
logical, intent(in) :: print_flop_count, print_flop_rate
character(len=name_length), intent(in) :: label
type(value_t), intent(in) :: total
integer, intent(in) :: unit
character(len=64) :: format_spec
integer :: unit
write(format_spec,'("(",i0,"x,""|_ "",a",i0,",2x,f12.6,5x,f8.3,5x,a8)")') indent_level * 2 + 1, name_length
write(format_spec,'("(",i0,"x,""|_ "",a",i0,",2x,f12.6,1x,f12.3)")') indent_level * 2 + 1, name_length
write(unit,format_spec,advance='no') &
label, &
real(self%micros, kind=rk) * 1e-6_rk, &
real(self%micros, kind=rk) / real(total%micros, kind=rk), &
nice_format(real(self%rsssize, kind=C_DOUBLE))
real(self%micros, kind=rk) / real(total%micros, kind=rk)
#ifdef HAVE_LIBPAPI