Commit 57ad918e authored by Lorenz Huedepohl's avatar Lorenz Huedepohl
Browse files

Add wrapper library libftimings_perflib

This is intended as a drop-in replacement for programs instrumented with perflib
(http://www.rzg.mpg.de/services/computing/software/performance-tools/perf.html)

Also, the library naming scheme was slightly changed, as it is not
necessary to incorporate the full version number into the libraries
name, only the API version.
parent 4907df75
...@@ -5,10 +5,10 @@ AM_LDFLAGS = -L$(libdir) -Wl,-rpath -Wl,$(libdir) @AM_LDFLAGS@ $(OPENMP_CFLAGS) ...@@ -5,10 +5,10 @@ AM_LDFLAGS = -L$(libdir) -Wl,-rpath -Wl,$(libdir) @AM_LDFLAGS@ $(OPENMP_CFLAGS)
AM_FCFLAGS = @AM_FCFLAGS@ $(OPENMP_FCFLAGS) AM_FCFLAGS = @AM_FCFLAGS@ $(OPENMP_FCFLAGS)
# libraries # libraries
lib_LTLIBRARIES = libftimings-@FTIMINGS_API_VERSION@-@FC@.la lib_LTLIBRARIES = libftimings-@FC@.la libftimings_perflib-@FC@.la
# libftimings # libftimings
libftimings_@FTIMINGS_API_VERSION@_@FC@_la_SOURCES = \ libftimings_@FC@_la_SOURCES = \
ftimings/time.c \ ftimings/time.c \
ftimings/resident_set_size.c \ ftimings/resident_set_size.c \
ftimings/virtual_memory.c \ ftimings/virtual_memory.c \
...@@ -19,43 +19,50 @@ libftimings_@FTIMINGS_API_VERSION@_@FC@_la_SOURCES = \ ...@@ -19,43 +19,50 @@ libftimings_@FTIMINGS_API_VERSION@_@FC@_la_SOURCES = \
ftimings/ftimings_c_support.c ftimings/ftimings_c_support.c
if HAVE_LIBPAPI if HAVE_LIBPAPI
libftimings_@FTIMINGS_API_VERSION@_@FC@_la_SOURCES += \ libftimings_@FC@_la_SOURCES += \
ftimings/papi.c ftimings/papi.c
endif endif
if HAVE_PERF if HAVE_PERF
libftimings_@FTIMINGS_API_VERSION@_@FC@_la_SOURCES += \ libftimings_@FC@_la_SOURCES += \
ftimings/perf_memory_counter.c ftimings/perf_memory_counter.c
endif endif
libftimings_@FTIMINGS_API_VERSION@_@FC@_la_LDFLAGS = -version-info $(FTIMINGS_SO_VERSION) libftimings_@FC@_la_LDFLAGS = -version-info @FTIMINGS_SO_VERSION@
ftimings_includedir = $(includedir)/ftimings-$(FTIMINGS_API_VERSION)-$(FC) # perflib wrapper library
libftimings_perflib_@FC@_la_SOURCES = \
perf_wrapper/perf_wrapper.F90
libftimings_perflib_@FC@_la_LDFLAGS = -version-info @FTIMINGS_SO_VERSION@
ftimings_includedir = $(includedir)/ftimings-@API_VERSION@-$(FC)
nobase_ftimings_include_HEADERS = ftimings.mod nobase_ftimings_include_HEADERS = ftimings.mod
ftimings_include_HEADERS = ftimings/ftimings.h ftimings/ftimings_generated.h ftimings_include_HEADERS = ftimings/ftimings.h ftimings/ftimings_generated.h
ftimings_libincludedir = $(libdir)/ftimings-$(FTIMINGS_API_VERSION)-$(FC)/include ftimings_libincludedir = $(libdir)/ftimings-@API_VERSION@-$(FC)/include
pkgconfigdir = $(libdir)/pkgconfig pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = ftimings-$(FTIMINGS_API_VERSION)-$(FC).pc pkgconfig_DATA = ftimings-@API_VERSION@-$(FC).pc ftimings_perflib-@API_VERSION@-$(FC).pc
# programs # programs
bin_PROGRAMS = ftimings_@FC@_example ftimings_c_example noinst_PROGRAMS = ftimings_@FC@_example ftimings_c_example do_flops do_memory_transfer test_perf test_papi test_perflib
# test # Fortran example
ftimings_@FC@_example_SOURCES = \ ftimings_@FC@_example_SOURCES = \
examples/example.F90 \ examples/example.F90 \
examples/do_flops.c \ examples/do_flops.c \
examples/do_memory_transfer.c examples/do_memory_transfer.c
ftimings_@FC@_example_LDADD = libftimings-@FTIMINGS_API_VERSION@-@FC@.la ftimings_@FC@_example_LDADD = libftimings-@FC@.la
ftimings_@FC@_example_LDFLAGS = -static ftimings_@FC@_example_LDFLAGS = -static
# ftimings_c_example # C example
ftimings_c_example_SOURCES = \ ftimings_c_example_SOURCES = \
examples/c_example.c examples/c_example.c
ftimings_c_example_LDADD = libftimings-@FTIMINGS_API_VERSION@-@FC@.la ftimings_c_example_LDADD = libftimings-@FC@.la
ftimings_c_example_LINK = $(FCLINK) -static ftimings_c_example_LINK = $(FCLINK) -static
noinst_PROGRAMS = do_flops do_memory_transfer test_perf test_papi
# Small test programs:
do_flops_SOURCES = \ do_flops_SOURCES = \
examples/do_flops.c examples/do_flops.c
...@@ -73,6 +80,10 @@ test_papi_SOURCES = \ ...@@ -73,6 +80,10 @@ test_papi_SOURCES = \
ftimings/papi.c ftimings/papi.c
test_papi_CPPFLAGS = -DTEST_PAPI test_papi_CPPFLAGS = -DTEST_PAPI
test_perflib_SOURCES = \
examples/perflib.F90
test_perflib_LDADD = libftimings-@FC@.la libftimings_perflib-@FC@.la
# other files to distribute # other files to distribute
filesdir = $(datadir)/@PACKAGE@-@FC@/examples filesdir = $(datadir)/@PACKAGE@-@FC@/examples
files_DATA = examples/example.F90 files_DATA = examples/example.F90
...@@ -84,3 +95,8 @@ clean-local: ...@@ -84,3 +95,8 @@ clean-local:
distclean-local: distclean-local:
rm config-f90.h rm config-f90.h
# till autotools figure out a way to version .la files we simply remove
# them again after installation
install-exec-hook:
rm -f $(DESTDIR)$(libdir)/*.la
AC_INIT([ftimings], [0.1]) AC_INIT([ftimings], [0.2])
AC_PREREQ([2.59]) AC_PREREQ([2.59])
AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_MACRO_DIR([m4])
API_VERSION=1
AC_CONFIG_HEADERS([config.h]) AC_CONFIG_HEADERS([config.h])
AM_INIT_AUTOMAKE([foreign -Wall -Werror]) AM_INIT_AUTOMAKE([foreign -Wall -Werror])
...@@ -36,7 +38,7 @@ if test "$want_perf" = "yes" ; then ...@@ -36,7 +38,7 @@ if test "$want_perf" = "yes" ; then
fi fi
AM_CONDITIONAL([HAVE_PERF],[test "$want_perf" = "yes"]) AM_CONDITIONAL([HAVE_PERF],[test "$want_perf" = "yes"])
AC_MSG_CHECKING([for AVX support (important for example programs only)]) AC_MSG_CHECKING([whether we can compile AVX opcodes (important for example programs only)])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
int main(int argc, char **argv) { int main(int argc, char **argv) {
__asm__ __volatile__( __asm__ __volatile__(
...@@ -74,8 +76,8 @@ DX_MAN_FEATURE(ON) ...@@ -74,8 +76,8 @@ DX_MAN_FEATURE(ON)
DX_HTML_FEATURE(ON) DX_HTML_FEATURE(ON)
DX_INIT_DOXYGEN([ftimings], [Doxyfile], [docs]) DX_INIT_DOXYGEN([ftimings], [Doxyfile], [docs])
AC_SUBST([FTIMINGS_SO_VERSION], [1:0:0]) AC_SUBST([API_VERSION])
AC_SUBST([FTIMINGS_API_VERSION], [0.1]) AC_SUBST([FTIMINGS_SO_VERSION], [$API_VERSION:0:0])
AC_SUBST([AM_CFLAGS]) AC_SUBST([AM_CFLAGS])
AC_SUBST([AM_FCFLAGS]) AC_SUBST([AM_FCFLAGS])
AC_SUBST([AM_LDFLAGS]) AC_SUBST([AM_LDFLAGS])
...@@ -83,7 +85,8 @@ AC_SUBST([DOXYGEN_OUTPUT_DIR], [docs]) ...@@ -83,7 +85,8 @@ AC_SUBST([DOXYGEN_OUTPUT_DIR], [docs])
AC_CONFIG_FILES([Makefile AC_CONFIG_FILES([Makefile
Doxyfile Doxyfile
ftimings-${FTIMINGS_API_VERSION}-${FC}.pc:ftimings.pc.in ftimings-${API_VERSION}-${FC}.pc:ftimings.pc.in
ftimings_perflib-${API_VERSION}-${FC}.pc:ftimings_perflib.pc.in
]) ])
AC_OUTPUT AC_OUTPUT
......
PROGRAM tperf_context
!$ use omp_lib, only : omp_get_thread_num
IMPLICIT NONE
INTEGER, PARAMETER :: N_ITERATIONS=1000
INTEGER :: ii
REAL(8) :: pi(N_ITERATIONS), uu
INTEGER, PARAMETER:: SZ=N_ITERATIONS
REAL(8) :: a(SZ), b(SZ)
REAL(8) :: inc_time,inc_MFlops
!$omp parallel
!$ print *, 'This message must appear for each thread:', omp_get_thread_num()
!$omp end parallel
Call perfinit
Call perfon ('tperf')
call perf_context_start("first context");
Call perfon ('calc')
!$omp parallel do
DO ii=1, N_ITERATIONS
call calc (pi)
Enddo
Call perfoff
DO ii=1, N_ITERATIONS
call calc(pi)
END DO
CALL perf_context_end()
Call perfon ('random')
Call random_number(a)
Call random_number(b)
Call perfoff
CALL perf_context_start("context 2");
Call perfon ('calc')
Do ii=1, N_ITERATIONS
uu = calc_nochmal(a, b)
Enddo
Call perfoff
CALL perf_context_end();
! print *, 'Call perfout'
Call perfoff
inc_time = 0.0_8
inc_MFlops = 0.0_8
CALL perf_get('tperf',inc_time,inc_MFlops)
WRITE(*,"(A,2F8.3)") "tperf ",inc_time,inc_MFlops
!CALL perf_reset('calc')
Call perfout ('tperf')
! print *, 'Return perfout'
CONTAINS
FUNCTION calc_nochmal (a, b)
REAL(8) :: calc_nochmal
REAL(8) :: a(:), b(:)
calc_nochmal = SUM (a * b)
END FUNCTION calc_nochmal
SUBROUTINE calc (p_pi)
INTEGER:: ii
REAL(8):: p_pi(:)
DO ii=1, SIZE(p_pi)
p_pi(ii) = SIN(REAL(ii))*SQRT(REAL(ii))
ENDDO
END SUBROUTINE calc
END PROGRAM tperf_context
...@@ -3,9 +3,9 @@ exec_prefix=@exec_prefix@ ...@@ -3,9 +3,9 @@ exec_prefix=@exec_prefix@
libdir=@libdir@ libdir=@libdir@
includedir=@includedir@ includedir=@includedir@
Name: @PACKAGE_NAME@ Name: @PACKAGE_NAME@-@API_VERSION@-@FC@
Description: FTIMINGS Description: ftimings, API version @API_VERSION@ for Fortran compiler @FC@
Version: @PACKAGE_VERSION@ Version: @PACKAGE_VERSION@
URL: @PACKAGE_URL@ URL: @PACKAGE_URL@
Libs: -L${libdir} -lftimings-@FTIMINGS_API_VERSION@-@FC@ Libs: -L${libdir} -lftimings-@API_VERSION@-@FC@
fcflags= -I${includedir}/ftimings-@FTIMINGS_API_VERSION@-@FC@ fcflags= -I${includedir}/ftimings-@API_VERSION@-@FC@
...@@ -35,14 +35,6 @@ module ftimings ...@@ -35,14 +35,6 @@ module ftimings
private private
! do not clutter public namespace with value_t
private :: &
value_t, &
null_value, &
value_add, &
value_minus, &
value_inverse
! this is mainly needed for Doxygen, they are ! this is mainly needed for Doxygen, they are
! by implicitly reachable as type-bound procedures ! by implicitly reachable as type-bound procedures
! of timer_t, however Doxygen does not document them ! of timer_t, however Doxygen does not document them
...@@ -53,6 +45,7 @@ module ftimings ...@@ -53,6 +45,7 @@ module ftimings
timer_enable, timer_disable, timer_is_enabled, & timer_enable, timer_disable, timer_is_enabled, &
timer_in_entries, timer_in_entries_node, & timer_in_entries, timer_in_entries_node, &
timer_get, timer_get_node, & timer_get, timer_get_node, &
timer_get_value, timer_get_value_node, &
timer_since, timer_since_node, & timer_since, timer_since_node, &
timer_sort, & timer_sort, &
timer_set_print_options, & timer_set_print_options, &
...@@ -161,11 +154,14 @@ module ftimings ...@@ -161,11 +154,14 @@ module ftimings
procedure, pass :: in_entries_node => timer_in_entries_node procedure, pass :: in_entries_node => timer_in_entries_node
procedure, pass :: get => timer_get procedure, pass :: get => timer_get
procedure, pass :: get_node => timer_get_node procedure, pass :: get_node => timer_get_node
procedure, pass :: get_value => timer_get_value
procedure, pass :: get_value_node => timer_get_value_node
procedure, pass :: since => timer_since procedure, pass :: since => timer_since
procedure, pass :: since_node => timer_since_node procedure, pass :: since_node => timer_since_node
procedure, pass :: sort => timer_sort procedure, pass :: sort => timer_sort
procedure, pass :: register_error_handler => timer_register_error_handler procedure, pass :: register_error_handler => timer_register_error_handler
procedure, private, pass :: error => timer_error procedure, private, pass :: error => timer_error
procedure, private, pass :: resolve_node => timer_resolve_node
end type end type
!> Opaque type node_t, representing a graph node !> Opaque type node_t, representing a graph node
...@@ -773,30 +769,14 @@ module ftimings ...@@ -773,30 +769,14 @@ module ftimings
endif endif
end subroutine end subroutine
! private helper function
!> Print a timing graph function timer_resolve_node(self, name1, name2, name3, name4, name5, name6) result(node)
!>
!> \param name1 If given, first descend one level to the node with name name1
!> \param name2 If given, also descend another level to the node with name2 there
!> \param name3 etc.
!> \param name4 etc.
!> \param name5 etc.
!> \param name6 etc.
!> \param threshold If given, subsume any entries with a value of threshold
!> seconds in a single node "(below threshold)"
!> \param unit The unit number on which to print, default stdout
!>
subroutine timer_print(self, name1, name2, name3, name4, name5, name6, threshold, unit)
class(timer_t), intent(inout), target :: self class(timer_t), intent(inout), target :: self
! this is clunky, but what can you do..
character(len=*), intent(in), optional :: name1, name2, name3, name4, name5, name6 character(len=*), intent(in), optional :: name1, name2, name3, name4, name5, name6
real(kind=rk), intent(in), optional :: threshold
integer, intent(in), optional :: unit
character(len=128) :: errormessage
type(node_t), pointer :: node type(node_t), pointer :: node
if (.not. self%active) then character(len=128) :: errormessage
return
endif
node => self%root node => self%root
if (present(name1)) then if (present(name1)) then
...@@ -848,8 +828,34 @@ module ftimings ...@@ -848,8 +828,34 @@ module ftimings
endif endif
end if end if
call timer_print_node(self, node, threshold, unit) end function
!> Print a timing graph
!>
!> \param name1 If given, first descend one level to the node with name name1
!> \param name2 If given, also descend another level to the node with name2 there
!> \param name3 etc.
!> \param name4 etc.
!> \param name5 etc.
!> \param name6 etc.
!> \param threshold If given, subsume any entries with a value of threshold
!> seconds in a single node "(below threshold)"
!> \param unit The unit number on which to print, default stdout
!>
subroutine timer_print(self, name1, name2, name3, name4, name5, name6, threshold, unit)
class(timer_t), intent(inout), target :: self
character(len=*), intent(in), optional :: name1, name2, name3, name4, name5, name6
real(kind=rk), intent(in), optional :: threshold
integer, intent(in), optional :: unit
type(node_t), pointer :: node
if (.not. self%active) then
return
endif
node => self%resolve_node(name1, name2, name3, name4, name5, name6)
call timer_print_node(self, node, threshold, unit)
end subroutine end subroutine
...@@ -982,14 +988,13 @@ module ftimings ...@@ -982,14 +988,13 @@ module ftimings
!> For example, timer\%in_entries("foo", "bar", "parallel") returns !> For example, timer\%in_entries("foo", "bar", "parallel") returns
!> the sum of all entries named "parallel" below the foo->bar node !> the sum of all entries named "parallel" below the foo->bar node
!> !>
function timer_in_entries(self, name1, name2, name3, name4, name5, name6) result(s) function timer_in_entries(self, name1, name2, name3, name4, name5, name6, name7) result(s)
class(timer_t), intent(inout), target :: self class(timer_t), intent(inout), target :: self
character(len=*), intent(in) :: name1 character(len=*), intent(in) :: name1
character(len=*), intent(in), optional :: name2, name3, name4, name5, name6 character(len=*), intent(in), optional :: name2, name3, name4, name5, name6, name7
real(kind=rk) :: s real(kind=rk) :: s
type(node_t), pointer :: node ! the starting node type(node_t), pointer :: node ! the starting node
character(len=name_length) :: name ! the name of the sections character(len=name_length) :: name ! the name of the sections
character(len=128) :: errormessage
s = 0._rk s = 0._rk
...@@ -997,54 +1002,28 @@ module ftimings ...@@ -997,54 +1002,28 @@ module ftimings
return return
endif endif
node => self%root if (present(name7)) then
name = name1 node => self%resolve_node(name1, name2, name3, name4, name5, name6)
name = name7
if (present(name2)) then else if (present(name6)) then
node => node%get_child(name1) node => self%resolve_node(name1, name2, name3, name4, name5)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name1) // """"
call self%error(errormessage)
return
endif
name = name2
end if
if (present(name3)) then
node => node%get_child(name2)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name2) // """"
call self%error(errormessage)
return
endif
name = name3
end if
if (present(name4)) then
node => node%get_child(name3)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name3) // """"
call self%error(errormessage)
return
endif
name = name4
end if
if (present(name5)) then
node => node%get_child(name4)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name4) // """"
call self%error(errormessage)
return
endif
name = name5
end if
if (present(name6)) then
node => node%get_child(name5)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name5) // """"
call self%error(errormessage)
return
endif
name = name6 name = name6
end if else if (present(name5)) then
node => self%resolve_node(name1, name2, name3, name4)
name = name5
else if (present(name4)) then
node => self%resolve_node(name1, name2, name3)
name = name4
else if (present(name3)) then
node => self%resolve_node(name1, name2)
name = name3
else if (present(name2)) then
node => self%resolve_node(name1)
name = name2
else
node => self%resolve_node()
name = name1
endif
s = self%in_entries_node(node, name) s = self%in_entries_node(node, name)
end function end function
...@@ -1077,9 +1056,7 @@ module ftimings ...@@ -1077,9 +1056,7 @@ module ftimings
!> !>
function timer_get(self, name1, name2, name3, name4, name5, name6) result(s) function timer_get(self, name1, name2, name3, name4, name5, name6) result(s)
class(timer_t), intent(inout), target :: self class(timer_t), intent(inout), target :: self
! this is clunky, but what can you do..
character(len=*), intent(in), optional :: name1, name2, name3, name4, name5, name6 character(len=*), intent(in), optional :: name1, name2, name3, name4, name5, name6
character(len=128) :: errormessage
real(kind=rk) :: s real(kind=rk) :: s
type(node_t), pointer :: node type(node_t), pointer :: node
...@@ -1089,69 +1066,17 @@ module ftimings ...@@ -1089,69 +1066,17 @@ module ftimings
return return
endif endif
node => self%root node => self%resolve_node(name1, name2, name3, name4, name5, name6)
if (present(name1)) then
node => node%get_child(name1)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name1) // """"
call self%error(trim(errormessage))
return
endif
end if
if (present(name2)) then
node => node%get_child(name2)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name2) // """"
call self%error(trim(errormessage))
return
endif
end if
if (present(name3)) then
node => node%get_child(name3)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name3) // """"
call self%error(trim(errormessage))
return
endif
end if
if (present(name4)) then
node => node%get_child(name4)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name4) // """"
call self%error(trim(errormessage))
return
endif
end if
if (present(name5)) then
node => node%get_child(name5)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name5) // """"
call self%error(trim(errormessage))
return
endif
end if
if (present(name6)) then
node => node%get_child(name6)
if (.not. associated(node)) then
write(errormessage,'(a)') "Could not descend to """ // trim(name6) // """"
call self%error(trim(errormessage))
return
endif
end if
if (node%is_running) then
write(errormessage,'(a)') "Timer """ // trim(node%name) // """ not yet stopped"
call self%error(trim(errormessage))
return
endif
s = self%get_node(node) s = self%get_node(node)
end function end function
!> _node version of timer_get !> _node version of timer_get
pure function timer_get_node(self, node) result(s) function timer_get_node(self, node) result(s)
class(timer_t), intent(in) :: self