! Copyright 2014 Lorenz Hüdepohl ! ! This file is part of ftimings. ! ! ftimings is free software: you can redistribute it and/or modify ! it under the terms of the GNU Lesser General Public License as published by ! the Free Software Foundation, either version 3 of the License, or ! (at your option) any later version. ! ! ftimings is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU Lesser General Public License for more details. ! ! You should have received a copy of the GNU Lesser General Public License ! along with ftimings. If not, see . #ifdef HAVE_CONFIG_H #include "config-f90.h" #endif !> \mainpage Ftimings !> !> An almost pure-fortran attempt to play with tree structures, which evolved !> into the timing library used e.g. by the VERTEX supernova code. !> !> All you need to know is contained in the \ref ftimings::timer_t derived type. module ftimings use ftimings_type use ftimings_value use, intrinsic :: iso_c_binding, only : c_size_t, c_ptr, c_char, & c_associated, c_loc, c_f_pointer, c_funptr, c_f_procpointer, c_null_ptr, c_null_funptr, c_null_char implicit none save private ! do not clutter public namespace with value_t private :: & value_t, & null_value, & value_add, & value_minus, & value_inverse ! this is mainly needed for Doxygen, they are ! by implicitly reachable as type-bound procedures ! of timer_t, however Doxygen does not document them ! if they are not also public public timer_start, timer_stop, & timer_free, & timer_print, timer_print_node, & timer_enable, timer_disable, timer_is_enabled, & timer_in_entries, timer_in_entries_node, & timer_get, timer_get_node, & timer_since, timer_since_node, & timer_sort, & timer_set_print_options, & timer_measure_flops, & timer_measure_allocated_memory, & timer_measure_virtual_memory, & timer_measure_max_allocated_memory, & timer_measure_memory_bandwidth, & timer_register_error_handler, & node_get_child character(len=name_length), private, parameter :: own = "(own)" character(len=name_length), private, parameter :: below = "(below threshold)" !> An opaque handle that is provided for custom error callbacks type, abstract, public :: timer_error_handle_t contains procedure(timer_error_handle_free), pass, public, deferred :: free end type ! Used to provide C-API error handlers type, extends(timer_error_handle_t), private :: timer_c_error_handle_t type(c_funptr) :: c_handler = c_null_funptr type(c_ptr) :: c_handle = c_null_ptr contains procedure, pass, public :: free => c_error_handle_free end type !> Type for a timer instance. !> !> Typical usage: !> \code{.f90} !> type(timer_t) :: timer !> !> call timer%enable() !> !> call timer%start("section") !> ... !> call timer%start("subsection") !> ... !> call timer%stop("subsection") !> ... !> call timer%stop("section") !> !> call timer%print() !> \endcode !> !> Every first call to timer%start() at a certain point in the graph !> allocates a small amount of memory. If the timer is no longer needed, !> all that memory can be freed again with !> !> \code{.f90} !> call timer%free() !> \endcode !> !c> !c> /* Define timer_t as an opaque type */ !c> struct ftimer_struct; !c> typedef struct ftimer_struct ftimer_t; !c> type, public :: timer_t logical, private :: active = .false. !< If set to .false., most operations return immediately without any action logical, private :: record_allocated_memory = .false. !< IF set to .true., record also the current resident set size logical, private :: record_virtual_memory = .false. !< IF set to .true., record also the virtual memory logical, private :: record_max_allocated_memory = .false. !< IF set to .true., record also the max resident set size ("high water mark") logical, private :: record_flop_counts = .false. !< If set to .true., record also FLOP counts via PAPI calls logical, private :: record_memory_bandwidth = .false. !< If set to .true., record also memory bandwidth via PAPI calls logical, private :: print_allocated_memory = .false. logical, private :: print_max_allocated_memory = .false. logical, private :: print_virtual_memory = .false. logical, private :: print_flop_count = .false. logical, private :: print_flop_rate = .false. logical, private :: print_memory_transferred = .false. logical, private :: print_memory_bandwidth = .false. logical, private :: print_ai = .false. logical, private :: is_sorted = .false. type(node_t), private, pointer :: root => NULL() !< Start of graph type(node_t), private, pointer :: current_node => NULL() !< Current position in the graph procedure(timer_error_handler_t), private, pass, pointer:: error_handler => default_error_handler class(timer_error_handle_t), private, pointer :: error_handle => NULL() contains procedure, pass :: start => timer_start procedure, pass :: stop => timer_stop procedure, pass :: free => timer_free procedure, pass :: print => timer_print procedure, pass :: print_node => timer_print_node procedure, pass :: enable => timer_enable procedure, pass :: disable => timer_disable procedure, pass :: is_enabled => timer_is_enabled procedure, pass :: measure_flops => timer_measure_flops procedure, pass :: measure_allocated_memory => timer_measure_allocated_memory procedure, pass :: measure_virtual_memory => timer_measure_virtual_memory procedure, pass :: measure_max_allocated_memory => timer_measure_max_allocated_memory procedure, pass :: measure_memory_bandwidth => timer_measure_memory_bandwidth procedure, pass :: set_print_options => timer_set_print_options procedure, pass :: in_entries => timer_in_entries procedure, pass :: in_entries_node => timer_in_entries_node procedure, pass :: get => timer_get procedure, pass :: get_node => timer_get_node procedure, pass :: since => timer_since procedure, pass :: since_node => timer_since_node procedure, pass :: sort => timer_sort procedure, pass :: register_error_handler => timer_register_error_handler procedure, private, pass :: error => timer_error end type !> Opaque type node_t, representing a graph node !> !c> struct ftimer_node_struct; !c> typedef struct ftimer_node_struct ftimer_node_t; type, public :: node_t character(len=name_length), private :: name = "" ! Descriptive name, used when printing the timings integer, private :: count = 0 ! Number of node_stop calls type(value_t), private :: value ! The actual counter data, see ftimings_values.F90 logical, private :: is_running = .false. ! .true. if still running type(node_t), private, pointer :: firstChild => NULL() type(node_t), private, pointer :: lastChild => NULL() type(node_t), private, pointer :: parent => NULL() type(node_t), private, pointer :: nextSibling => NULL() class(timer_t), private, pointer :: timer => NULL() contains procedure, private, pass :: now => node_now procedure, private, pass :: start => node_start procedure, private, pass :: stop => node_stop procedure, private, pass :: get_value => node_get_value procedure, private, pass :: new_child => node_new_child procedure, public, pass :: get_child => node_get_child procedure, private, pass :: sum_of_children => node_sum_of_children procedure, private, pass :: sum_of_descendants_with_name => node_sum_of_descendants_with_name procedure, private, pass :: sum_of_children_below => node_sum_of_children_below procedure, private, pass :: print => node_print procedure, private, pass :: print_graph => node_print_graph procedure, private, pass :: sort_children => node_sort_children end type !> Interface for error callback routine abstract interface subroutine timer_error_handler_t(timer, handle, message) import timer_t, timer_error_handle_t class(timer_t), intent(inout), target :: timer class(timer_error_handle_t), intent(in) :: handle character(len=*), intent(in) :: message end subroutine end interface abstract interface subroutine timer_error_handle_free(handle, timer) import timer_t, timer_error_handle_t class(timer_error_handle_t), intent(inout) :: handle class(timer_t), intent(inout) :: timer end subroutine end interface interface pure function microseconds_since_epoch() result(us) bind(C, name="ftimings_microseconds_since_epoch") use, intrinsic :: iso_c_binding implicit none integer(kind=c_int64_t) :: us end function end interface #ifdef HAVE_LIBPAPI interface function flop_init() result(ret) bind(C, name="ftimings_flop_init") use, intrinsic :: iso_c_binding implicit none integer(kind=c_int) :: ret end function end interface interface pure subroutine flop_counter(flop) bind(C, name="ftimings_flop_counter") use, intrinsic :: iso_c_binding implicit none integer(kind=c_long_long), intent(out) :: flop end subroutine end interface #endif #ifdef HAVE_PERF interface function perf_memory_counters_init() result(ret) bind(C, name="ftimings_perf_memory_counters_init") use, intrinsic :: iso_c_binding implicit none integer(kind=c_int) :: ret end function end interface interface pure subroutine perf_memory_counters(mem_reads, mem_writes) bind(C, name="ftimings_perf_memory_counters") use, intrinsic :: iso_c_binding implicit none integer(kind=c_int64_t), intent(out) :: mem_reads, mem_writes end subroutine end interface #endif interface pure function resident_set_size() result(rsssize) bind(C, name="ftimings_resident_set_size") use, intrinsic :: iso_c_binding implicit none integer(kind=c_long) :: rsssize end function end interface interface pure function virtual_memory() result(virtualmem) bind(C, name="ftimings_virtual_memory") use, intrinsic :: iso_c_binding implicit none integer(kind=c_long) :: virtualmem end function end interface interface pure function max_resident_set_size() result(maxrsssize) bind(C, name="ftimings_highwater_mark") use, intrinsic :: iso_c_binding implicit none integer(kind=c_long) :: maxrsssize end function end interface interface strnlen pure function strnlen(ptr, maxlen) result(size) bind(c, name="strnlen") use, intrinsic :: iso_c_binding type(c_ptr), intent(in), value :: ptr integer(kind=c_size_t), intent(in), value :: maxlen integer(kind=c_size_t) :: size end function end interface interface strlen pure function strlen(ptr) result(size) bind(c, name="strlen") use, intrinsic :: iso_c_binding type(c_ptr), intent(in), value :: ptr integer(kind=c_size_t) :: size end function end interface contains !> Activate the timer, without this, most methods are non-ops. !> subroutine timer_enable(self) class(timer_t), intent(inout), target :: self self%active = .true. end subroutine !> Register an error handler callback subroutine timer_register_error_handler(self, error_handler, handle) class(timer_t), intent(inout) :: self procedure(timer_error_handler_t), pointer, intent(in) :: error_handler class(timer_error_handle_t), pointer, intent(in) :: handle self%error_handler => error_handler self%error_handle => handle end subroutine !> Default error handler, print the message and deactivate and free the timer subroutine default_error_handler(timer, handle, message) use, intrinsic :: iso_fortran_env, only : error_unit class(timer_t), intent(inout), target :: timer class(timer_error_handle_t), intent(in), pointer :: handle character(len=*), intent(in) :: message write(error_unit,'(a,a)') "ftimings encountered an error, disabling this timer: ", message if (associated(handle)) then write(error_unit, '(a,a)') " additionally: An error handle was provided without an error handler" endif stop 1 call timer%disable() call timer%free() end subroutine subroutine timer_error(self, message) class(timer_t), intent(inout) :: self character(len=*), intent(in) :: message call self%error_handler(self%error_handle, message) end subroutine !> Call with enabled = .true. to also record amount of newly allocated memory. !> By default, memory usage is not recored. Call with .false. to deactivate again. !> !> This opens /proc/self/statm, parses it, and closes it again and is thus !> quite costly, use when appropriate. !> subroutine timer_measure_allocated_memory(self, enabled) class(timer_t), intent(inout) :: self logical, intent(in) :: enabled self%record_allocated_memory = enabled end subroutine !> Call with enabled = .true. to also record amount of newly created virtual memory. !> By default, memory usage is not recored. Call with .false. to deactivate again. !> !> This opens /proc/self/statm, parses it, and closes it again and is thus !> quite costly, use when appropriate. !> subroutine timer_measure_virtual_memory(self, enabled) class(timer_t), intent(inout) :: self logical, intent(in) :: enabled self%record_virtual_memory = enabled end subroutine !> Call with enabled = .true. to also record amount of newly increase of max. !> resident memory !> By default, memory usage is not recored. Call with .false. to deactivate again. !> !> This opens /proc/self/status, parses it, and closes it again and is thus !> quite costly, use when appropriate. !> subroutine timer_measure_max_allocated_memory(self, enabled) class(timer_t), intent(inout) :: self logical, intent(in) :: enabled self%record_max_allocated_memory = enabled end subroutine !> Call with enabled = .true. to also record the memory bandwidth with PAPI !> By default, this is not recorded. Call with .false. to deactivate again. !> subroutine timer_measure_memory_bandwidth(self, enabled) class(timer_t), intent(inout) :: self logical, intent(in) :: enabled if (enabled) then #ifdef HAVE_PERF if (perf_memory_counters_init() == 1) then self%record_memory_bandwidth = .true. else write(0,'(a)') "ftimings: Could not initialize Linux perf, disabling memory counters" self%record_memory_bandwidth = .false. endif #else write(0,'(a)') "ftimings: not compiled with Linux perf support, disabling memory counters" self%record_memory_bandwidth = .false. #endif else ! explicitly set to .false. by caller self%record_memory_bandwidth = .false. endif end subroutine !> Call with enabled = .true. to also record FLOP counts via PAPI calls. !> By default no FLOPS are recored. Call with .false. to deactivate again. !> subroutine timer_measure_flops(self, enabled) class(timer_t), intent(inout) :: self logical, intent(in) :: enabled if (enabled) then #ifdef HAVE_LIBPAPI if (flop_init() == 1) then self%record_flop_counts = .true. else write(0,'(a)') "ftimings: Could not initialize PAPI, disabling FLOP counter" self%record_flop_counts = .false. endif #else write(0,'(a)') "ftimings: not compiled with PAPI support, disabling FLOP counter" self%record_flop_counts = .false. #endif else ! Explicitly set to .false. by caller self%record_flop_counts = .false. endif end subroutine !> Deactivate the timer !> subroutine timer_disable(self) class(timer_t), intent(inout), target :: self self%active = .false. end subroutine !> Return whether the timer is currently running !> function timer_is_enabled(self) result(is) class(timer_t), intent(inout), target :: self logical :: is is = self%active end function !> Control what to print on following %print calls !> !> \param print_allocated_memory Amount of newly allocated, !> resident memory !> \param print_virtual_memory Amount of newly created virtual !> memory !> \param print_max_allocated_memory Amount of new increase of max. !> resident memory ("high water mark") !> \param print_flop_count Number of floating point operations !> \param print_flop_rate Rate of floating point operations per second !> \param print_memory_transferred Memory transferred from RAM to CPU !> \param print_memory_bandwidth Memory bandwidth from RAM to CPU !> \param print_ai Arithmetic intensity, that is number of !> floating point operations per !> number of bytes transferred !> operations (currently untested) subroutine timer_set_print_options(self, & print_allocated_memory, & print_virtual_memory, & print_max_allocated_memory, & print_flop_count, & print_flop_rate, & print_memory_transferred, & print_memory_bandwidth, & print_ai) class(timer_t), intent(inout) :: self logical, intent(in), optional :: & print_allocated_memory, & print_virtual_memory, & print_max_allocated_memory, & print_flop_count, & print_flop_rate, & print_memory_transferred, & print_memory_bandwidth, & print_ai if (present(print_allocated_memory)) then self%print_allocated_memory = print_allocated_memory if ((.not. self%record_allocated_memory) .and. self%print_allocated_memory) then write(0,'(a)') "ftimings: Warning: RSS size recording was disabled, expect zeros!" endif endif if (present(print_virtual_memory)) then self%print_virtual_memory = print_virtual_memory if ((.not. self%record_virtual_memory) .and. self%print_virtual_memory) then write(0,'(a)') "ftimings: Warning: Virtual memory recording was disabled, expect zeros!" endif endif if (present(print_max_allocated_memory)) then self%print_max_allocated_memory = print_max_allocated_memory if ((.not. self%record_max_allocated_memory) .and. self%print_max_allocated_memory) then write(0,'(a)') "ftimings: Warning: HWM recording was disabled, expect zeros!" endif endif if (present(print_flop_count)) then self%print_flop_count = print_flop_count if ((.not. self%record_flop_counts) .and. self%print_flop_count) then write(0,'(a)') "ftimings: Warning: FLOP counter was disabled, expect zeros!" endif endif if (present(print_flop_rate)) then self%print_flop_rate = print_flop_rate if ((.not. self%record_flop_counts) .and. self%print_flop_rate) then write(0,'(a)') "ftimings: Warning: FLOP counter was disabled, expect zeros!" endif endif if (present(print_memory_transferred)) then self%print_memory_transferred = print_memory_transferred if ((.not. self%record_memory_bandwidth) .and. self%print_memory_transferred) then write(0,'(a)') "ftimings: Warning: Memory counters were disabled, expect zeros!" endif endif if (present(print_memory_bandwidth)) then self%print_memory_bandwidth = print_memory_bandwidth if ((.not. self%record_memory_bandwidth) .and. self%print_memory_bandwidth) then write(0,'(a)') "ftimings: Warning: Memory counters were disabled, expect zeros for memory bandwidth!" endif endif if (present(print_ai)) then self%print_ai = print_ai if (.not. (self%record_memory_bandwidth .and. self%record_flop_counts)) then write(0,'(a)') "ftimings: Warning: Memory bandwidth or FLOP counters were disabled, expect invalid values for AI" endif endif end subroutine !> Start a timing section !> !> \param name A descriptive name !> \param replace If .true. (default .false.), replace any entries at the !> current position with the same name. If .false., add the !> time to a possibly existing entry !> !> Care must be taken to balance any invocations of %start() and %stop(), e.g. !> the following is valid !> !> \code{.f90} !> call timer%start("A") !> call timer%start("B") !> call timer%stop("B") !> call timer%stop("A") !> \endcode !> !> while the following is not !> !> \code{.f90} !> call timer%start("A") !> call timer%start("B") !> call timer%stop("A") !> call timer%stop("B") !> \endcode !> subroutine timer_start(self, name, replace) #ifdef FTIMINGS_DEBUG use, intrinsic :: iso_fortran_env, only : error_unit #endif !$ use omp_lib class(timer_t), intent(inout), target :: self character(len=*), intent(in) :: name logical, intent(in), optional :: replace type(node_t), pointer :: node !$ integer :: i if (.not. self%active) then return endif ! Deal with nested parallelization !$ do i = 0, omp_get_level() !$ if (omp_get_ancestor_thread_num(i) > 0) then !$ return !$ endif !$ end do !$omp master if (.not. associated(self%current_node)) then ! First call to timer_start() allocate(self%root) self%root%name = "[Root]" self%root%timer => self call self%root%start() nullify(self%root%firstChild) nullify(self%root%lastChild) nullify(self%root%parent) nullify(self%root%nextSibling) self%current_node => self%root endif if (string_eq(self%current_node%name, name)) then #ifdef FTIMINGS_DEBUG !$omp critical write(error_unit,*) "Recursion error! Printing tree so far.." write(error_unit,*) "Got %start(""" // trim(name) // """), while %start(""" // trim(name) // """) was still active" !$ write(*,*) "omp_get_thread_num() = ", omp_get_thread_num() !$ write(*,*) "omp_get_num_threads() = ", omp_get_num_threads() !$ write(*,*) "omp_get_level() = ", omp_get_level() !$ do i = 0, omp_get_level() !$ write(*,*) "omp_get_ancestor_thread_num(", i, ") = ", omp_get_ancestor_thread_num(i) !$ end do call self%root%print_graph(0) !$omp end critical #endif call self%error("timer_start() while same timer was active") endif node => self%current_node%get_child(name) if (.not. associated(node)) then node => self%current_node%new_child(name) self%is_sorted = .false. else if (present(replace)) then if (replace) then node%value = null_value node%count = 0 if (associated(node%firstChild)) then call deallocate_node(node%firstChild) nullify(node%firstChild) nullify(node%lastChild) endif self%is_sorted = .false. endif endif call node%start() self%current_node => node !$omp end master end subroutine !> End a timing segment, \sa timer_start !> !> \param name The exact same name as was used for %start(). !> If not provided, close the currently active region. !> If given, warns if it does not match the last %start() !> call on stderr and disables the current timer instance. !> subroutine timer_stop(self, name) class(timer_t), intent(inout), target :: self character(len=*), intent(in), optional :: name character(len=128) :: errormessage logical :: error !$ integer :: omp_get_level, omp_get_ancestor_thread_num !$ integer :: i if (.not. self%active) then return endif ! Deal with nested parallelization !$ do i = 0, omp_get_level() !$ if (omp_get_ancestor_thread_num(i) > 0) then !$ return !$ endif !$ end do !$omp master error = .false. if (.not. associated(self%current_node)) then call self%error("Called timer_stop() without first calling any timer_start(), disabling timings") error = .true. else if (present(name)) then if (.not. string_eq(self%current_node%name, name)) then write(errormessage,'(a)') "Expected %stop(""" // trim(self%current_node%name) // """),& & but got %stop(""" // trim(name) // """), disabling timings" call self%error(trim(errormessage)) error = .true. endif endif if (.not. error) then call self%current_node%stop() ! climb up to parent if (.not. associated(self%current_node%parent)) then write(errormessage,'(a)') "Error: No valid parent node found for node '" // trim(self%current_node%name) // "'" call self%error(trim(errormessage)) endif self%current_node => self%current_node%parent endif !$omp end master end subroutine !> Deallocate all objects associated with (but not including) self !> subroutine timer_free(self) class(timer_t), intent(inout), target :: self if (associated(self%root)) then call deallocate_node(self%root) endif nullify(self%root) nullify(self%current_node) if (associated(self%error_handle)) then call self%error_handle%free(self) endif end subroutine !> Print a timing graph !> !> \param name1 If given, first descend one level to the node with name name1 !> \param name2 If given, also descend another level to the node with name2 there !> \param name3 etc. !> \param name4 etc. !> \param name5 etc. !> \param name6 etc. !> \param threshold If given, subsume any entries with a value of threshold !> seconds in a single node "(below threshold)" !> \param unit The unit number on which to print, default stdout !> subroutine timer_print(self, name1, name2, name3, name4, name5, name6, threshold, unit) class(timer_t), intent(inout), target :: self character(len=*), intent(in), optional :: name1, name2, name3, name4, name5, name6 real(kind=rk), intent(in), optional :: threshold integer, intent(in), optional :: unit character(len=128) :: errormessage type(node_t), pointer :: node if (.not. self%active) then return endif node => self%root if (present(name1)) then node => node%get_child(name1) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name1) // """" call self%error(errormessage) return endif end if if (present(name2)) then node => node%get_child(name2) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name2) // """" call self%error(errormessage) return endif end if if (present(name3)) then node => node%get_child(name3) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name3) // """" call self%error(errormessage) return endif end if if (present(name4)) then node => node%get_child(name4) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name4) // """" call self%error(errormessage) return endif end if if (present(name5)) then node => node%get_child(name5) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name5) // """" call self%error(errormessage) return endif end if if (present(name6)) then node => node%get_child(name6) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name6) // """" call self%error(errormessage) return endif end if call timer_print_node(self, node, threshold, unit) end subroutine !> _node version of timer_print subroutine timer_print_node(self, node, threshold, unit) use, intrinsic :: iso_fortran_env, only : output_unit class(timer_t), intent(in), target :: self type(node_t), intent(in), pointer :: node real(kind=rk), intent(in), optional :: threshold integer, intent(in), optional :: unit integer :: unit_act character(len=64) :: format_spec ! I hate fortran's string handling character(len=name_length), parameter :: group = "Group" character(len=12), parameter :: seconds = " [s]" character(len=12), parameter :: fract = " fraction" character(len=12), parameter :: ram = " alloc. RAM" character(len=12), parameter :: vmem = " alloc. VM" character(len=12), parameter :: hwm = " alloc. HWM" character(len=12), parameter :: flop_rate = " float op/s" character(len=12), parameter :: flop_count = "float op cnt" character(len=12), parameter :: mem_reads = " RAM read" character(len=12), parameter :: mem_writes = " RAM written" character(len=12), parameter :: bandwidth_read = " RAM read/s" character(len=12), parameter :: bandwidth_write = " RAM write/s" character(len=12), parameter :: ai = "arithm. Int." character(len=12), parameter :: dash = "============" if (present(unit)) then unit_act = unit else unit_act = output_unit endif ! I really do hate it .. write(format_spec,'("("" /= "",a",i0,",2x,a12,1x,a12)")') name_length write(unit_act, format_spec, advance='no') adjustl(group), seconds, fract if (self%print_allocated_memory) then write(unit_act,'(1x,a12)',advance='no') ram endif if (self%print_virtual_memory) then write(unit_act,'(1x,a12)',advance='no') vmem endif if (self%print_max_allocated_memory) then write(unit_act,'(1x,a12)',advance='no') hwm endif if (self%print_flop_count) then write(unit_act,'(1x,a12)',advance='no') flop_count endif if (self%print_flop_rate) then write(unit_act,'(1x,a12)',advance='no') flop_rate endif if (self%print_memory_transferred) then write(unit_act,'(1x,a12)',advance='no') mem_reads write(unit_act,'(1x,a12)',advance='no') mem_writes endif if (self%print_memory_bandwidth) then write(unit_act,'(1x,a12)',advance='no') bandwidth_read write(unit_act,'(1x,a12)',advance='no') bandwidth_write endif if (self%print_ai) then write(unit_act,'(1x,a12)',advance='no') ai endif write(unit_act,'(a)') "" write(format_spec,'("("" | "",a",i0,",1x,2(1x,a12))")') name_length write(unit_act, format_spec, advance='no') "", dash, dash if (self%print_allocated_memory) then write(unit_act,'(1x,a12)',advance='no') dash endif if (self%print_virtual_memory) then write(unit_act,'(1x,a12)',advance='no') dash endif if (self%print_max_allocated_memory) then write(unit_act,'(1x,a12)',advance='no') dash endif if (self%print_flop_count) then write(unit_act,'(1x,a12)',advance='no') dash endif if (self%print_flop_rate) then write(unit_act,'(1x,a12)',advance='no') dash endif if (self%print_memory_transferred) then write(unit_act,'(1x,a12)',advance='no') dash write(unit_act,'(1x,a12)',advance='no') dash endif if (self%print_memory_bandwidth) then write(unit_act,'(1x,a12)',advance='no') dash write(unit_act,'(1x,a12)',advance='no') dash endif if (self%print_ai) then write(unit_act,'(1x,a12)',advance='no') dash endif write(unit_act,'(a)') "" call node%print_graph(0, threshold, is_sorted=self%is_sorted, unit=unit) end subroutine !> Return the sum of all entries with a certain name below !> a given node. Specify the name with the last argument, the !> path to the starting point with the first few parameters !> !> \param name1, .., namei-1 The path to the starting node !> \param namei The name of all sub-entries below this !> node which should be summed together !> !> For example, timer\%in_entries("foo", "bar", "parallel") returns !> the sum of all entries named "parallel" below the foo->bar node !> function timer_in_entries(self, name1, name2, name3, name4, name5, name6) result(s) class(timer_t), intent(inout), target :: self character(len=*), intent(in) :: name1 character(len=*), intent(in), optional :: name2, name3, name4, name5, name6 real(kind=rk) :: s type(node_t), pointer :: node ! the starting node character(len=name_length) :: name ! the name of the sections character(len=128) :: errormessage s = 0._rk if (.not. self%active) then return endif node => self%root name = name1 if (present(name2)) then node => node%get_child(name1) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name1) // """" call self%error(errormessage) return endif name = name2 end if if (present(name3)) then node => node%get_child(name2) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name2) // """" call self%error(errormessage) return endif name = name3 end if if (present(name4)) then node => node%get_child(name3) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name3) // """" call self%error(errormessage) return endif name = name4 end if if (present(name5)) then node => node%get_child(name4) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name4) // """" call self%error(errormessage) return endif name = name5 end if if (present(name6)) then node => node%get_child(name5) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name5) // """" call self%error(errormessage) return endif name = name6 end if s = self%in_entries_node(node, name) end function !> _node version of timer_in_entries pure function timer_in_entries_node(self, node, name) result(s) class(timer_t), intent(in), target :: self type(node_t), intent(in), pointer :: node ! the starting node character(len=name_length), intent(in) :: name ! the name of the sections real(kind=rk) :: s type(value_t) :: val s = 0._rk if (.not. self%active) then return endif val = node%sum_of_descendants_with_name(name) s = real(val%micros, kind=rk) * 1e-6_rk end function !> Access a specific, already stopped entry of the graph by specifying the !> names of the nodes along the graph from the root node !> !> The result is only meaningfull if the entry was never appended by !> additional %start() calls. !> function timer_get(self, name1, name2, name3, name4, name5, name6) result(s) class(timer_t), intent(inout), target :: self ! this is clunky, but what can you do.. character(len=*), intent(in), optional :: name1, name2, name3, name4, name5, name6 character(len=128) :: errormessage real(kind=rk) :: s type(node_t), pointer :: node s = 0._rk if (.not. self%active) then return endif node => self%root if (present(name1)) then node => node%get_child(name1) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name1) // """" call self%error(trim(errormessage)) return endif end if if (present(name2)) then node => node%get_child(name2) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name2) // """" call self%error(trim(errormessage)) return endif end if if (present(name3)) then node => node%get_child(name3) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name3) // """" call self%error(trim(errormessage)) return endif end if if (present(name4)) then node => node%get_child(name4) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name4) // """" call self%error(trim(errormessage)) return endif end if if (present(name5)) then node => node%get_child(name5) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name5) // """" call self%error(trim(errormessage)) return endif end if if (present(name6)) then node => node%get_child(name6) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name6) // """" call self%error(trim(errormessage)) return endif end if if (node%is_running) then write(errormessage,'(a)') "Timer """ // trim(node%name) // """ not yet stopped" call self%error(trim(errormessage)) return endif s = self%get_node(node) end function !> _node version of timer_get pure function timer_get_node(self, node) result(s) class(timer_t), intent(in) :: self type(node_t), pointer, intent(in) :: node real(kind=rk) :: s s = 0.0_rk if (.not. self%active) then return endif s = real(node%value%micros, kind=rk) * 1e-6_rk end function !> Access a specific, not yet stopped entry of the graph by specifying the !> names of the nodes along the graph from the root node and return the !> seconds that have passed since the entry was created. !> !> The result is only meaningfull if the entry was never appended by !> additional %start() calls. !> function timer_since(self, name1, name2, name3, name4, name5, name6) result(s) class(timer_t), intent(inout), target :: self character(len=*), intent(in), optional :: name1, name2, name3, name4, name5, name6 character(len=128) :: errormessage real(kind=rk) :: s type(node_t), pointer :: node s = 0._rk if (.not. self%active) then return endif node => self%root if (present(name1)) then node => node%get_child(name1) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name1) // """" call self%error(trim(errormessage)) return endif end if if (present(name2)) then node => node%get_child(name2) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name2) // """" call self%error(trim(errormessage)) return endif end if if (present(name3)) then node => node%get_child(name3) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name3) // """" call self%error(trim(errormessage)) return endif end if if (present(name4)) then node => node%get_child(name4) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name4) // """" call self%error(trim(errormessage)) return endif end if if (present(name5)) then node => node%get_child(name5) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name5) // """" call self%error(trim(errormessage)) return endif end if if (present(name6)) then node => node%get_child(name6) if (.not. associated(node)) then write(errormessage,'(a)') "Could not descend to """ // trim(name6) // """" call self%error(trim(errormessage)) return endif end if if (node%is_running .neqv. .true.) then write(errormessage,'(a)') "Timer """ // trim(node%name) // """ already stopped" call self%error(trim(errormessage)) return endif s = self%since_node(node) end function !> _node version of timer_since pure function timer_since_node(self, node) result(s) class(timer_t), intent(in) :: self type(node_t), intent(in) :: node real(kind=rk) :: s type(value_t) :: val s = 0._rk if (.not. self%active) then return endif val = node%value + node%now() s = real(val%micros, kind=rk) * 1e-6_rk end function !> Sort the graph on each level. !> Warning: This irrevocable destroys the old ordering. !> subroutine timer_sort(self) class(timer_t), intent(inout), target :: self type(node_t), pointer :: node call sort_nodes(self%root, node) node => self%root do while (associated(node)) call node%sort_children() node => node%nextSibling enddo self%is_sorted = .true. end subroutine ! Now methods of node_t: ! This is the function that actually returns the current timestamp and all other counters pure function node_now(self) result(val) use, intrinsic :: iso_c_binding class(node_t), intent(in) :: self type(value_t) :: val ! current time val%micros = microseconds_since_epoch() if (self%timer%record_allocated_memory) then val%rsssize = resident_set_size() endif if (self%timer%record_virtual_memory) then val%virtualmem = virtual_memory() endif if (self%timer%record_max_allocated_memory) then val%maxrsssize = max_resident_set_size() endif #ifdef HAVE_LIBPAPI if (self%timer%record_flop_counts) then call flop_counter(val%flop_count) endif #endif #ifdef HAVE_PERF if (self%timer%record_memory_bandwidth) then call perf_memory_counters(val%mem_reads, val%mem_writes) endif #endif end function subroutine node_start(self) class(node_t), intent(inout) :: self ! take the time self%value = self%value - self%now() self%is_running = .true. end subroutine subroutine node_stop(self) class(node_t), intent(inout) :: self self%count = self%count + 1 ! take the time self%value = self%value + self%now() self%is_running = .false. end subroutine function node_get_value(self) result(val) class(node_t), intent(in) :: self type(value_t) :: val val = self%value if (self%is_running) then ! we have not finished, give time up to NOW val = val + self%now() endif end function function node_new_child(self, name) result(new) class(node_t), intent(inout), target :: self character(len=*), intent(in) :: name type(node_t), pointer :: new if (.not. associated(self%lastChild)) then allocate(self%lastChild) new => self%lastChild self%firstChild => new else allocate(self%lastChild%nextSibling) new => self%lastChild%nextSibling self%lastChild => new endif select type (self) type is (node_t) new%parent => self class default stop "node_new_child(): This should not happen" end select new%name = name new%count = 0 new%timer => self%timer nullify(new%firstChild) nullify(new%lastChild) nullify(new%nextSibling) end function pure function string_eq(str1, str2) result(eq) character(len=name_length), intent(in) :: str1 character(len=*), intent(in) :: str2 logical :: eq eq = trim(str1) .eq. str2(1:min(len(trim(str2)), name_length)) end function !> Returns a particular child-node !> !> \param name The name of the child to look for !> !> \returns the child node, or NULL() if no node with the given name can be found !> function node_get_child(self, name) result(child) class(node_t), intent(in) :: self character(len=*), intent(in) :: name type(node_t), pointer :: child child => self%firstChild do while (associated(child)) if (string_eq(child%name, name)) then return endif child => child%nextSibling enddo nullify(child) end function recursive subroutine deallocate_node(entry) type(node_t), intent(inout), pointer :: entry type(node_t), pointer :: nextSibling if (associated(entry%firstChild)) then call deallocate_node(entry%firstChild) endif nextSibling => entry%nextSibling deallocate(entry) nullify(entry) if (associated(nextSibling)) then call deallocate_node(nextSibling) endif end subroutine function node_sum_of_children(self) result(sum_time) class(node_t), intent(in) :: self type(node_t), pointer :: cur_entry type(value_t) :: sum_time cur_entry => self%firstChild do while (associated(cur_entry)) sum_time = sum_time + cur_entry%get_value() cur_entry => cur_entry%nextSibling enddo end function !> Sum-up the values of all descendants with a particular name pure recursive function node_sum_of_descendants_with_name(self, name) result(sum_time) class(node_t), intent(in) :: self character(len=*), intent(in) :: name type(value_t) :: sum_time if (associated(self%firstChild)) then sum_time = node_sum_of_descendants_with_name_for_child(self, name, self%firstChild) end if end function !> Helper function for node_sum_of_descendants_with_name (sadly necessary for "pure"ness) pure recursive function node_sum_of_descendants_with_name_for_child(self, name, child) result(sum_time) class(node_t), intent(in) :: self character(len=*), intent(in) :: name class(node_t), intent(in) :: child type(value_t) :: sum_time if (string_eq(child%name, name)) then sum_time = sum_time + child%value else ! recurse over grand-children sum_time = sum_time + child%sum_of_descendants_with_name(name) endif ! recurse over direct children if (associated(child%nextSibling)) then sum_time = sum_time + node_sum_of_descendants_with_name_for_child(self, name, child%nextSibling) end if end function function node_sum_of_children_below(self, threshold) result(sum_time) class(node_t), intent(in) :: self real(kind=rk), intent(in), optional :: threshold type(node_t), pointer :: cur_entry type(value_t) :: sum_time, cur_value if (.not. present(threshold)) then return endif cur_entry => self%firstChild do while (associated(cur_entry)) cur_value = cur_entry%get_value() if (cur_value%micros * 1e-6_rk < threshold) then sum_time = sum_time + cur_value endif cur_entry => cur_entry%nextSibling enddo end function subroutine insert_into_sorted_list(head, node) type(node_t), pointer, intent(inout) :: head type(node_t), target, intent(inout) :: node type(node_t), pointer :: cur if (node%value%micros >= head%value%micros) then node%nextSibling => head head => node return endif cur => head do while (associated(cur%nextSibling)) if (cur%value%micros > node%value%micros .and. node%value%micros >= cur%nextSibling%value%micros) then node%nextSibling => cur%nextSibling cur%nextSibling => node return endif cur => cur%nextSibling end do ! node has to be appended at the end cur%nextSibling => node node%nextSibling => NULL() end subroutine subroutine remove_from_list(head, node) type(node_t), pointer, intent(inout) :: head type(node_t), pointer, intent(in) :: node type(node_t), pointer :: cur if (associated(head,node)) then head => head%nextSibling return endif cur => head do while (associated(cur%nextSibling)) if (associated(cur%nextSibling,node)) then cur%nextSibling => cur%nextSibling%nextSibling return endif cur => cur%nextSibling end do end subroutine subroutine node_print(self, indent_level, total, unit) class(node_t), intent(inout) :: self integer, intent(in) :: indent_level type(value_t), intent(in) :: total type(value_t) :: val integer, intent(in) :: unit character(len=name_length) :: name, suffix if (self%is_running) then name = trim(self%name) // " (running)" else name = self%name endif if (self%count > 1) then write(suffix, '(" (",i0,"x)")') self%count name = trim(name) // " " // trim(suffix) endif if (self%is_running) then val = self%value + self%now() else val = self%value endif call print_value(val, self%timer, indent_level, name, total, unit) end subroutine recursive subroutine node_print_graph(self, indent_level, threshold, is_sorted, total, unit) use, intrinsic :: iso_fortran_env, only : output_unit class(node_t), intent(inout) :: self integer, intent(in) :: indent_level real(kind=rk), intent(in), optional :: threshold logical, intent(in), optional :: is_sorted type(value_t), intent(in), optional :: total integer, intent(in), optional :: unit type(node_t), pointer :: node type(value_t) :: cur_value, node_value, own_value, below_threshold_value, total_act type(node_t), pointer :: own_node, threshold_node real(kind=rk) :: threshold_act logical :: is_sorted_act, print_own, print_threshold integer :: unit_act nullify(own_node) nullify(threshold_node) if (present(threshold)) then threshold_act = threshold else threshold_act = 0 endif if (present(is_sorted)) then is_sorted_act = is_sorted else is_sorted_act = .false. endif cur_value = self%get_value() if (present(total)) then total_act = total else total_act = cur_value endif if (present(unit)) then unit_act = unit else unit_act = output_unit endif call self%print(indent_level, total_act, unit_act) own_value = cur_value - self%sum_of_children() below_threshold_value = self%sum_of_children_below(threshold) print_own = associated(self%firstChild) print_threshold = below_threshold_value%micros > 0 ! Deal with "(own)" and "(below threshold)" entries if (is_sorted_act) then ! sort them in if (print_own) then ! insert an "(own)" node allocate(own_node) own_node%value = own_value own_node%name = own own_node%timer => self%timer call insert_into_sorted_list(self%firstChild, own_node) endif if (print_threshold) then ! insert a "(below threshold)" node allocate(threshold_node) threshold_node%value = below_threshold_value threshold_node%name = below threshold_node%timer => self%timer call insert_into_sorted_list(self%firstChild, threshold_node) endif else ! print them first if (print_own) then call print_value(own_value, self%timer, indent_level + 1, own, cur_value, unit_act) endif if (print_threshold) then call print_value(below_threshold_value, self%timer, indent_level + 1, below, cur_value, unit_act) endif endif ! print children node => self%firstChild do while (associated(node)) node_value = node%get_value() if (node_value%micros * 1e-6_rk >= threshold_act & .or. associated(node, threshold_node) & .or. associated(node, own_node)) then call node%print_graph(indent_level + 1, threshold, is_sorted, cur_value, unit_act) endif node => node%nextSibling end do if (is_sorted_act) then ! remove inserted dummy nodes again if (print_own) then call remove_from_list(self%firstChild, own_node) deallocate(own_node) endif if (print_threshold) then call remove_from_list(self%firstChild, threshold_node) deallocate(threshold_node) endif endif end subroutine ! In-place sort a node_t linked list and return the first and last element, subroutine sort_nodes(head, tail) type(node_t), pointer, intent(inout) :: head, tail type(node_t), pointer :: p, q, e type(value_t) :: p_val, q_val integer :: insize, nmerges, psize, qsize, i if (.not. associated(head)) then nullify(tail) return endif insize = 1 do while (.true.) p => head nullify(head) nullify(tail) nmerges = 0 do while(associated(p)) nmerges = nmerges + 1 q => p psize = 0 do i = 1, insize psize = psize + 1 q => q%nextSibling if (.not. associated(q)) then exit endif end do qsize = insize do while (psize > 0 .or. (qsize > 0 .and. associated(q))) if (psize == 0) then e => q q => q%nextSibling qsize = qsize - 1 else if (qsize == 0 .or. (.not. associated(q))) then e => p; p => p%nextSibling psize = psize - 1 else p_val = p%get_value() q_val = q%get_value() if (p_val%micros >= q_val%micros) then e => p p => p%nextSibling psize = psize - 1 else e => q q => q%nextSibling qsize = qsize - 1 end if end if if (associated(tail)) then tail%nextSibling => e else head => e endif tail => e end do p => q end do nullify(tail%nextSibling) if (nmerges <= 1) then return endif insize = insize * 2 end do end subroutine recursive subroutine node_sort_children(self) class(node_t), intent(inout) :: self type(node_t), pointer :: node call sort_nodes(self%firstChild, self%lastChild) node => self%firstChild do while (associated(node)) call node%sort_children() node => node%nextSibling enddo end subroutine subroutine print_value(value, timer, indent_level, label, total, unit) type(value_t), intent(in) :: value type(timer_t), intent(in) :: timer integer, intent(in) :: indent_level character(len=name_length), intent(in) :: label type(value_t), intent(in) :: total integer, intent(in) :: unit character(len=64) :: format_spec write(format_spec,'("(",i0,"x,""|_ "",a",i0,",2x,f12.6,1x,f12.3)")') indent_level * 2 + 1, name_length write(unit,format_spec,advance='no') & label, & real(value%micros, kind=rk) * 1e-6_rk, & real(value%micros, kind=rk) / real(total%micros, kind=rk) if (timer%print_allocated_memory) then write(unit,'(1x,a12)',advance='no') & nice_format_2(real(value%rsssize, kind=c_double)) endif if (timer%print_virtual_memory) then write(unit,'(1x,a12)',advance='no') & nice_format_2(real(value%virtualmem, kind=c_double)) endif if (timer%print_max_allocated_memory) then write(unit,'(1x,a12)',advance='no') & nice_format_2(real(value%maxrsssize, kind=c_double)) endif if (timer%print_flop_count) then write(unit,'(1x,a12)',advance='no') nice_format_10(real(value%flop_count, kind=rk)) endif if (timer%print_flop_rate) then write(unit,'(1x,a12)',advance='no') nice_format_10(real(value%flop_count, kind=rk) / (value%micros * 1e-6_rk)) endif if (timer%print_memory_transferred) then write(unit,'(1x,a12)',advance='no') nice_format_2(real(value%mem_reads, kind=rk)) write(unit,'(1x,a12)',advance='no') nice_format_2(real(value%mem_writes, kind=rk)) endif if (timer%print_memory_bandwidth) then write(unit,'(1x,a12)',advance='no') nice_format_2(real(value%mem_reads, kind=rk) / (value%micros * 1e-6_rk)) write(unit,'(1x,a12)',advance='no') nice_format_2(real(value%mem_writes, kind=rk) / (value%micros * 1e-6_rk)) endif if (timer%print_ai) then write(unit,'(1x,f12.4)',advance='no') real(value%flop_count, kind=rk) / (value%mem_writes + value%mem_reads) endif write(unit,'(a)') "" end subroutine pure elemental function nice_format_2(number) result(string) real(kind=c_double), intent(in) :: number character(len=12) :: string real(kind=c_double), parameter :: & kibi = 2.0_c_double**10, & mebi = 2.0_c_double**20, & gibi = 2.0_c_double**30, & tebi = 2.0_c_double**40, & pebi = 2.0_c_double**50 if (abs(number) >= pebi) then write(string,'(es12.2)') number else if (abs(number) >= tebi) then write(string,'(f9.2,'' Ti'')') number / tebi else if (abs(number) >= gibi) then write(string,'(f9.2,'' Gi'')') number / gibi else if (abs(number) >= mebi) then write(string,'(f9.2,'' Mi'')') number / mebi else if (abs(number) >= kibi) then write(string,'(f9.2,'' ki'')') number / kibi else write(string,'(f12.2)') number endif end function pure elemental function nice_format_10(number) result(string) real(kind=c_double), intent(in) :: number character(len=12) :: string real(kind=c_double), parameter :: & kilo = 1e3_c_double, & mega = 1e6_c_double, & giga = 1e9_c_double, & tera = 1e12_c_double, & peta = 1e15_c_double if (abs(number) >= peta) then write(string,'(es12.2)') number else if (abs(number) >= tera) then write(string,'(f9.2,'' T'')') number / tera else if (abs(number) >= giga) then write(string,'(f9.2,'' G'')') number / giga else if (abs(number) >= mega) then write(string,'(f9.2,'' M'')') number / mega else if (abs(number) >= kilo) then write(string,'(f9.2,'' k'')') number / kilo else write(string,'(f12.2)') number endif end function ! Now functions for C-API ! See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64947 #ifdef __GFORTRAN__ #define c_string(x) trim(safe_c_string(x)) function safe_c_string(ptr) result(str) type(c_ptr), value, intent(in) :: ptr character(kind=c_char, len=strnlen(ptr, int(name_length, kind=c_size_t))), pointer :: s character(kind=c_char, len=name_length) :: str call c_f_pointer(ptr, s) str = s end function #else function c_string(ptr) result(s) type(c_ptr), value, intent(in) :: ptr character(kind=c_char, len=strnlen(ptr, int(name_length, kind=c_size_t))), pointer :: s call c_f_pointer(ptr, s) end function #endif !c> ftimer_node_t* ftimings_root_node(ftimer_t *timer); function ftimings_root_node(ptr) result(root_ptr) bind(c, name="ftimings_root_node") type(c_ptr), value, intent(in) :: ptr type(timer_t), pointer :: timer type(c_ptr) :: root_ptr call c_f_pointer(ptr, timer) root_ptr = c_loc(timer%root) end function !c> ftimer_node_t* ftimings_node_get_child(ftimer_node_t *node, const char *name); function ftimings_node_get_child(node_ptr, name) result(child_ptr) bind(c, name="ftimings_node_get_child") type(c_ptr), intent(in), value :: node_ptr type(c_ptr), intent(in), value :: name type(c_ptr) :: child_ptr type(node_t), pointer :: node, child call c_f_pointer(node_ptr, node) child => node%get_child(c_string(name)) child_ptr = c_loc(child) end function !c> ftimer_t* ftimings_create(); function ftimings_create() result(ptr) bind(c, name="ftimings_create") type(c_ptr) :: ptr type(timer_t), pointer :: timer allocate(timer) ptr = c_loc(timer) end function !c> void ftimings_destroy(ftimer_t *timer); subroutine ftimings_destroy(ptr) bind(c, name="ftimings_destroy") type(c_ptr), value, intent(in) :: ptr type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%free() deallocate(timer) end subroutine !c> void ftimings_enable(ftimer_t *timer); subroutine ftimings_enable(ptr) bind(c, name="ftimings_enable") type(c_ptr), value, intent(in) :: ptr type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%enable() end subroutine !c> void ftimings_disable(ftimer_t *timer); subroutine ftimings_disable(ptr) bind(c, name="ftimings_disable") type(c_ptr), value, intent(in) :: ptr type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%disable() end subroutine !c> #ifdef FTIMINGS_PRIVATE !c> void ftimings_error(ftimer_t *timer, const char *message); !c> #endif subroutine ftimings_error(timer_ptr, message_ptr) bind(c, name="ftimings_error") type(c_ptr), value, intent(in) :: timer_ptr, message_ptr type(timer_t), pointer :: timer character(kind=c_char, len=strlen(message_ptr)), pointer :: message call c_f_pointer(timer_ptr, timer) call c_f_pointer(message_ptr, message) call timer%error(message) end subroutine subroutine c_error_handler_wrapper(timer, handle, message) class(timer_t), intent(inout), target :: timer class(timer_error_handle_t), intent(in), pointer :: handle character(len=*), intent(in) :: message class(timer_c_error_handle_t), pointer :: c_error_handle type(timer_t), pointer :: base_type_timer abstract interface subroutine c_error_handler(timer_ptr, handle_ptr, message) import c_ptr, c_char type(c_ptr), value, intent(in) :: timer_ptr, handle_ptr character(kind=c_char, len=*) :: message end subroutine end interface procedure(c_error_handler), pointer :: c_handler c_error_handle => NULL() select type (handle) type is (timer_c_error_handle_t) c_error_handle => handle class default stop "c_error_handler_wrapper: Unknown runtime type for 'handle': This should not happen" end select base_type_timer => timer call c_f_procpointer(c_error_handle%c_handler, c_handler) call c_handler(c_loc(base_type_timer), c_error_handle%c_handle, trim(message) // c_null_char) end subroutine subroutine c_error_handle_free(handle, timer) class(timer_c_error_handle_t), intent(inout) :: handle class(timer_t), intent(inout) :: timer handle%c_handle = c_null_ptr handle%c_handler = c_null_funptr deallocate(timer%error_handle) nullify(timer%error_handle) end subroutine !c> void ftimings_register_error_handler(ftimer_t *timer, void (*error_handler)(ftimer_t*, void*, const char*), void *handle); subroutine ftimings_register_error_handler(ptr, c_error_handler, c_handle) bind(c, name="ftimings_register_error_handler") type(c_ptr), value, intent(in) :: ptr type(c_funptr), value, intent(in) :: c_error_handler type(c_ptr), value, intent(in) :: c_handle type(timer_t), pointer :: timer type(timer_c_error_handle_t), pointer :: handle call c_f_pointer(ptr, timer) allocate(handle) ! deallocated in c_error_handle_free handle%c_handler = c_error_handler handle%c_handle = c_handle timer%error_handler => c_error_handler_wrapper timer%error_handle => handle end subroutine !c> void ftimings_timer_measure_allocated_memory(ftimer_t *timer, int enabled); subroutine ftimings_measure_allocated_memory(ptr, enabled) bind(c, name="ftimings_measure_allocated_memory") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%measure_allocated_memory(enabled == 1) end subroutine !c> void ftimings_measure_virtual_memory(ftimer_t *timer, int enabled); subroutine ftimings_measure_virtual_memory(ptr, enabled) bind(c, name="ftimings_measure_virtual_memory") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%measure_virtual_memory(enabled == 1) end subroutine !c> void ftimings_measure_max_allocated_memory(ftimer_t *timer, int enabled); subroutine ftimings_measure_max_allocated_memory(ptr, enabled) bind(c, name="ftimings_measure_max_allocated_memory") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%measure_max_allocated_memory(enabled == 1) end subroutine !c> void ftimings_measure_memory_bandwidth(ftimer_t *timer, int enabled); subroutine ftimings_measure_memory_bandwidth(ptr, enabled) bind(c, name="ftimings_measure_memory_bandwidth") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%measure_memory_bandwidth(enabled == 1) end subroutine !c> void ftimings_measure_flops(ftimer_t *timer, int enabled); subroutine ftimings_measure_flops(ptr, enabled) bind(c, name="ftimings_measure_flops") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%measure_flops(enabled == 1) end subroutine !c> int ftimings_is_enabled(ftimer_t *timer); function ftimings_is_enabled(ptr) result(is) bind(c, name="ftimings_is_enabled") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int) :: is type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) if (timer%is_enabled()) then is = 1 else is = 0 endif end function !c> void ftimings_print_allocated_memory(ftimer_t *timer, int enable); subroutine ftimings_print_allocated_memory(ptr, enabled) bind(c, name="ftimings_print_allocated_memory") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%set_print_options(print_allocated_memory = enabled == 1) end subroutine !c> void ftimings_print_virtual_memory(ftimer_t *timer, int enable); subroutine ftimings_print_virtual_memory(ptr, enabled) bind(c, name="ftimings_print_virtual_memory") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%set_print_options(print_virtual_memory = enabled == 1) end subroutine !c> void ftimings_print_max_allocated_memory(ftimer_t *timer, int enable); subroutine ftimings_print_max_allocated_memory(ptr, enabled) bind(c, name="ftimings_print_max_allocated_memory") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%set_print_options(print_max_allocated_memory = enabled == 1) end subroutine !c> void ftimings_print_flop_count(ftimer_t *timer, int enable); subroutine ftimings_print_flop_count(ptr, enabled) bind(c, name="ftimings_print_flop_count") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%set_print_options(print_flop_count = enabled == 1) end subroutine !c> void ftimings_print_flop_rate(ftimer_t *timer, int enable); subroutine ftimings_print_flop_rate(ptr, enabled) bind(c, name="ftimings_print_flop_rate") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%set_print_options(print_flop_rate = enabled == 1) end subroutine !c> void ftimings_print_memory_transferred(ftimer_t *timer, int enable); subroutine ftimings_print_memory_transferred(ptr, enabled) bind(c, name="ftimings_print_memory_transferred") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%set_print_options(print_memory_transferred = enabled == 1) end subroutine !c> void ftimings_print_memory_bandwidth(ftimer_t *timer, int enable); subroutine ftimings_print_memory_bandwidth(ptr, enabled) bind(c, name="ftimings_print_memory_bandwidth") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%set_print_options(print_memory_bandwidth = enabled == 1) end subroutine !c> void ftimings_print_ai(ftimer_t *timer, int enable); subroutine ftimings_print_ai(ptr, enabled) bind(c, name="ftimings_print_ai") type(c_ptr), value, intent(in) :: ptr integer(kind=c_int), value, intent(in) :: enabled type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%set_print_options(print_ai = enabled == 1) end subroutine !c> void ftimings_start(ftimer_t *timer, const char *name); subroutine ftimings_start(ptr, name) bind(c, name="ftimings_start") type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: name type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%start(c_string(name)) end subroutine !c> void ftimings_start_replace(ftimer_t *timer, const char *name, int replace); subroutine ftimings_start_replace(ptr, name, replace) bind(c, name="ftimings_start_replace") type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: name integer(kind=c_int), value, intent(in) :: replace type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%start(c_string(name), replace == 1) end subroutine !c> void ftimings_stop(ftimer_t *timer, const char *name); subroutine ftimings_stop(ptr, name) bind(c, name="ftimings_stop") type(c_ptr), value, intent(in) :: ptr type(c_ptr), value, intent(in) :: name type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) if (c_associated(name)) then call timer%stop(c_string(name)) else call timer%stop() endif end subroutine !c> void ftimings_print_node(ftimer_t *timer, double threshold, ftimer_node_t *node); subroutine ftimings_print_node(timer_ptr, node_ptr, threshold) bind(c, name="ftimings_print_node") type(c_ptr), value, intent(in) :: timer_ptr type(c_ptr), value, intent(in) :: node_ptr real(kind=c_double), value, intent(in) :: threshold type(timer_t), pointer :: timer type(node_t), pointer :: node call c_f_pointer(timer_ptr, timer) call c_f_pointer(node_ptr, node) if (.not. associated(node)) then node => timer%root endif call timer_print_node(timer, node, threshold) end subroutine !c> double ftimings_in_entries_node(ftimer_t *timer, ftimer_node_t *node, const char *name); function ftimings_in_entries_node(timer_ptr, node_ptr, name) result(s) bind(c, name="ftimings_in_entries_node") type(c_ptr), value, intent(in) :: timer_ptr, node_ptr, name real(kind=c_double) :: s type(timer_t), pointer :: timer type(node_t), pointer :: node type(value_t) :: val call c_f_pointer(timer_ptr, timer) call c_f_pointer(node_ptr, node) if (.not. associated(node)) then node => timer%root endif val = node%sum_of_descendants_with_name(c_string(name)) s = real(val%micros, kind=rk) * 1e-6_rk end function !c> double ftimings_get_node(ftimer_t *timer, ftimer_node_t *node); function ftimings_get_node(timer_ptr, node_ptr) result(s) bind(c, name="ftimings_get_node") type(c_ptr), value, intent(in) :: timer_ptr, node_ptr real(kind=c_double) :: s type(timer_t), pointer :: timer type(node_t), pointer :: node call c_f_pointer(timer_ptr, timer) call c_f_pointer(node_ptr, node) s = 0._rk if (.not. timer%active) then return endif s = real(node%value%micros, kind=rk) * 1e-6_rk end function !c> double ftimings_since_node(ftimer_t *timer, ftimer_node_t *node); function ftimings_since_node(timer_ptr, node_ptr) result(s) bind(c, name="ftimings_since_node") type(c_ptr), value, intent(in) :: timer_ptr, node_ptr real(kind=c_double) :: s type(timer_t), pointer :: timer type(node_t), pointer :: node type(value_t) :: val call c_f_pointer(timer_ptr, timer) call c_f_pointer(node_ptr, node) s = 0._rk if (.not. timer%active) then return endif val = node%value + node%now() s = real(val%micros, kind=rk) * 1e-6_rk end function !c> void ftimings_sort(ftimer_t *timer); subroutine ftimings_sort(ptr) bind(c, name="ftimings_sort") type(c_ptr), value, intent(in) :: ptr type(timer_t), pointer :: timer call c_f_pointer(ptr, timer) call timer%sort() end subroutine end module