Commit 65eecca1 authored by Lorenz Huedepohl's avatar Lorenz Huedepohl
Browse files

New feature: measure CPU cycles

parent bf2857e1
......@@ -57,6 +57,7 @@ module ftimings
timer_sort, &
timer_set_print_options, &
timer_measure_flops, &
timer_measure_cycles, &
timer_measure_allocated_memory, &
timer_measure_virtual_memory, &
timer_measure_max_allocated_memory, &
......@@ -118,14 +119,16 @@ module ftimings
logical, private :: record_allocated_memory = .false. !< IF set to .true., record also the current resident set size
logical, private :: record_virtual_memory = .false. !< IF set to .true., record also the virtual memory
logical, private :: record_max_allocated_memory = .false. !< IF set to .true., record also the max resident set size ("high water mark")
logical, private :: record_flop_counts = .false. !< If set to .true., record also FLOP counts via PAPI calls
logical, private :: record_memory_bandwidth = .false. !< If set to .true., record also memory bandwidth via PAPI calls
logical, private :: record_flop_count = .false. !< If set to .true., record also FLOP counts via PAPI calls
logical, private :: record_cycle_count = .false. !< If set to .true., record also CPU cycle counts via PAPI calls
logical, private :: record_memory_bandwidth = .false. !< If set to .true., record also memory bandwidth via Linut perf syscalls
logical, private :: print_allocated_memory = .false.
logical, private :: print_max_allocated_memory = .false.
logical, private :: print_virtual_memory = .false.
logical, private :: print_flop_count = .false.
logical, private :: print_flop_rate = .false.
logical, private :: print_cycle_count = .false.
logical, private :: print_memory_transferred = .false.
logical, private :: print_memory_bandwidth = .false.
logical, private :: print_ai = .false.
......@@ -148,6 +151,7 @@ module ftimings
procedure, pass :: disable => timer_disable
procedure, pass :: is_enabled => timer_is_enabled
procedure, pass :: measure_flops => timer_measure_flops
procedure, pass :: measure_cycles => timer_measure_cycles
procedure, pass :: measure_allocated_memory => timer_measure_allocated_memory
procedure, pass :: measure_virtual_memory => timer_measure_virtual_memory
procedure, pass :: measure_max_allocated_memory => timer_measure_max_allocated_memory
......@@ -229,10 +233,18 @@ module ftimings
end interface
interface
pure subroutine flop_counter(flop) bind(C, name="ftimings_flop_counter")
function cycle_init() result(ret) bind(C, name="ftimings_cycle_init")
use, intrinsic :: iso_c_binding
implicit none
integer(kind=c_long_long), intent(out) :: flop
integer(kind=c_int) :: ret
end function
end interface
interface
pure subroutine papi_counters(flop, cycles) bind(C, name="ftimings_papi_counters")
use, intrinsic :: iso_c_binding
implicit none
integer(kind=c_long_long), intent(out) :: flop, cycles
end subroutine
end interface
#endif
......@@ -423,18 +435,44 @@ module ftimings
if (enabled) then
#ifdef HAVE_LIBPAPI
if (flop_init() == 1) then
self%record_flop_counts = .true.
self%record_flop_count = .true.
else
write(0,'(a)') "ftimings: Could not initialize PAPI, disabling FLOP counter"
self%record_flop_counts = .false.
self%record_flop_count = .false.
endif
#else
write(0,'(a)') "ftimings: not compiled with PAPI support, disabling FLOP counter"
self%record_flop_counts = .false.
self%record_flop_count = .false.
#endif
else
! Explicitly set to .false. by caller
self%record_flop_count = .false.
endif
end subroutine
!> Call with enabled = .true. to also record FLOP counts via PAPI calls.
!> By default no FLOPS are recored. Call with .false. to deactivate again.
!>
subroutine timer_measure_cycles(self, enabled)
class(timer_t), intent(inout) :: self
logical, intent(in) :: enabled
if (enabled) then
#ifdef HAVE_LIBPAPI
if (cycle_init() == 1) then
self%record_cycle_count = .true.
else
write(0,'(a)') "ftimings: Could not initialize PAPI, disabling CPU cycle counter"
self%record_cycle_count = .false.
endif
#else
write(0,'(a)') "ftimings: not compiled with PAPI support, disabling CPU cycle counter"
self%record_cycle_count = .false.
#endif
else
! Explicitly set to .false. by caller
self%record_flop_counts = .false.
self%record_cycle_count = .false.
endif
end subroutine
......@@ -466,7 +504,8 @@ module ftimings
!> resident memory ("high water mark")
!> \param print_flop_count Number of floating point operations
!> \param print_flop_rate Rate of floating point operations per second
!> \param print_memory_transferred Memory transferred from RAM to CPU
!> \param print_cycle_count Number of CPU cycles
!> \param print_memory_transferred Memory transferred from RAM to CPU
!> \param print_memory_bandwidth Memory bandwidth from RAM to CPU
!> \param print_ai Arithmetic intensity, that is number of
!> floating point operations per
......@@ -478,6 +517,7 @@ module ftimings
print_max_allocated_memory, &
print_flop_count, &
print_flop_rate, &
print_cycle_count, &
print_memory_transferred, &
print_memory_bandwidth, &
print_ai)
......@@ -488,6 +528,7 @@ module ftimings
print_max_allocated_memory, &
print_flop_count, &
print_flop_rate, &
print_cycle_count, &
print_memory_transferred, &
print_memory_bandwidth, &
print_ai
......@@ -495,35 +536,42 @@ module ftimings
if (present(print_allocated_memory)) then
self%print_allocated_memory = print_allocated_memory
if ((.not. self%record_allocated_memory) .and. self%print_allocated_memory) then
write(0,'(a)') "ftimings: Warning: RSS size recording was disabled, expect zeros!"
write(0,'(a)') "ftimings: Warning: RSS size recording is disabled, expect zeros!"
endif
endif
if (present(print_virtual_memory)) then
self%print_virtual_memory = print_virtual_memory
if ((.not. self%record_virtual_memory) .and. self%print_virtual_memory) then
write(0,'(a)') "ftimings: Warning: Virtual memory recording was disabled, expect zeros!"
write(0,'(a)') "ftimings: Warning: Virtual memory recording is disabled, expect zeros!"
endif
endif
if (present(print_max_allocated_memory)) then
self%print_max_allocated_memory = print_max_allocated_memory
if ((.not. self%record_max_allocated_memory) .and. self%print_max_allocated_memory) then
write(0,'(a)') "ftimings: Warning: HWM recording was disabled, expect zeros!"
write(0,'(a)') "ftimings: Warning: HWM recording is disabled, expect zeros!"
endif
endif
if (present(print_flop_count)) then
self%print_flop_count = print_flop_count
if ((.not. self%record_flop_counts) .and. self%print_flop_count) then
write(0,'(a)') "ftimings: Warning: FLOP counter was disabled, expect zeros!"
if ((.not. self%record_flop_count) .and. self%print_flop_count) then
write(0,'(a)') "ftimings: Warning: FLOP counter is disabled, expect zeros!"
endif
endif
if (present(print_flop_rate)) then
self%print_flop_rate = print_flop_rate
if ((.not. self%record_flop_counts) .and. self%print_flop_rate) then
write(0,'(a)') "ftimings: Warning: FLOP counter was disabled, expect zeros!"
if ((.not. self%record_flop_count) .and. self%print_flop_rate) then
write(0,'(a)') "ftimings: Warning: FLOP counter is disabled, expect zeros!"
endif
endif
if (present(print_cycle_count)) then
self%print_cycle_count = print_cycle_count
if ((.not. self%record_cycle_count) .and. self%print_cycle_count) then
write(0,'(a)') "ftimings: Warning: Cycle counter is disabled, expect zeros!"
endif
endif
......@@ -542,7 +590,7 @@ module ftimings
if (present(print_ai)) then
self%print_ai = print_ai
if (.not. (self%record_memory_bandwidth .and. self%record_flop_counts)) then
if (.not. (self%record_memory_bandwidth .and. self%record_flop_count)) then
write(0,'(a)') "ftimings: Warning: Memory bandwidth or FLOP counters were disabled, expect invalid values for AI"
endif
endif
......@@ -824,8 +872,9 @@ module ftimings
character(len=12), parameter :: ram = " alloc. RAM"
character(len=12), parameter :: vmem = " alloc. VM"
character(len=12), parameter :: hwm = " alloc. HWM"
character(len=12), parameter :: flop_rate = " float op/s"
character(len=12), parameter :: flop_count = "float op cnt"
character(len=12), parameter :: flop_rate = " float op/s"
character(len=12), parameter :: cycle_count = " CPU Cycles"
character(len=12), parameter :: mem_reads = " RAM read"
character(len=12), parameter :: mem_writes = " RAM written"
character(len=12), parameter :: bandwidth_read = " RAM read/s"
......@@ -862,6 +911,9 @@ module ftimings
if (self%print_flop_rate) then
write(unit_act,'(1x,a12)',advance='no') flop_rate
endif
if (self%print_cycle_count) then
write(unit_act,'(1x,a12)',advance='no') cycle_count
endif
if (self%print_memory_transferred) then
write(unit_act,'(1x,a12)',advance='no') mem_reads
write(unit_act,'(1x,a12)',advance='no') mem_writes
......@@ -897,6 +949,9 @@ module ftimings
if (self%print_flop_rate) then
write(unit_act,'(1x,a12)',advance='no') dash
endif
if (self%print_cycle_count) then
write(unit_act,'(1x,a12)',advance='no') dash
endif
if (self%print_memory_transferred) then
write(unit_act,'(1x,a12)',advance='no') dash
write(unit_act,'(1x,a12)',advance='no') dash
......@@ -1251,8 +1306,8 @@ module ftimings
endif
#ifdef HAVE_LIBPAPI
if (self%timer%record_flop_counts) then
call flop_counter(val%flop_count)
if (self%timer%record_flop_count) then
call papi_counters(val%flop_count, val%cycle_count)
endif
#endif
......@@ -1750,6 +1805,9 @@ module ftimings
if (timer%print_flop_rate) then
write(unit,'(1x,a12)',advance='no') nice_format_10(real(value%flop_count, kind=rk) / (value%micros * 1e-6_rk))
endif
if (timer%print_cycle_count) then
write(unit,'(1x,a12)',advance='no') nice_format_10(real(value%cycle_count, kind=rk))
endif
if (timer%print_memory_transferred) then
write(unit,'(1x,a12)',advance='no') nice_format_2(real(value%mem_reads, kind=rk))
write(unit,'(1x,a12)',advance='no') nice_format_2(real(value%mem_writes, kind=rk))
......@@ -1935,7 +1993,12 @@ module ftimings
stop "c_error_handler_wrapper: Unknown runtime type for 'handle': This should not happen"
end select
base_type_timer => timer
select type (timer)
type is (timer_t)
base_type_timer => timer
class default
stop "c_error_handler_wrapper: Unknown runtime type for 'timer': This should not happen"
end select
call c_f_procpointer(c_error_handle%c_handler, c_handler)
call c_handler(c_loc(base_type_timer), c_error_handle%c_handle, trim(message) // c_null_char)
......@@ -2019,6 +2082,16 @@ module ftimings
call timer%measure_flops(enabled == 1)
end subroutine
!c> void ftimings_measure_cycles(ftimer_t *timer, int enabled);
subroutine ftimings_measure_cycles(ptr, enabled) bind(c, name="ftimings_measure_cycles")
type(c_ptr), value, intent(in) :: ptr
integer(kind=c_int), value, intent(in) :: enabled
type(timer_t), pointer :: timer
call c_f_pointer(ptr, timer)
call timer%measure_cycles(enabled == 1)
end subroutine
!c> int ftimings_is_enabled(ftimer_t *timer);
function ftimings_is_enabled(ptr) result(is) bind(c, name="ftimings_is_enabled")
type(c_ptr), value, intent(in) :: ptr
......@@ -2087,6 +2160,17 @@ module ftimings
call timer%set_print_options(print_flop_rate = enabled == 1)
end subroutine
!c> void ftimings_print_cycle_count(ftimer_t *timer, int enable);
subroutine ftimings_print_cycle_count(ptr, enabled) bind(c, name="ftimings_print_cycle_count")
type(c_ptr), value, intent(in) :: ptr
integer(kind=c_int), value, intent(in) :: enabled
type(timer_t), pointer :: timer
call c_f_pointer(ptr, timer)
call timer%set_print_options(print_cycle_count = enabled == 1)
end subroutine
!c> void ftimings_print_memory_transferred(ftimer_t *timer, int enable);
subroutine ftimings_print_memory_transferred(ptr, enabled) bind(c, name="ftimings_print_memory_transferred")
type(c_ptr), value, intent(in) :: ptr
......
......@@ -28,13 +28,14 @@ module ftimings_value
type value_t
integer(kind=C_INT64_T) :: micros = 0 ! microseconds spent in this node
integer(kind=C_LONG) :: virtualmem = 0 ! newly created virtual memory
integer(kind=C_LONG) :: maxrsssize = 0 ! newly used max. resident mem ("high water mark")
integer(kind=C_LONG) :: maxrsssize = 0 ! newly used max. resident mem ("high water mark")
integer(kind=C_LONG) :: rsssize = 0 ! newly used resident memory
integer(kind=C_LONG_LONG) :: flop_count = 0 ! floating point operations done in this node
integer(kind=C_INT64_T) :: mem_reads = 0 ! bytes read from DRAM
integer(kind=C_INT64_T) :: mem_writes = 0 ! bytes written to DRAM
integer(kind=C_LONG_LONG) :: cycle_count = 0 ! cpu cycles done in this node
integer(kind=C_INT64_T) :: mem_reads = 0 ! bytes read from main RAM
integer(kind=C_INT64_T) :: mem_writes = 0 ! bytes written to main RAM
end type
interface operator(+)
......@@ -50,7 +51,8 @@ module ftimings_value
rsssize = 0, &
virtualmem = 0, &
maxrsssize = 0, &
flop_count = 0)
flop_count = 0, &
cycle_count = 0)
contains
......@@ -63,6 +65,7 @@ module ftimings_value
c%maxrsssize = a%maxrsssize + b%maxrsssize
#ifdef HAVE_LIBPAPI
c%flop_count = a%flop_count + b%flop_count
c%cycle_count = a%cycle_count + b%cycle_count
#endif
#ifdef HAVE_PERF
c%mem_reads = a%mem_reads + b%mem_reads
......@@ -79,6 +82,7 @@ module ftimings_value
c%maxrsssize = a%maxrsssize - b%maxrsssize
#ifdef HAVE_LIBPAPI
c%flop_count = a%flop_count - b%flop_count
c%cycle_count = a%cycle_count - b%cycle_count
#endif
#ifdef HAVE_PERF
c%mem_reads = a%mem_reads - b%mem_reads
......@@ -95,6 +99,7 @@ module ftimings_value
neg_a%maxrsssize = - a%maxrsssize
#ifdef HAVE_LIBPAPI
neg_a%flop_count = - a%flop_count
neg_a%cycle_count = - a%cycle_count
#endif
#ifdef HAVE_PERF
neg_a%mem_reads = - a%mem_reads
......
......@@ -29,7 +29,8 @@ static int event_set;
static int tried_papi_init = 0;
static int papi_available = 0;
static int flops_available = 0;
static int flop_available = 0;
static int cycle_available = 0;
#ifdef HAVE_LIBPAPI
#include <papi.h>
......@@ -75,15 +76,36 @@ static int papi_init(void) {
if ((ret = PAPI_query_event(PAPI_DP_OPS)) < 0) {
fprintf(stderr, "ftimings: %s:%d: PAPI_query_event(PAPI_DP_INS): %s\n",
__FILE__, __LINE__, PAPI_strerror(ret));
flops_available = 0;
flop_available = 0;
} else if ((ret = PAPI_add_event(event_set, PAPI_DP_OPS)) < 0) {
fprintf(stderr, "ftimings: %s:%d PAPI_add_event(): %s\n",
__FILE__, __LINE__, PAPI_strerror(ret));
flops_available = 0;
flop_available = 0;
} else {
flops_available = 1;
flop_available = 1;
}
/* Cycle counter
*/
if ((ret = PAPI_query_event(PAPI_TOT_CYC)) < 0) {
fprintf(stderr, "ftimings: %s:%d: PAPI_query_event(PAPI_TOT_CYC): %s\n",
__FILE__, __LINE__, PAPI_strerror(ret));
cycle_available = 0;
} else if ((ret = PAPI_add_event(event_set, PAPI_TOT_CYC)) < 0) {
fprintf(stderr, "ftimings: %s:%d PAPI_add_event(): %s\n",
__FILE__, __LINE__, PAPI_strerror(ret));
cycle_available = 0;
} else {
cycle_available = 1;
}
/* Start */
if ((ret = PAPI_start(event_set)) < 0) {
fprintf(stderr, "ftimings: %s:%d PAPI_start(): %s\n",
__FILE__, __LINE__, PAPI_strerror(ret));
papi_available = 0;
break;
}
/* PAPI works */
papi_available = 1;
......@@ -99,11 +121,19 @@ int ftimings_flop_init(void) {
papi_init();
}
return flops_available;
return flop_available;
}
void ftimings_flop_counter(long long *flop) {
long long res[3];
int ftimings_cycle_init(void) {
if (!tried_papi_init) {
papi_init();
}
return cycle_available;
}
void ftimings_papi_counters(long long *flop, long long *cycles) {
long long res[2];
int i, ret;
if ((ret = PAPI_read(event_set, &res[0])) < 0) {
......@@ -112,10 +142,15 @@ void ftimings_flop_counter(long long *flop) {
}
i = 0;
if (flops_available) {
if (flop_available) {
*flop = res[i++];
} else {
*flop = 0LL;
}
if (cycle_available) {
*cycles = res[i++];
} else {
*cycles = 0LL;
}
}
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment