Commit 9a2fa0e7 authored by Andreas Marek's avatar Andreas Marek
Browse files

Introduce option to record max. resident memory

parent 5a8923f4
......@@ -13,6 +13,7 @@ libftimings_@FTIMINGS_API_VERSION@_@FC@_la_SOURCES = \
ftimings/papi.c \
ftimings/resident_set_size.c \
ftimings/virtual_memory.c \
ftimings/highwater_mark.c \
ftimings/ftimings_type.F90 \
ftimings/ftimings_value.F90 \
ftimings/ftimings.F90
......
......@@ -28,6 +28,7 @@ module ftimings
timer_measure_flops, &
timer_measure_allocated_memory, &
timer_measure_virtual_memory, &
timer_measure_max_allocated_memory, &
timer_measure_memory_bandwidth
character(len=name_length), private, parameter :: own = "(own)"
......@@ -63,10 +64,12 @@ module ftimings
logical, private :: active = .false. !< If set to .false., most operations return immediately without any action
logical, private :: record_allocated_memory = .false. !< IF set to .true., record also the current resident set size
logical, private :: record_virtual_memory = .false. !< IF set to .true., record also the virtual memory
logical, private :: record_max_allocated_memory = .false. !< IF set to .true., record also the max resident set size ("high water mark")
logical, private :: record_flop_counts = .false. !< If set to .true., record also FLOP counts via PAPI calls
logical, private :: record_memory_bandwidth = .false. !< If set to .true., record also the memory bandwidth via PAPI calls
logical, private :: print_allocated_memory = .false.
logical, private :: print_max_allocated_memory = .false.
logical, private :: print_virtual_memory = .false.
logical, private :: print_flop_count = .false.
logical, private :: print_flop_rate = .false.
......@@ -88,6 +91,7 @@ module ftimings
procedure, pass :: measure_flops => timer_measure_flops
procedure, pass :: measure_allocated_memory => timer_measure_allocated_memory
procedure, pass :: measure_virtual_memory => timer_measure_virtual_memory
procedure, pass :: measure_max_allocated_memory => timer_measure_max_allocated_memory
procedure, pass :: measure_memory_bandwidth => timer_measure_memory_bandwidth
procedure, pass :: set_print_options => timer_set_print_options
procedure, pass :: in_entries => timer_in_entries
......@@ -173,6 +177,14 @@ module ftimings
end function
end interface
!> Explicit interface to the C routine ftimings_highwater_mark(), which
!> takes no arguments and returns a C long.
interface
  function max_resident_set_size() result(peak_rss) &
      bind(C, name="ftimings_highwater_mark")
    use, intrinsic :: iso_c_binding, only : C_LONG
    implicit none
    integer(kind=C_LONG) :: peak_rss
  end function max_resident_set_size
end interface
contains
!> Activate the timer, without this, most methods are non-ops.
......@@ -209,6 +221,20 @@ module ftimings
self%record_virtual_memory = enabled
end subroutine
!> Switch recording of the increase of the maximum resident memory
!> ("high water mark") on or off.
!>
!> Recording is disabled by default. Taking a measurement opens
!> /proc/self/status, parses it and closes it again, which is quite
!> costly, so enable it only where it is really needed.
!>
!> \param enabled   .true. to start recording, .false. to deactivate again
subroutine timer_measure_max_allocated_memory(self, enabled)
  class(timer_t), intent(inout) :: self
  logical,        intent(in)    :: enabled

  self%record_max_allocated_memory = enabled
end subroutine timer_measure_max_allocated_memory
!> Call with enabled = .true. to also record the memory bandwidth with PAPI
!> By default, this is not recorded. Call with .false. to deactivate again.
!>
......@@ -276,20 +302,26 @@ module ftimings
!> Control what to print on following %print calls
!>
!> \param print_allocated_memory Amount of newly allocated, resident memory
!> \param print_virtual_memory Amount of newly created virtual memory
!> \param print_flop_count Number of floating point operations
!> \param print_flop_rate Rate of floating point operations per second
!> \param print_ldst Number of loads+stores
!> \param print_memory_bandwidth Rate of loads+stores per second
!> \param print_ai Arithmetic intensity, that is number of
!> floating point operations per number of
!> load and store operations (currently untested)
!> \param bytes_per_ldst For calculating the AI, assume this number
!> of bytes per load or store (default: 8)
!> \param print_allocated_memory Amount of newly allocated,
!> resident memory
!> \param print_virtual_memory Amount of newly created virtual
!> memory
!> \param print_max_allocated_memory Amount of new increase of max.
!> resident memory ("high water mark")
!> \param print_flop_count Number of floating point operations
!> \param print_flop_rate Rate of floating point operations per second
!> \param print_ldst Number of loads+stores
!> \param print_memory_bandwidth Rate of loads+stores per second
!> \param print_ai Arithmetic intensity, that is number of
!> floating point operations per
!> number of load and store
!> operations (currently untested)
!> \param bytes_per_ldst For calculating the AI, assume this number
!> of bytes per load or store (default: 8)
subroutine timer_set_print_options(self, &
print_allocated_memory, &
print_virtual_memory, &
print_max_allocated_memory, &
print_flop_count, &
print_flop_rate, &
print_ldst, &
......@@ -300,6 +332,7 @@ module ftimings
logical, intent(in), optional :: &
print_allocated_memory, &
print_virtual_memory, &
print_max_allocated_memory, &
print_flop_count, &
print_flop_rate, &
print_ldst, &
......@@ -321,6 +354,13 @@ module ftimings
endif
endif
if (present(print_max_allocated_memory)) then
self%print_max_allocated_memory = print_max_allocated_memory
if ((.not. self%record_max_allocated_memory) .and. self%print_max_allocated_memory) then
write(0,'(a)') "ftimings: Warning: HWM recording was disabled, expect zeros!"
endif
endif
if (present(print_flop_count)) then
self%print_flop_count = print_flop_count
if ((.not. self%record_flop_counts) .and. self%print_flop_count) then
......@@ -556,6 +596,7 @@ module ftimings
character(len=12), parameter :: fract = " fraction"
character(len=12), parameter :: ram = " alloc. RAM"
character(len=12), parameter :: vmem = " alloc. VM"
character(len=12), parameter :: hwm = " alloc. HWM"
character(len=12), parameter :: flop_rate = " Mflop/s"
character(len=12), parameter :: flop_count = " Mflop"
character(len=12), parameter :: ldst = "loads+stores"
......@@ -615,6 +656,10 @@ module ftimings
write(unit_act,'(1x,a12)',advance='no') vmem
endif
if (self%print_max_allocated_memory) then
write(unit_act,'(1x,a12)',advance='no') hwm
endif
if (self%print_flop_count) then
write(unit_act,'(1x,a12)',advance='no') flop_count
endif
......@@ -644,6 +689,10 @@ module ftimings
write(unit_act,'(1x,a12)',advance='no') dash
endif
if (self%print_max_allocated_memory) then
write(unit_act,'(1x,a12)',advance='no') dash
endif
if (self%print_flop_count) then
write(unit_act,'(1x,a12)',advance='no') dash
endif
......@@ -885,6 +934,10 @@ module ftimings
val%virtualmem = virtual_memory()
endif
if (self%timer%record_max_allocated_memory) then
val%maxrsssize = max_resident_set_size()
endif
#ifdef HAVE_LIBPAPI
if (self%timer%record_flop_counts .or. self%timer%record_memory_bandwidth) then
call papi_counters(val%flop_count, val%ldst)
......@@ -1347,6 +1400,11 @@ module ftimings
nice_format(real(value%virtualmem, kind=C_DOUBLE))
endif
if (timer%print_max_allocated_memory) then
write(unit,'(1x,a12)',advance='no') &
nice_format(real(value%maxrsssize, kind=C_DOUBLE))
endif
if (timer%print_flop_count) then
write(unit,'(1x,f12.2)',advance='no') real(value%flop_count, kind=rk) / 1e6_rk
endif
......
......@@ -10,6 +10,8 @@ module ftimings_value
type value_t
integer(kind=C_INT64_T) :: micros = 0 ! microseconds spent in this node
integer(kind=C_LONG) :: virtualmem = 0 ! newly created virtual memory
integer(kind=C_LONG) :: maxrsssize = 0 ! newly used max. resident mem ("high water mark")
integer(kind=C_LONG) :: rsssize = 0 ! newly used resident memory
integer(kind=C_LONG_LONG) :: flop_count = 0 ! floating point operations done in this node
......@@ -28,6 +30,7 @@ module ftimings_value
type(value_t), parameter :: null_value = value_t(micros = 0, &
rsssize = 0, &
virtualmem = 0, &
maxrsssize = 0, &
flop_count = 0)
contains
......@@ -38,6 +41,7 @@ module ftimings_value
c%micros = a%micros + b%micros
c%rsssize = a%rsssize + b%rsssize
c%virtualmem = a%virtualmem + b%virtualmem
c%maxrsssize = a%maxrsssize + b%maxrsssize
#ifdef HAVE_LIBPAPI
c%flop_count = a%flop_count + b%flop_count
c%ldst = a%ldst + b%ldst
......@@ -50,6 +54,7 @@ module ftimings_value
c%micros = a%micros - b%micros
c%rsssize = a%rsssize - b%rsssize
c%virtualmem = a%virtualmem - b%virtualmem
c%maxrsssize = a%maxrsssize - b%maxrsssize
#ifdef HAVE_LIBPAPI
c%flop_count = a%flop_count - b%flop_count
c%ldst = a%ldst - b%ldst
......@@ -62,6 +67,7 @@ module ftimings_value
neg_a%micros = - a%micros
neg_a%rsssize = - a%rsssize
neg_a%virtualmem = - a%virtualmem
neg_a%maxrsssize = - a%maxrsssize
#ifdef HAVE_LIBPAPI
neg_a%flop_count = - a%flop_count
neg_a%ldst = - a%ldst
......
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
/*
 * Return the peak resident set size ("high water mark") of the calling
 * process in bytes.
 *
 * The value is taken from the "VmHWM:" entry of /proc/self/status, which
 * is given in kB, and is multiplied by 1024 before being returned.
 *
 * Returns 0 if /proc/self/status cannot be opened, if it contains no
 * "VmHWM:" entry, or if that entry does not start with a number.
 * All resources (line buffer, file handle) are released on every path.
 */
long ftimings_highwater_mark(void) {
  static const char key[] = "VmHWM:";
  const size_t key_len = sizeof(key) - 1;
  long hwm = 0L;
  char *line = NULL;  /* getline() allocates and grows this buffer itself */
  size_t cap = 0;
  FILE *fp;

  fp = fopen("/proc/self/status", "r");
  if (fp == NULL) {
    return 0L;
  }

  /* Scan line by line until the VmHWM entry is found or the file ends */
  while (getline(&line, &cap, fp) != -1) {
    if (strncmp(line, key, key_len) == 0) {
      /*
       * strtol() skips the blanks after the key and stops at the
       * trailing " kB\n", so the suffix does not need to be cut off.
       */
      const char *start = line + key_len;
      char *end = NULL;
      long kb = strtol(start, &end, 10);

      if (end != start && kb > 0) {
        hwm = kb * 1024L;
      }
      break;
    }
  }

  /* free(NULL) is a no-op, so this is safe even if nothing was read */
  free(line);
  fclose(fp);

  return hwm;
}
......@@ -9,11 +9,13 @@ program test_timings
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_memory_bandwidth(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
print_flop_count=.true., &
print_flop_rate=.true., &
print_virtual_memory=.true., &
print_max_allocated_memory=.true., &
print_memory_bandwidth=.true., &
print_ai=.true., bytes_per_ldst=16)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment