Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Lorenz Huedepohl
ftimings
Commits
65eecca1
Commit
65eecca1
authored
Mar 02, 2015
by
Lorenz Huedepohl
Browse files
New feature: measure CPU cycles
parent
bf2857e1
Changes
3
Hide whitespace changes
Inline
Side-by-side
ftimings/ftimings.F90
View file @
65eecca1
...
...
@@ -57,6 +57,7 @@ module ftimings
timer_sort
,
&
timer_set_print_options
,
&
timer_measure_flops
,
&
timer_measure_cycles
,
&
timer_measure_allocated_memory
,
&
timer_measure_virtual_memory
,
&
timer_measure_max_allocated_memory
,
&
...
...
@@ -118,14 +119,16 @@ module ftimings
logical
,
private
::
record_allocated_memory
=
.false.
!< IF set to .true., record also the current resident set size
logical
,
private
::
record_virtual_memory
=
.false.
!< IF set to .true., record also the virtual memory
logical
,
private
::
record_max_allocated_memory
=
.false.
!< IF set to .true., record also the max resident set size ("high water mark")
logical
,
private
::
record_flop_counts
=
.false.
!< If set to .true., record also FLOP counts via PAPI calls
logical
,
private
::
record_memory_bandwidth
=
.false.
!< If set to .true., record also memory bandwidth via PAPI calls
logical
,
private
::
record_flop_count
=
.false.
!< If set to .true., record also FLOP counts via PAPI calls
logical
,
private
::
record_cycle_count
=
.false.
!< If set to .true., record also CPU cycle counts via PAPI calls
logical
,
private
::
record_memory_bandwidth
=
.false.
!< If set to .true., record also memory bandwidth via Linut perf syscalls
logical
,
private
::
print_allocated_memory
=
.false.
logical
,
private
::
print_max_allocated_memory
=
.false.
logical
,
private
::
print_virtual_memory
=
.false.
logical
,
private
::
print_flop_count
=
.false.
logical
,
private
::
print_flop_rate
=
.false.
logical
,
private
::
print_cycle_count
=
.false.
logical
,
private
::
print_memory_transferred
=
.false.
logical
,
private
::
print_memory_bandwidth
=
.false.
logical
,
private
::
print_ai
=
.false.
...
...
@@ -148,6 +151,7 @@ module ftimings
procedure
,
pass
::
disable
=>
timer_disable
procedure
,
pass
::
is_enabled
=>
timer_is_enabled
procedure
,
pass
::
measure_flops
=>
timer_measure_flops
procedure
,
pass
::
measure_cycles
=>
timer_measure_cycles
procedure
,
pass
::
measure_allocated_memory
=>
timer_measure_allocated_memory
procedure
,
pass
::
measure_virtual_memory
=>
timer_measure_virtual_memory
procedure
,
pass
::
measure_max_allocated_memory
=>
timer_measure_max_allocated_memory
...
...
@@ -229,10 +233,18 @@ module ftimings
end
interface
interface
pure
subroutine
flop_counter
(
flop
)
bind
(
C
,
name
=
"ftimings_
flop_counter
"
)
function
cycle_init
()
result
(
ret
)
bind
(
C
,
name
=
"ftimings_
cycle_init
"
)
use
,
intrinsic
::
iso_c_binding
implicit
none
integer
(
kind
=
c_long_long
),
intent
(
out
)
::
flop
integer
(
kind
=
c_int
)
::
ret
end
function
end
interface
interface
pure
subroutine
papi_counters
(
flop
,
cycles
)
bind
(
C
,
name
=
"ftimings_papi_counters"
)
use
,
intrinsic
::
iso_c_binding
implicit
none
integer
(
kind
=
c_long_long
),
intent
(
out
)
::
flop
,
cycles
end
subroutine
end
interface
#endif
...
...
@@ -423,18 +435,44 @@ module ftimings
if
(
enabled
)
then
#ifdef HAVE_LIBPAPI
if
(
flop_init
()
==
1
)
then
self
%
record_flop_count
s
=
.true.
self
%
record_flop_count
=
.true.
else
write
(
0
,
'(a)'
)
"ftimings: Could not initialize PAPI, disabling FLOP counter"
self
%
record_flop_count
s
=
.false.
self
%
record_flop_count
=
.false.
endif
#else
write
(
0
,
'(a)'
)
"ftimings: not compiled with PAPI support, disabling FLOP counter"
self
%
record_flop_counts
=
.false.
self
%
record_flop_count
=
.false.
#endif
else
! Explicitly set to .false. by caller
self
%
record_flop_count
=
.false.
endif
end
subroutine
!> Call with enabled = .true. to also record FLOP counts via PAPI calls.
!> By default no FLOPS are recored. Call with .false. to deactivate again.
!>
subroutine
timer_measure_cycles
(
self
,
enabled
)
class
(
timer_t
),
intent
(
inout
)
::
self
logical
,
intent
(
in
)
::
enabled
if
(
enabled
)
then
#ifdef HAVE_LIBPAPI
if
(
cycle_init
()
==
1
)
then
self
%
record_cycle_count
=
.true.
else
write
(
0
,
'(a)'
)
"ftimings: Could not initialize PAPI, disabling CPU cycle counter"
self
%
record_cycle_count
=
.false.
endif
#else
write
(
0
,
'(a)'
)
"ftimings: not compiled with PAPI support, disabling CPU cycle counter"
self
%
record_cycle_count
=
.false.
#endif
else
! Explicitly set to .false. by caller
self
%
record_
flop
_count
s
=
.false.
self
%
record_
cycle
_count
=
.false.
endif
end
subroutine
...
...
@@ -466,7 +504,8 @@ module ftimings
!> resident memory ("high water mark")
!> \param print_flop_count Number of floating point operations
!> \param print_flop_rate Rate of floating point operations per second
!> \param print_memory_transferred Memory transferred from RAM to CPU
!> \param print_cycle_count Number of CPU cycles
!> \param print_memory_transferred Memory transferred from RAM to CPU
!> \param print_memory_bandwidth Memory bandwidth from RAM to CPU
!> \param print_ai Arithmetic intensity, that is number of
!> floating point operations per
...
...
@@ -478,6 +517,7 @@ module ftimings
print_max_allocated_memory
,
&
print_flop_count
,
&
print_flop_rate
,
&
print_cycle_count
,
&
print_memory_transferred
,
&
print_memory_bandwidth
,
&
print_ai
)
...
...
@@ -488,6 +528,7 @@ module ftimings
print_max_allocated_memory
,
&
print_flop_count
,
&
print_flop_rate
,
&
print_cycle_count
,
&
print_memory_transferred
,
&
print_memory_bandwidth
,
&
print_ai
...
...
@@ -495,35 +536,42 @@ module ftimings
if
(
present
(
print_allocated_memory
))
then
self
%
print_allocated_memory
=
print_allocated_memory
if
((
.not.
self
%
record_allocated_memory
)
.and.
self
%
print_allocated_memory
)
then
write
(
0
,
'(a)'
)
"ftimings: Warning: RSS size recording
wa
s disabled, expect zeros!"
write
(
0
,
'(a)'
)
"ftimings: Warning: RSS size recording
i
s disabled, expect zeros!"
endif
endif
if
(
present
(
print_virtual_memory
))
then
self
%
print_virtual_memory
=
print_virtual_memory
if
((
.not.
self
%
record_virtual_memory
)
.and.
self
%
print_virtual_memory
)
then
write
(
0
,
'(a)'
)
"ftimings: Warning: Virtual memory recording
wa
s disabled, expect zeros!"
write
(
0
,
'(a)'
)
"ftimings: Warning: Virtual memory recording
i
s disabled, expect zeros!"
endif
endif
if
(
present
(
print_max_allocated_memory
))
then
self
%
print_max_allocated_memory
=
print_max_allocated_memory
if
((
.not.
self
%
record_max_allocated_memory
)
.and.
self
%
print_max_allocated_memory
)
then
write
(
0
,
'(a)'
)
"ftimings: Warning: HWM recording
wa
s disabled, expect zeros!"
write
(
0
,
'(a)'
)
"ftimings: Warning: HWM recording
i
s disabled, expect zeros!"
endif
endif
if
(
present
(
print_flop_count
))
then
self
%
print_flop_count
=
print_flop_count
if
((
.not.
self
%
record_flop_count
s
)
.and.
self
%
print_flop_count
)
then
write
(
0
,
'(a)'
)
"ftimings: Warning: FLOP counter
wa
s disabled, expect zeros!"
if
((
.not.
self
%
record_flop_count
)
.and.
self
%
print_flop_count
)
then
write
(
0
,
'(a)'
)
"ftimings: Warning: FLOP counter
i
s disabled, expect zeros!"
endif
endif
if
(
present
(
print_flop_rate
))
then
self
%
print_flop_rate
=
print_flop_rate
if
((
.not.
self
%
record_flop_counts
)
.and.
self
%
print_flop_rate
)
then
write
(
0
,
'(a)'
)
"ftimings: Warning: FLOP counter was disabled, expect zeros!"
if
((
.not.
self
%
record_flop_count
)
.and.
self
%
print_flop_rate
)
then
write
(
0
,
'(a)'
)
"ftimings: Warning: FLOP counter is disabled, expect zeros!"
endif
endif
if
(
present
(
print_cycle_count
))
then
self
%
print_cycle_count
=
print_cycle_count
if
((
.not.
self
%
record_cycle_count
)
.and.
self
%
print_cycle_count
)
then
write
(
0
,
'(a)'
)
"ftimings: Warning: Cycle counter is disabled, expect zeros!"
endif
endif
...
...
@@ -542,7 +590,7 @@ module ftimings
if
(
present
(
print_ai
))
then
self
%
print_ai
=
print_ai
if
(
.not.
(
self
%
record_memory_bandwidth
.and.
self
%
record_flop_count
s
))
then
if
(
.not.
(
self
%
record_memory_bandwidth
.and.
self
%
record_flop_count
))
then
write
(
0
,
'(a)'
)
"ftimings: Warning: Memory bandwidth or FLOP counters were disabled, expect invalid values for AI"
endif
endif
...
...
@@ -824,8 +872,9 @@ module ftimings
character
(
len
=
12
),
parameter
::
ram
=
" alloc. RAM"
character
(
len
=
12
),
parameter
::
vmem
=
" alloc. VM"
character
(
len
=
12
),
parameter
::
hwm
=
" alloc. HWM"
character
(
len
=
12
),
parameter
::
flop_rate
=
" float op/s"
character
(
len
=
12
),
parameter
::
flop_count
=
"float op cnt"
character
(
len
=
12
),
parameter
::
flop_rate
=
" float op/s"
character
(
len
=
12
),
parameter
::
cycle_count
=
" CPU Cycles"
character
(
len
=
12
),
parameter
::
mem_reads
=
" RAM read"
character
(
len
=
12
),
parameter
::
mem_writes
=
" RAM written"
character
(
len
=
12
),
parameter
::
bandwidth_read
=
" RAM read/s"
...
...
@@ -862,6 +911,9 @@ module ftimings
if
(
self
%
print_flop_rate
)
then
write
(
unit_act
,
'(1x,a12)'
,
advance
=
'no'
)
flop_rate
endif
if
(
self
%
print_cycle_count
)
then
write
(
unit_act
,
'(1x,a12)'
,
advance
=
'no'
)
cycle_count
endif
if
(
self
%
print_memory_transferred
)
then
write
(
unit_act
,
'(1x,a12)'
,
advance
=
'no'
)
mem_reads
write
(
unit_act
,
'(1x,a12)'
,
advance
=
'no'
)
mem_writes
...
...
@@ -897,6 +949,9 @@ module ftimings
if
(
self
%
print_flop_rate
)
then
write
(
unit_act
,
'(1x,a12)'
,
advance
=
'no'
)
dash
endif
if
(
self
%
print_cycle_count
)
then
write
(
unit_act
,
'(1x,a12)'
,
advance
=
'no'
)
dash
endif
if
(
self
%
print_memory_transferred
)
then
write
(
unit_act
,
'(1x,a12)'
,
advance
=
'no'
)
dash
write
(
unit_act
,
'(1x,a12)'
,
advance
=
'no'
)
dash
...
...
@@ -1251,8 +1306,8 @@ module ftimings
endif
#ifdef HAVE_LIBPAPI
if
(
self
%
timer
%
record_flop_count
s
)
then
call
flop
_counter
(
val
%
flop_count
)
if
(
self
%
timer
%
record_flop_count
)
then
call
papi
_counter
s
(
val
%
flop_count
,
val
%
cycle_count
)
endif
#endif
...
...
@@ -1750,6 +1805,9 @@ module ftimings
if
(
timer
%
print_flop_rate
)
then
write
(
unit
,
'(1x,a12)'
,
advance
=
'no'
)
nice_format_10
(
real
(
value
%
flop_count
,
kind
=
rk
)
/
(
value
%
micros
*
1e-6_rk
))
endif
if
(
timer
%
print_cycle_count
)
then
write
(
unit
,
'(1x,a12)'
,
advance
=
'no'
)
nice_format_10
(
real
(
value
%
cycle_count
,
kind
=
rk
))
endif
if
(
timer
%
print_memory_transferred
)
then
write
(
unit
,
'(1x,a12)'
,
advance
=
'no'
)
nice_format_2
(
real
(
value
%
mem_reads
,
kind
=
rk
))
write
(
unit
,
'(1x,a12)'
,
advance
=
'no'
)
nice_format_2
(
real
(
value
%
mem_writes
,
kind
=
rk
))
...
...
@@ -1935,7 +1993,12 @@ module ftimings
stop
"c_error_handler_wrapper: Unknown runtime type for 'handle': This should not happen"
end
select
base_type_timer
=>
timer
select
type
(
timer
)
type
is
(
timer_t
)
base_type_timer
=>
timer
class
default
stop
"c_error_handler_wrapper: Unknown runtime type for 'timer': This should not happen"
end
select
call
c_f_procpointer
(
c_error_handle
%
c_handler
,
c_handler
)
call
c_handler
(
c_loc
(
base_type_timer
),
c_error_handle
%
c_handle
,
trim
(
message
)
//
c_null_char
)
...
...
@@ -2019,6 +2082,16 @@ module ftimings
call
timer
%
measure_flops
(
enabled
==
1
)
end
subroutine
!c> void ftimings_measure_cycles(ftimer_t *timer, int enabled);
subroutine
ftimings_measure_cycles
(
ptr
,
enabled
)
bind
(
c
,
name
=
"ftimings_measure_cycles"
)
type
(
c_ptr
),
value
,
intent
(
in
)
::
ptr
integer
(
kind
=
c_int
),
value
,
intent
(
in
)
::
enabled
type
(
timer_t
),
pointer
::
timer
call
c_f_pointer
(
ptr
,
timer
)
call
timer
%
measure_cycles
(
enabled
==
1
)
end
subroutine
!c> int ftimings_is_enabled(ftimer_t *timer);
function
ftimings_is_enabled
(
ptr
)
result
(
is
)
bind
(
c
,
name
=
"ftimings_is_enabled"
)
type
(
c_ptr
),
value
,
intent
(
in
)
::
ptr
...
...
@@ -2087,6 +2160,17 @@ module ftimings
call
timer
%
set_print_options
(
print_flop_rate
=
enabled
==
1
)
end
subroutine
!c> void ftimings_print_cycle_count(ftimer_t *timer, int enable);
subroutine
ftimings_print_cycle_count
(
ptr
,
enabled
)
bind
(
c
,
name
=
"ftimings_print_cycle_count"
)
type
(
c_ptr
),
value
,
intent
(
in
)
::
ptr
integer
(
kind
=
c_int
),
value
,
intent
(
in
)
::
enabled
type
(
timer_t
),
pointer
::
timer
call
c_f_pointer
(
ptr
,
timer
)
call
timer
%
set_print_options
(
print_cycle_count
=
enabled
==
1
)
end
subroutine
!c> void ftimings_print_memory_transferred(ftimer_t *timer, int enable);
subroutine
ftimings_print_memory_transferred
(
ptr
,
enabled
)
bind
(
c
,
name
=
"ftimings_print_memory_transferred"
)
type
(
c_ptr
),
value
,
intent
(
in
)
::
ptr
...
...
ftimings/ftimings_value.F90
View file @
65eecca1
...
...
@@ -28,13 +28,14 @@ module ftimings_value
type
value_t
integer
(
kind
=
C_INT64_T
)
::
micros
=
0
! microseconds spent in this node
integer
(
kind
=
C_LONG
)
::
virtualmem
=
0
! newly created virtual memory
integer
(
kind
=
C_LONG
)
::
maxrsssize
=
0
! newly used max. resident mem ("high water mark")
integer
(
kind
=
C_LONG
)
::
maxrsssize
=
0
! newly used max. resident mem ("high water mark")
integer
(
kind
=
C_LONG
)
::
rsssize
=
0
! newly used resident memory
integer
(
kind
=
C_LONG_LONG
)
::
flop_count
=
0
! floating point operations done in this node
integer
(
kind
=
C_INT64_T
)
::
mem_reads
=
0
! bytes read from DRAM
integer
(
kind
=
C_INT64_T
)
::
mem_writes
=
0
! bytes written to DRAM
integer
(
kind
=
C_LONG_LONG
)
::
cycle_count
=
0
! cpu cycles done in this node
integer
(
kind
=
C_INT64_T
)
::
mem_reads
=
0
! bytes read from main RAM
integer
(
kind
=
C_INT64_T
)
::
mem_writes
=
0
! bytes written to main RAM
end
type
interface
operator
(
+
)
...
...
@@ -50,7 +51,8 @@ module ftimings_value
rsssize
=
0
,
&
virtualmem
=
0
,
&
maxrsssize
=
0
,
&
flop_count
=
0
)
flop_count
=
0
,
&
cycle_count
=
0
)
contains
...
...
@@ -63,6 +65,7 @@ module ftimings_value
c
%
maxrsssize
=
a
%
maxrsssize
+
b
%
maxrsssize
#ifdef HAVE_LIBPAPI
c
%
flop_count
=
a
%
flop_count
+
b
%
flop_count
c
%
cycle_count
=
a
%
cycle_count
+
b
%
cycle_count
#endif
#ifdef HAVE_PERF
c
%
mem_reads
=
a
%
mem_reads
+
b
%
mem_reads
...
...
@@ -79,6 +82,7 @@ module ftimings_value
c
%
maxrsssize
=
a
%
maxrsssize
-
b
%
maxrsssize
#ifdef HAVE_LIBPAPI
c
%
flop_count
=
a
%
flop_count
-
b
%
flop_count
c
%
cycle_count
=
a
%
cycle_count
-
b
%
cycle_count
#endif
#ifdef HAVE_PERF
c
%
mem_reads
=
a
%
mem_reads
-
b
%
mem_reads
...
...
@@ -95,6 +99,7 @@ module ftimings_value
neg_a
%
maxrsssize
=
-
a
%
maxrsssize
#ifdef HAVE_LIBPAPI
neg_a
%
flop_count
=
-
a
%
flop_count
neg_a
%
cycle_count
=
-
a
%
cycle_count
#endif
#ifdef HAVE_PERF
neg_a
%
mem_reads
=
-
a
%
mem_reads
...
...
ftimings/papi.c
View file @
65eecca1
...
...
@@ -29,7 +29,8 @@ static int event_set;
static
int
tried_papi_init
=
0
;
static
int
papi_available
=
0
;
static
int
flops_available
=
0
;
static
int
flop_available
=
0
;
static
int
cycle_available
=
0
;
#ifdef HAVE_LIBPAPI
#include
<papi.h>
...
...
@@ -75,15 +76,36 @@ static int papi_init(void) {
if
((
ret
=
PAPI_query_event
(
PAPI_DP_OPS
))
<
0
)
{
fprintf
(
stderr
,
"ftimings: %s:%d: PAPI_query_event(PAPI_DP_INS): %s
\n
"
,
__FILE__
,
__LINE__
,
PAPI_strerror
(
ret
));
flop
s
_available
=
0
;
flop_available
=
0
;
}
else
if
((
ret
=
PAPI_add_event
(
event_set
,
PAPI_DP_OPS
))
<
0
)
{
fprintf
(
stderr
,
"ftimings: %s:%d PAPI_add_event(): %s
\n
"
,
__FILE__
,
__LINE__
,
PAPI_strerror
(
ret
));
flop
s
_available
=
0
;
flop_available
=
0
;
}
else
{
flop
s
_available
=
1
;
flop_available
=
1
;
}
/* Cycle counter
*/
if
((
ret
=
PAPI_query_event
(
PAPI_TOT_CYC
))
<
0
)
{
fprintf
(
stderr
,
"ftimings: %s:%d: PAPI_query_event(PAPI_TOT_CYC): %s
\n
"
,
__FILE__
,
__LINE__
,
PAPI_strerror
(
ret
));
cycle_available
=
0
;
}
else
if
((
ret
=
PAPI_add_event
(
event_set
,
PAPI_TOT_CYC
))
<
0
)
{
fprintf
(
stderr
,
"ftimings: %s:%d PAPI_add_event(): %s
\n
"
,
__FILE__
,
__LINE__
,
PAPI_strerror
(
ret
));
cycle_available
=
0
;
}
else
{
cycle_available
=
1
;
}
/* Start */
if
((
ret
=
PAPI_start
(
event_set
))
<
0
)
{
fprintf
(
stderr
,
"ftimings: %s:%d PAPI_start(): %s
\n
"
,
__FILE__
,
__LINE__
,
PAPI_strerror
(
ret
));
papi_available
=
0
;
break
;
}
/* PAPI works */
papi_available
=
1
;
...
...
@@ -99,11 +121,19 @@ int ftimings_flop_init(void) {
papi_init
();
}
return
flop
s
_available
;
return
flop_available
;
}
void
ftimings_flop_counter
(
long
long
*
flop
)
{
long
long
res
[
3
];
int
ftimings_cycle_init
(
void
)
{
if
(
!
tried_papi_init
)
{
papi_init
();
}
return
cycle_available
;
}
void
ftimings_papi_counters
(
long
long
*
flop
,
long
long
*
cycles
)
{
long
long
res
[
2
];
int
i
,
ret
;
if
((
ret
=
PAPI_read
(
event_set
,
&
res
[
0
]))
<
0
)
{
...
...
@@ -112,10 +142,15 @@ void ftimings_flop_counter(long long *flop) {
}
i
=
0
;
if
(
flop
s
_available
)
{
if
(
flop_available
)
{
*
flop
=
res
[
i
++
];
}
else
{
*
flop
=
0LL
;
}
if
(
cycle_available
)
{
*
cycles
=
res
[
i
++
];
}
else
{
*
cycles
=
0LL
;
}
}
#endif
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment