Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MPIBP-Hummer
BioEM
Commits
d48dcc78
Commit
d48dcc78
authored
Jul 31, 2017
by
Pilar Cossio
Browse files
Merge branch 'BioEM-1.0' into 'master'
profiling: improving NVTX profiling CPU+GPU execution See merge request
!5
parents
55af9b02
254d53db
Pipeline
#15705
passed with stage
in 39 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
CMakeLists.txt
View file @
d48dcc78
...
...
@@ -20,6 +20,7 @@ option (USE_OPENMP "Build BioEM with OpenMP support" ON)
option
(
USE_MPI
"Build BioEM with MPI support"
ON
)
option
(
PRINT_CMAKE_VARIABLES
"List all CMAKE Variables"
OFF
)
option
(
CUDA_FORCE_GCC
"Force GCC as host compiler for CUDA part (If standard host compiler is incompatible with CUDA)"
ON
)
option
(
USE_NVTX
"Build BioEM with additional NVTX information"
OFF
)
###Set up general variables
...
...
@@ -138,6 +139,16 @@ else()
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-vla -Wno-long-long -Wall -pedantic"
)
endif
()
###Enable CUDA debugging with NVTX
if
(
USE_NVTX
)
if
(
CUDA_FOUND
)
set
(
CUDA_CUDA_LIBRARY
${
CUDA_CUDA_LIBRARY
}
"
${
CUDA_TOOLKIT_ROOT_DIR
}
/lib64/libnvToolsExt.so"
)
add_definitions
(
-DBIOEM_USE_NVTX
)
else
()
message
(
FATAL_ERROR
"Cannot use NVTX if CUDA is not found"
)
endif
()
endif
()
###Add Libraries
if
(
CUDA_FOUND
)
cuda_add_cufft_to_target
(
bioEM
)
...
...
bioem.cpp
View file @
d48dcc78
...
...
@@ -52,8 +52,12 @@
const
uint32_t
colors
[]
=
{
0x0000ff00
,
0x000000ff
,
0x00ffff00
,
0x00ff00ff
,
0x0000ffff
,
0x00ff0000
,
0x00ffffff
};
const
int
num_colors
=
sizeof
(
colors
)
/
sizeof
(
colors
[
0
]);
enum
myColor
{
COLOR_PROJECTION
,
COLOR_CONVOLUTION
,
COLOR_COMPARISON
,
COLOR_WORKLOAD
,
COLOR_INIT
};
#define cuda_custom_timeslot(name,cid) { \
// Projection number is stored in category attribute
// Convolution number is stored in payload attribute
#define cuda_custom_timeslot(name,iMap,iConv,cid) { \
int color_id = cid; \
color_id = color_id%num_colors; \
nvtxEventAttributes_t eventAttrib = {0}; \
...
...
@@ -61,13 +65,16 @@ const int num_colors = sizeof(colors)/sizeof(colors[0]);
eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \
eventAttrib.colorType = NVTX_COLOR_ARGB; \
eventAttrib.color = colors[color_id]; \
eventAttrib.category = iMap; \
eventAttrib.payloadType = NVTX_PAYLOAD_TYPE_UNSIGNED_INT64; \
eventAttrib.payload.llValue = iConv; \
eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \
eventAttrib.message.ascii = name; \
nvtxRangePushEx(&eventAttrib); \
}
#define cuda_custom_timeslot_end nvtxRangePop();
#else
#define cuda_custom_timeslot(name,cid)
#define cuda_custom_timeslot(name,
iMap,iConv,
cid)
#define cuda_custom_timeslot_end
#endif
...
...
@@ -95,7 +102,7 @@ ostream& operator<<(ostream& os, const vector<T>& v)
bioem
::
bioem
()
{
FFTAlgo
=
getenv
(
"FFTALGO"
)
==
NULL
?
1
:
atoi
(
getenv
(
"FFTALGO"
));
DebugOutput
=
getenv
(
"BIOEM_DEBUG_OUTPUT"
)
==
NULL
?
2
:
atoi
(
getenv
(
"BIOEM_DEBUG_OUTPUT"
));
DebugOutput
=
getenv
(
"BIOEM_DEBUG_OUTPUT"
)
==
NULL
?
0
:
atoi
(
getenv
(
"BIOEM_DEBUG_OUTPUT"
));
nProjectionsAtOnce
=
getenv
(
"BIOEM_PROJECTIONS_AT_ONCE"
)
==
NULL
?
1
:
atoi
(
getenv
(
"BIOEM_PROJECTIONS_AT_ONCE"
));
Autotuning
=
false
;
}
...
...
@@ -466,7 +473,7 @@ int bioem::run()
// **** If we want to control the number of threads -> omp_set_num_threads(XX); ******
// ****************** Declarying class of Probability Pointer *************************
cuda_custom_timeslot
(
"Initialization"
,
-
1
,
-
1
,
COLOR_INIT
);
if
(
mpi_rank
==
0
)
printf
(
"
\t
Initializing Probabilities
\n
"
);
// Contros for MPI
...
...
@@ -534,7 +541,8 @@ int bioem::run()
proj_mapsFFT
=
(
mycomplex_t
*
)
myfftw_malloc
(
sizeof
(
mycomplex_t
)
*
ProjMapSize
*
nProjectionsAtOnce
);
conv_mapFFT
=
(
mycomplex_t
*
)
myfftw_malloc
(
sizeof
(
mycomplex_t
)
*
param
.
param_device
.
NumberPixels
*
param
.
param_device
.
NumberFFTPixels1D
);
if
(
!
FFTAlgo
)
conv_map
=
(
myfloat_t
*
)
myfftw_malloc
(
sizeof
(
myfloat_t
)
*
param
.
param_device
.
NumberPixels
*
param
.
param_device
.
NumberPixels
);
cuda_custom_timeslot_end
;
//Ending initialization
HighResTimer
timer
,
timer2
;
...
...
@@ -543,7 +551,7 @@ int bioem::run()
if
(
Autotuning
)
{
aut
.
Initialize
(
AUTOTUNING_ALGORITHM
,
FIRST_STABLE
);
rebalance
(
aut
.
Workload
());
rebalance
Wrapper
(
aut
.
Workload
());
}
if
(
DebugOutput
>=
1
&&
mpi_rank
==
0
)
printf
(
"
\t
Main Loop GridAngles %d, CTFs %d, RefMaps %d, Shifts (%d/%d)², Pixels %d², OMP Threads %d, MPI Ranks %d
\n
"
,
param
.
nTotGridAngles
,
param
.
nTotCTFs
,
RefMap
.
ntotRefMap
,
2
*
param
.
param_device
.
maxDisplaceCenter
+
param
.
param_device
.
GridSpaceCenter
,
param
.
param_device
.
GridSpaceCenter
,
param
.
param_device
.
NumberPixels
,
omp_get_max_threads
(),
mpi_size
);
...
...
@@ -554,6 +562,9 @@ int bioem::run()
int
iOrientEnd
=
(
int
)
((
long
long
int
)
(
mpi_rank
+
1
)
*
param
.
nTotGridAngles
/
mpi_size
);
if
(
iOrientEnd
>
param
.
nTotGridAngles
)
iOrientEnd
=
param
.
nTotGridAngles
;
/* Vectors for computing statistic on different parts of the code */
TimeStat
ts
((
iOrientEnd
-
iOrientStart
),
param
.
nTotCTFs
);
if
(
DebugOutput
>=
1
)
ts
.
InitTimeStat
(
4
);
// **************************Loop Over orientations***************************************
...
...
@@ -561,8 +572,11 @@ int bioem::run()
{
// ***************************************************************************************
// ***** Creating Projection for given orientation and transforming to Fourier space *****
if
(
DebugOutput
>=
1
)
timer2
.
ResetStart
();
if
(
DebugOutput
>=
2
)
timer
.
ResetStart
();
if
(
DebugOutput
>=
1
)
{
timer2
.
ResetStart
();
timer
.
ResetStart
();
}
int
iTmpEnd
=
std
::
min
(
iOrientEnd
,
iOrientAtOnce
+
nProjectionsAtOnce
);
// **************************Parallel orientations for projections at once***************
...
...
@@ -572,13 +586,17 @@ int bioem::run()
{
createProjection
(
iOrient
,
&
proj_mapsFFT
[(
iOrient
-
iOrientAtOnce
)
*
ProjMapSize
]);
}
if
(
DebugOutput
>=
2
)
printf
(
"
\t
Time Projection %d: %f (rank %d)
\n
"
,
iOrientAtOnce
,
timer
.
GetCurrentElapsedTime
(),
mpi_rank
);
if
(
DebugOutput
>=
1
)
{
ts
.
time
=
timer
.
GetCurrentElapsedTime
();
ts
.
Add
(
TS_PROJECTION
);
if
(
DebugOutput
>=
2
)
printf
(
"
\t
Time Projection %d: %f (rank %d)
\n
"
,
iOrientAtOnce
,
ts
.
time
,
mpi_rank
);
}
/* Recalibrate if needed */
if
(
Autotuning
&&
((
iOrientAtOnce
-
iOrientStart
)
%
RECALIB_FACTOR
==
0
)
&&
((
iOrientEnd
-
iOrientAtOnce
)
>
RECALIB_FACTOR
)
&&
(
iOrientAtOnce
!=
iOrientStart
))
{
aut
.
Reset
();
rebalance
(
aut
.
Workload
());
rebalance
Wrapper
(
aut
.
Workload
());
}
for
(
int
iOrient
=
iOrientAtOnce
;
iOrient
<
iTmpEnd
;
iOrient
++
)
...
...
@@ -591,52 +609,69 @@ int bioem::run()
for
(
int
iConv
=
0
;
iConv
<
param
.
nTotCTFs
;
iConv
++
)
{
// *** Calculating convolutions of projection map and crosscorrelations ***
if
(
DebugOutput
>=
2
)
timer
.
ResetStart
();
if
(
DebugOutput
>=
1
)
timer
.
ResetStart
();
createConvolutedProjectionMap
(
iOrient
,
iConv
,
proj_mapFFT
,
conv_map
,
conv_mapFFT
,
sumCONV
,
sumsquareCONV
);
if
(
DebugOutput
>=
2
)
printf
(
"
\t\t
Time Convolution %d %d: %f (rank %d)
\n
"
,
iOrient
,
iConv
,
timer
.
GetCurrentElapsedTime
(),
mpi_rank
);
if
(
DebugOutput
>=
1
)
{
ts
.
time
=
timer
.
GetCurrentElapsedTime
();
ts
.
Add
(
TS_CONVOLUTION
);
if
(
DebugOutput
>=
2
)
printf
(
"
\t\t
Time Convolution %d %d: %f (rank %d)
\n
"
,
iOrient
,
iConv
,
ts
.
time
,
mpi_rank
);
}
if
((
DebugOutput
>=
2
)
||
(
Autotuning
&&
aut
.
Needed
(
iConv
)))
timer
.
ResetStart
();
myfloat_t
amp
,
pha
,
env
;
if
((
DebugOutput
>=
1
)
||
(
Autotuning
&&
aut
.
Needed
(
iConv
)))
timer
.
ResetStart
();
myfloat_t
amp
,
pha
,
env
;
amp
=
param
.
CtfParam
[
iConv
].
pos
[
0
];
pha
=
param
.
CtfParam
[
iConv
].
pos
[
1
];
env
=
param
.
CtfParam
[
iConv
].
pos
[
2
];
amp
=
param
.
CtfParam
[
iConv
].
pos
[
0
];
pha
=
param
.
CtfParam
[
iConv
].
pos
[
1
];
env
=
param
.
CtfParam
[
iConv
].
pos
[
2
];
// ******************Internal loop over Reference images CUDA or OpenMP******************
// *** Comparing each calculated convoluted map with all experimental maps ***
compareRefMaps
(
iOrient
,
iConv
,
amp
,
pha
,
env
,
conv_map
,
conv_mapFFT
,
sumCONV
,
sumsquareCONV
);
double
compTime
=
0.
;
ts
.
time
=
0.
;
if
(
DebugOutput
>=
1
)
{
ts
.
time
=
timer
.
GetCurrentElapsedTime
();
ts
.
Add
(
TS_COMPARISON
);
}
if
(
DebugOutput
>=
2
)
{
compTime
=
timer
.
GetCurrentElapsedTime
();
const
int
nShifts
=
2
*
param
.
param_device
.
maxDisplaceCenter
/
param
.
param_device
.
GridSpaceCenter
+
1
;
const
double
nFlops
=
(
double
)
RefMap
.
ntotRefMap
*
(
double
)
nShifts
*
(
double
)
nShifts
*
(((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
5.
+
25.
)
/
compT
ime
;
(((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
5.
+
25.
)
/
ts
.
t
ime
;
const
double
nGBs
=
(
double
)
RefMap
.
ntotRefMap
*
(
double
)
nShifts
*
(
double
)
nShifts
*
(((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
2.
+
8.
)
*
(
double
)
sizeof
(
myfloat_t
)
/
compT
ime
;
const
double
nGBs2
=
(
double
)
RefMap
.
ntotRefMap
*
((
double
)
param
.
param_device
.
NumberPixels
*
(
double
)
param
.
param_device
.
NumberPixels
+
8.
)
*
(
double
)
sizeof
(
myfloat_t
)
/
compT
ime
;
(((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
2.
+
8.
)
*
(
double
)
sizeof
(
myfloat_t
)
/
ts
.
t
ime
;
const
double
nGBs2
=
(
double
)
RefMap
.
ntotRefMap
*
((
double
)
param
.
param_device
.
NumberPixels
*
(
double
)
param
.
param_device
.
NumberPixels
+
8.
)
*
(
double
)
sizeof
(
myfloat_t
)
/
ts
.
t
ime
;
if
(
Autotuning
)
printf
(
"
\t\t
Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s, with GPU workload %d%%) (rank %d)
\n
"
,
iOrient
,
iConv
,
compT
ime
,
nFlops
/
1000000000.
,
nGBs
/
1000000000.
,
nGBs2
/
1000000000.
,
aut
.
Workload
(),
mpi_rank
);
else
printf
(
"
\t\t
Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)
\n
"
,
iOrient
,
iConv
,
compT
ime
,
nFlops
/
1000000000.
,
nGBs
/
1000000000.
,
nGBs2
/
1000000000.
,
mpi_rank
);
if
(
Autotuning
)
printf
(
"
\t\t
Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s, with GPU workload %d%%) (rank %d)
\n
"
,
iOrient
,
iConv
,
ts
.
t
ime
,
nFlops
/
1000000000.
,
nGBs
/
1000000000.
,
nGBs2
/
1000000000.
,
aut
.
Workload
(),
mpi_rank
);
else
printf
(
"
\t\t
Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)
\n
"
,
iOrient
,
iConv
,
ts
.
t
ime
,
nFlops
/
1000000000.
,
nGBs
/
1000000000.
,
nGBs2
/
1000000000.
,
mpi_rank
);
}
if
(
Autotuning
&&
aut
.
Needed
(
iConv
))
{
if
(
compT
ime
==
0.
)
compT
ime
=
timer
.
GetCurrentElapsedTime
();
aut
.
Tune
(
compT
ime
);
if
(
aut
.
Finished
()
&&
DebugOutput
>=
2
)
printf
(
"
\
t\
t
Optimal GPU workload %d%% (rank %d)
\n
"
,
aut
.
Workload
(),
mpi_rank
);
rebalance
(
aut
.
Workload
());
if
(
ts
.
t
ime
==
0.
)
ts
.
t
ime
=
timer
.
GetCurrentElapsedTime
();
aut
.
Tune
(
ts
.
t
ime
);
if
(
aut
.
Finished
()
&&
DebugOutput
>=
1
)
printf
(
"
\t
Optimal GPU workload %d%% (rank %d)
\n
"
,
aut
.
Workload
(),
mpi_rank
);
rebalance
Wrapper
(
aut
.
Workload
());
}
}
if
(
DebugOutput
>=
1
)
{
printf
(
"
\t
Total time for projection %d: %f (rank %d)
\n
"
,
iOrient
,
timer2
.
GetCurrentElapsedTime
(),
mpi_rank
);
ts
.
time
=
timer2
.
GetCurrentElapsedTime
();
ts
.
Add
(
TS_TPROJECTION
);
printf
(
"
\t
Total time for projection %d: %f (rank %d)
\n
"
,
iOrient
,
ts
.
time
,
mpi_rank
);
timer2
.
ResetStart
();
}
}
}
/* Statistical summary on different parts of the code */
if
(
DebugOutput
>=
1
)
{
ts
.
PrintTimeStat
(
mpi_rank
);
ts
.
EmptyTimeStat
();
}
//deallocating fftw_complex vector
myfftw_free
(
proj_mapsFFT
);
myfftw_free
(
conv_mapFFT
);
...
...
@@ -1026,6 +1061,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha,
//***************************************************************************************
//***** BioEM routine for comparing reference maps to convoluted maps *****
//***************************************************************************************
cuda_custom_timeslot
(
"Comparison"
,
iOrient
,
iConv
,
COLOR_COMPARISON
);
if
(
FFTAlgo
)
{
//With FFT Algorithm
...
...
@@ -1045,6 +1081,8 @@ int bioem::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha,
compareRefMapShifted
<
-
1
>
(
iRefMap
,
iOrient
,
iConv
,
amp
,
pha
,
env
,
sumC
,
sumsquareC
,
conv_map
,
pProb
,
param
.
param_device
,
RefMap
);
}
}
cuda_custom_timeslot_end
;
return
(
0
);
}
...
...
@@ -1096,7 +1134,7 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
// ********************* and turns projection into Fourier space ************************
// **************************************************************************************
cuda_custom_timeslot
(
"Projection"
,
0
);
cuda_custom_timeslot
(
"Projection"
,
iMap
,
0
,
COLOR_PROJECTION
);
myfloat3_t
RotatedPointsModel
[
Model
.
nPointsModel
];
myfloat_t
rotmat
[
3
][
3
];
...
...
@@ -1297,7 +1335,7 @@ int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj
// **************************************************************************************
cuda_custom_timeslot
(
"Convolution"
,
1
);
cuda_custom_timeslot
(
"Convolution"
,
iMap
,
iConv
,
COLOR_CONVOLUTION
);
mycomplex_t
*
tmp
=
param
.
fft_scratch_complex
[
omp_get_thread_num
()];
...
...
@@ -1455,4 +1493,11 @@ void bioem::free_device_host(void* ptr)
free
(
ptr
);
}
void
bioem
::
rebalanceWrapper
(
int
workload
)
{
cuda_custom_timeslot
(
"Rebalance workload"
,
-
1
,
workload
,
COLOR_WORKLOAD
);
rebalance
(
workload
);
cuda_custom_timeslot_end
;
}
void
bioem
::
rebalance
(
int
workload
)
{}
include/bioem.h
View file @
d48dcc78
...
...
@@ -43,6 +43,7 @@ public:
virtual
void
*
malloc_device_host
(
size_t
size
);
virtual
void
free_device_host
(
void
*
ptr
);
virtual
void
rebalance
(
int
workload
);
//Rebalance GPUWorkload
void
rebalanceWrapper
(
int
workload
);
//Rebalance wrapper
int
createProjection
(
int
iMap
,
mycomplex_t
*
map
);
int
calcross_cor
(
myfloat_t
*
localmap
,
myfloat_t
&
sum
,
myfloat_t
&
sumsquare
);
...
...
include/timer.h
View file @
d48dcc78
...
...
@@ -11,6 +11,15 @@
#ifndef TIMER_H
#define TIMER_H
#include
<stdio.h>
#include
<string>
#include
<numeric>
#include
<vector>
#include
<algorithm>
#include
<cmath>
using
namespace
std
;
class
HighResTimer
{
public:
...
...
@@ -30,6 +39,40 @@ private:
double
ElapsedTime
;
double
StartTime
;
int
running
;
};
};
/* Structure for saving a vector of timings */
typedef
struct
_TimeLog
{
vector
<
double
>
vec
;
double
sum
;
double
stdev
;
string
name
;
}
TimeLog
;
enum
TS_NAMES
{
TS_TPROJECTION
,
TS_PROJECTION
,
TS_CONVOLUTION
,
TS_COMPARISON
};
/* Structure for saving timings of different parts of code and doing basic statistics on them */
class
TimeStat
{
public:
TimeStat
(
int
Angles
,
int
CTFs
)
:
time
(
0
),
tl
(
NULL
)
{
angles
=
Angles
;
ctfs
=
CTFs
;};
~
TimeStat
()
{
EmptyTimeStat
();};
void
InitTimeLog
(
int
log
,
int
size
,
string
s
);
void
InitTimeStat
(
int
nlogs
);
void
EmptyTimeStat
();
void
inline
Add
(
int
log
)
{
tl
[
log
].
vec
.
push_back
(
time
);};
void
ComputeTimeStat
();
void
PrintTimeStat
(
int
mpi_rank
);
/* Variable for storing times during the execution */
double
time
;
private:
TimeLog
*
tl
;
int
total_logs
;
int
angles
;
int
ctfs
;
};
#endif
map.cpp
View file @
d48dcc78
...
...
@@ -352,7 +352,7 @@ void bioem_Probability::init(size_t maps, size_t angles, size_t cc, bioem& bio)
nAngles
=
angles
;
nCC
=
cc
;
ptr
=
bio
.
malloc_device_host
(
get_size
(
maps
,
angles
,
cc
,
bio
.
param
.
param_device
.
writeAngles
,
bio
.
param
.
param_device
.
writeCC
));
cout
<<
"Allocation #Maps "
<<
maps
<<
" #Angles "
<<
angles
<<
" #cross.cor "
<<
cc
<<
"
\n
"
;
if
(
bio
.
DebugOutput
>=
1
)
cout
<<
"Allocation #Maps "
<<
maps
<<
" #Angles "
<<
angles
<<
" #cross.cor "
<<
cc
<<
"
\n
"
;
//<< " == " << get_size(maps, angles, cc, bio.param.param_device.writeAngles, bio.param.param_device.writeCC)<< "\n";
set_pointers
();
}
...
...
timer.cpp
View file @
d48dcc78
...
...
@@ -90,4 +90,59 @@ double HighResTimer::GetFrequency()
#endif
}
double
HighResTimer
::
Frequency
=
HighResTimer
::
GetFrequency
();
\ No newline at end of file
double
HighResTimer
::
Frequency
=
HighResTimer
::
GetFrequency
();
void
TimeStat
::
InitTimeLog
(
int
log
,
int
size
,
string
s
)
{
tl
[
log
].
vec
.
reserve
(
size
);
tl
[
log
].
name
=
s
;
tl
[
log
].
sum
=
0.
;
tl
[
log
].
stdev
=
0.
;
}
void
TimeStat
::
InitTimeStat
(
int
nlogs
)
{
total_logs
=
nlogs
;
tl
=
new
TimeLog
[
total_logs
];
InitTimeLog
(
TS_TPROJECTION
,
angles
,
"Total time of projection"
);
InitTimeLog
(
TS_PROJECTION
,
angles
,
"Projection"
);
InitTimeLog
(
TS_CONVOLUTION
,
angles
*
ctfs
,
"Convolution"
);
InitTimeLog
(
TS_COMPARISON
,
angles
*
ctfs
,
"Comparison"
);
}
void
TimeStat
::
EmptyTimeStat
()
{
if
(
tl
==
NULL
)
return
;
delete
[
]
tl
;
tl
=
NULL
;
time
=
0.
;
}
void
TimeStat
::
ComputeTimeStat
()
{
double
mean
,
sq_sum
;
vector
<
double
>
diff
;
for
(
int
i
=
0
;
i
<
total_logs
;
i
++
)
{
tl
[
i
].
sum
=
std
::
accumulate
(
tl
[
i
].
vec
.
begin
(),
tl
[
i
].
vec
.
end
(),
0.0
);
mean
=
tl
[
i
].
sum
/
tl
[
i
].
vec
.
size
();
diff
.
resize
(
tl
[
i
].
vec
.
size
());
std
::
transform
(
tl
[
i
].
vec
.
begin
(),
tl
[
i
].
vec
.
end
(),
diff
.
begin
(),
std
::
bind2nd
(
std
::
minus
<
double
>
(),
mean
));
sq_sum
=
std
::
inner_product
(
diff
.
begin
(),
diff
.
end
(),
diff
.
begin
(),
0.0
);
tl
[
i
].
stdev
=
std
::
sqrt
(
sq_sum
/
tl
[
i
].
vec
.
size
());
}
}
void
TimeStat
::
PrintTimeStat
(
int
mpi_rank
)
{
ComputeTimeStat
();
for
(
int
i
=
0
;
i
<
total_logs
;
i
++
)
{
printf
(
"SUMMARY -> %s: Total %f sec; Mean %f sec; Std.Dev. %f (rank %d)
\n
"
,
tl
[
i
].
name
.
c_str
(),
tl
[
i
].
sum
,
tl
[
i
].
sum
/
tl
[
i
].
vec
.
size
(),
tl
[
i
].
stdev
,
mpi_rank
);
}
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment