Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
B
BioEM
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
MPIBP-Hummer
BioEM
Commits
adefd46f
Commit
adefd46f
authored
Jul 03, 2017
by
Luka Stanisic
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
offloading everything related to Autotuning to another class, making the code cleaner
parent
ddd6cbd9
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
227 additions
and
110 deletions
+227
-110
CMakeLists.txt
CMakeLists.txt
+1
-1
autotuner.cpp
autotuner.cpp
+125
-0
bioem.cpp
bioem.cpp
+25
-108
bioem_cuda.cu
bioem_cuda.cu
+1
-0
include/autotuner.h
include/autotuner.h
+62
-0
include/bioem.h
include/bioem.h
+1
-1
include/defs.h
include/defs.h
+12
-0
No files found.
CMakeLists.txt
View file @
adefd46f
...
...
@@ -37,7 +37,7 @@ else()
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
BIOEM_GCC_FLAGS
}
"
)
endif
()
set
(
BIOEM_SOURCE_FILES
"bioem.cpp"
"main.cpp"
"map.cpp"
"model.cpp"
"param.cpp"
"timer.cpp"
)
set
(
BIOEM_SOURCE_FILES
"bioem.cpp"
"main.cpp"
"map.cpp"
"model.cpp"
"param.cpp"
"timer.cpp"
"autotuner.cpp"
)
###Find Required Packages
find_package
(
PkgConfig
)
...
...
autotuner.cpp
0 → 100644
View file @
adefd46f
#include "autotuner.h"
/* Put every tuning variable back to its starting value.
   Reads `algo`, so the algorithm must already be selected
   (Initialize() assigns it before calling this function). */
void Autotuner::Reset()
{
	/* Re-arm the tuner */
	stopTuning = false;

	/* No measurement recorded yet (used by AlgoSimple and AlgoRatio) */
	best_time = 0.;
	best_workload = 0;

	/* Bisection state (used by AlgoBisection) */
	a = 1;
	b = 50;
	c = 100;
	x = 50;
	limit = 1;
	fb = 0.;
	fx = 0.;

	/* First workload to try: 50 for algorithm 3, 100 for every other one */
	workload = (algo == 3) ? 50 : 100;
}
/* Tell whether the comparison with index `iteration` has to be timed and
   fed to Tune(). Always false once tuning has been stopped. */
bool Autotuner::Needed(int iteration)
{
	if (stopTuning)
	{
		return false;
	}

	/* Algorithms 1 and 3: sample every (stable + 1)-th comparison,
	   at the position where the remainder equals `stable` */
	if (algo == 1 || algo == 3)
	{
		return iteration % (stable + 1) == stable;
	}

	/* Algorithm 2: exactly two samples, at stable / 2 (integer division)
	   and at stable */
	if (algo == 2)
	{
		const int halfway = stable / 2;
		return iteration == halfway || iteration == stable;
	}

	/* Unknown algorithm: should never happen */
	return false;
}
/* Check whether the selected algorithm has reached its stop condition.
   On success the tuner is marked as stopped and true is returned;
   otherwise nothing is modified and false is returned. */
bool Autotuner::Finished()
{
	bool done = false;

	if (algo == 1)
	{
		/* Sweep ended once the tested workload dropped below 30:
		   switch to the best workload that was recorded */
		done = workload < 30;
		if (done)
		{
			workload = best_workload;
		}
	}
	else if (algo == 2)
	{
		/* Done as soon as a (non-zero) workload has been computed */
		done = best_workload != 0;
	}
	else if (algo == 3)
	{
		/* Done when both sub-intervals have shrunk to `limit` */
		done = (c - b == limit) && (b - a == limit);
	}
	/* Any other algorithm value should never happen: report "not finished" */

	if (done)
	{
		stopTuning = true;
	}
	return done;
}
/* Feed the duration of the last timed comparison to the selected
   algorithm, which then picks the next workload value to test. */
void Autotuner::Tune(double compTime)
{
	if (algo == 1)
	{
		AlgoSimple(compTime);
	}
	else if (algo == 2)
	{
		AlgoRatio(compTime);
	}
	else if (algo == 3)
	{
		AlgoBisection(compTime);
	}
	/* Any other algorithm value should never happen: nothing to do */
}
/* Algorithm 1: remember the workload with the shortest measured time,
   then move on to the next candidate, 5 lower than the current one. */
void Autotuner::AlgoSimple(double compTime)
{
	/* best_time == 0. means that nothing has been recorded since the last reset */
	const bool nothingRecorded = (best_time == 0.);

	if (nothingRecorded || compTime < best_time)
	{
		best_workload = workload;
		best_time = compTime;
	}

	workload = workload - 5;
}
/* Algorithm 2: the first call stores its timing and switches the workload
   to 1; the following call derives the workload from the ratio between
   the two timings. */
void Autotuner::AlgoRatio(double compTime)
{
	if (best_time != 0.)
	{
		/* Second measurement: percentage = 100 * t2 / (t1 + t2), truncated
		   towards zero when stored as int.
		   NOTE(review): a result of 0 leaves best_workload at 0, the value
		   Finished() treats as "not computed yet" for this algorithm. */
		best_workload = static_cast<int>(100 * (compTime / (best_time + compTime)));
		workload = best_workload;
		return;
	}

	/* First measurement: keep its timing and test workload 1 next */
	best_time = compTime;
	workload = 1;
}
/* Algorithm 3: bisection-like search over the workload. `b` is the best
   point found so far (timing `fb`), `a` and `c` are the bounds below and
   above it, and `x` is the point that has just been measured. */
void Autotuner::AlgoBisection(double compTime)
{
	/* First call after a reset: this timing belongs to the starting
	   point b; probe 75 next */
	if (fb == 0.)
	{
		fb = compTime;
		x = 75;
		workload = x;
		return;
	}

	fx = compTime;

	/* Side of b on which the probe lies, evaluated before b is updated */
	const bool probeBelowBest = (x < b);

	if (fx < fb)
	{
		/* The probe is faster: the old best point becomes a bound and
		   the probe becomes the new best point */
		if (probeBelowBest)
		{
			c = b;
		}
		else
		{
			a = b;
		}
		b = x;
		fb = fx;
	}
	else
	{
		/* The probe is not faster: it becomes the bound on its own side */
		if (probeBelowBest)
		{
			a = x;
		}
		else
		{
			c = x;
		}
	}

	/* Next probe: split the wider of the two sub-intervals (integer
	   arithmetic; the lower one rounds its half upwards) */
	const int upperWidth = c - b;
	const int lowerWidth = b - a;
	if (upperWidth > lowerWidth)
	{
		x = b + upperWidth / 2;
	}
	else
	{
		x = a + (lowerWidth + 1) / 2;
	}

	workload = x;
}
bioem.cpp
View file @
adefd46f
...
...
@@ -14,12 +14,6 @@
#ifdef WITH_MPI
#include <mpi.h>
/* Recalibrate every X projections */
#define RECALIB_FACTOR 200
/* After how many comparison iterations, comparison duration becomes stable */
#define FIRST_STABLE 7
#define STABLE_ITERATION(i) (i % (FIRST_STABLE + 1) == FIRST_STABLE)
#define MPI_CHK(expr) \
if (expr != MPI_SUCCESS) \
{ \
...
...
@@ -47,6 +41,7 @@
#include <fftw3.h>
#include <math.h>
#include "timer.h"
#include "autotuner.h"
#include "param.h"
#include "bioem.h"
...
...
@@ -103,7 +98,13 @@ bioem::bioem()
FFTAlgo
=
getenv
(
"FFTALGO"
)
==
NULL
?
1
:
atoi
(
getenv
(
"FFTALGO"
));
DebugOutput
=
getenv
(
"BIOEM_DEBUG_OUTPUT"
)
==
NULL
?
2
:
atoi
(
getenv
(
"BIOEM_DEBUG_OUTPUT"
));
nProjectionsAtOnce
=
getenv
(
"BIOEM_PROJECTIONS_AT_ONCE"
)
==
NULL
?
1
:
atoi
(
getenv
(
"BIOEM_PROJECTIONS_AT_ONCE"
));
Autotuning
=
getenv
(
"BIOEM_AUTOTUNING"
)
==
NULL
?
0
:
atoi
(
getenv
(
"BIOEM_AUTOTUNING"
));
Autotuning
=
false
;
if
(
getenv
(
"GPU"
)
&&
atoi
(
getenv
(
"GPU"
)))
if
(
!
getenv
(
"GPUWORKLOAD"
)
||
(
atoi
(
getenv
(
"GPUWORKLOAD"
))
==
-
1
))
if
(
!
getenv
(
"BIOEM_DEBUG_BREAK"
)
||
(
atoi
(
getenv
(
"BIOEM_DEBUG_BREAK"
))
>
FIRST_STABLE
))
{
Autotuning
=
true
;
}
}
bioem
::~
bioem
()
...
...
@@ -532,17 +533,12 @@ int bioem::run()
HighResTimer
timer
,
timer2
;
/* This variables are used for Autotuning */
double
best_time
=
0
;
int
workload
=
getenv
(
"GPUWORKLOAD"
)
==
NULL
?
100
:
atoi
(
getenv
(
"GPUWORKLOAD"
));
int
best_workload
=
workload
;
bool
stopTuning
=
false
;
int
a
=
1
,
b
=
50
,
c
=
100
,
x
=
75
,
limit
=
1
;
double
fb
=
0.
,
fx
=
0.
;
if
(
Autotuning
==
3
)
/* Autotuning */
Autotuner
aut
;
if
(
Autotuning
)
{
workload
=
b
;
rebalance
(
b
);
aut
.
Initialize
(
AUTOTUNING_ALGORITHM
,
FIRST_STABLE
)
;
rebalance
(
aut
.
Workload
());
}
if
(
DebugOutput
>=
1
&&
mpi_rank
==
0
)
printf
(
"
\t
Main Loop GridAngles %d, CTFs %d, RefMaps %d, Shifts (%d/%d)², Pixels %d², OMP Threads %d, MPI Ranks %d
\n
"
,
param
.
nTotGridAngles
,
param
.
nTotCTFs
,
RefMap
.
ntotRefMap
,
2
*
param
.
param_device
.
maxDisplaceCenter
+
param
.
param_device
.
GridSpaceCenter
,
param
.
param_device
.
GridSpaceCenter
,
param
.
param_device
.
NumberPixels
,
omp_get_max_threads
(),
mpi_size
);
...
...
@@ -576,15 +572,12 @@ int bioem::run()
for
(
int
iOrient
=
iOrientAtOnce
;
iOrient
<
iTmpEnd
;
iOrient
++
)
{
/* Recalibrate if needed */
if
(((
iOrient
-
iOrientStart
)
%
RECALIB_FACTOR
==
0
)
&&
((
iTmpEnd
-
iOrient
)
>
RECALIB_FACTOR
)
&&
(
Autotuning
==
3
)
)
{
a
=
1
,
b
=
50
,
c
=
100
,
x
=
75
,
limit
=
1
;
fb
=
0.
,
fx
=
0.
;
workload
=
b
;
rebalance
(
b
);
stopTuning
=
false
;
if
(
Autotuning
&&
((
iOrient
-
iOrientStart
)
%
RECALIB_FACTOR
==
0
)
&&
((
iTmpEnd
-
iOrient
)
>
RECALIB_FACTOR
))
{
aut
.
Reset
();
rebalance
(
aut
.
Workload
());
}
mycomplex_t
*
proj_mapFFT
=
&
proj_mapsFFT
[(
iOrient
-
iOrientAtOnce
)
*
ProjMapSize
];
// ***************************************************************************************
...
...
@@ -598,8 +591,7 @@ int bioem::run()
createConvolutedProjectionMap
(
iOrient
,
iConv
,
proj_mapFFT
,
conv_map
,
conv_mapFFT
,
sumCONV
,
sumsquareCONV
);
if
(
DebugOutput
>=
2
)
printf
(
"
\t\t
Time Convolution %d %d: %f (rank %d)
\n
"
,
iOrient
,
iConv
,
timer
.
GetCurrentElapsedTime
(),
mpi_rank
);
if
(
Autotuning
&&
!
stopTuning
)
timer
.
ResetStart
();
if
(
DebugOutput
>=
2
)
timer
.
ResetStart
();
if
((
DebugOutput
>=
2
)
||
(
Autotuning
&&
aut
.
Needed
(
iConv
)))
timer
.
ResetStart
();
myfloat_t
amp
,
pha
,
env
;
amp
=
param
.
CtfParam
[
iConv
].
pos
[
0
];
...
...
@@ -622,90 +614,15 @@ int bioem::run()
(((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
2.
+
8.
)
*
(
double
)
sizeof
(
myfloat_t
)
/
compTime
;
const
double
nGBs2
=
(
double
)
RefMap
.
ntotRefMap
*
((
double
)
param
.
param_device
.
NumberPixels
*
(
double
)
param
.
param_device
.
NumberPixels
+
8.
)
*
(
double
)
sizeof
(
myfloat_t
)
/
compTime
;
printf
(
"
\t\t
Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s, with GPU workload %d%%) (rank %d)
\n
"
,
iOrient
,
iConv
,
compTime
,
nFlops
/
1000000000.
,
nGBs
/
1000000000.
,
nGBs2
/
1000000000.
,
workload
,
mpi_rank
);
}
if
(
Autotuning
==
1
&&
!
stopTuning
&&
STABLE_ITERATION
(
iConv
))
{
if
(
compTime
==
0.
)
compTime
=
timer
.
GetCurrentElapsedTime
();
if
(
best_time
==
0
||
compTime
<
best_time
)
{
best_time
=
compTime
;
best_workload
=
workload
;
}
workload
-=
5
;
if
(
workload
<
30
)
{
stopTuning
=
true
;
workload
=
best_workload
;
}
rebalance
(
workload
);
}
if
(
Autotuning
==
2
&&
!
stopTuning
&&
(
iConv
==
3
||
iConv
==
7
))
{
if
(
compTime
==
0.
)
compTime
=
timer
.
GetCurrentElapsedTime
();
if
(
iConv
==
3
)
{
best_time
=
compTime
;
workload
=
1
;
}
else
if
(
iConv
==
7
)
{
workload
=
(
int
)
100
*
(
compTime
/
(
best_time
+
compTime
)
);
if
(
DebugOutput
>=
2
)
{
printf
(
"
\t\t
Comparison on GPU only time: %.6f
\n
"
,
best_time
);
printf
(
"
\t\t
Comparison on CPU only time: %.6f
\n
"
,
compTime
);
printf
(
"
\t\t
Optimal GPU workload: %d%%
\n
"
,
workload
);
}
stopTuning
=
true
;
}
rebalance
(
workload
);
if
(
Autotuning
)
printf
(
"
\t\t
Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s, with GPU workload %d%%) (rank %d)
\n
"
,
iOrient
,
iConv
,
compTime
,
nFlops
/
1000000000.
,
nGBs
/
1000000000.
,
nGBs2
/
1000000000.
,
aut
.
Workload
(),
mpi_rank
);
else
printf
(
"
\t\t
Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)
\n
"
,
iOrient
,
iConv
,
compTime
,
nFlops
/
1000000000.
,
nGBs
/
1000000000.
,
nGBs2
/
1000000000.
,
mpi_rank
);
}
if
(
Autotuning
==
3
&&
!
stopTuning
&&
STABLE_ITERATION
(
iConv
))
if
(
Autotuning
&&
aut
.
Needed
(
iConv
))
{
if
(
compTime
==
0.
)
compTime
=
timer
.
GetCurrentElapsedTime
();
if
(((
iOrient
-
iOrientStart
)
%
RECALIB_FACTOR
==
0
)
&&
(
iConv
==
FIRST_STABLE
))
{
fb
=
compTime
;
x
=
75
;
}
else
{
fx
=
compTime
;
if
(
fx
<
fb
)
{
if
(
x
<
b
)
c
=
b
;
else
a
=
b
;
b
=
x
;
fb
=
fx
;
}
else
{
if
(
x
<
b
)
a
=
x
;
else
c
=
x
;
}
x
=
(
c
-
b
>
b
-
a
)
?
(
int
)(
b
+
(
c
-
b
)
/
2
)
:
(
int
)(
a
+
(
b
-
a
+
1
)
/
2
);
}
if
((
c
-
b
==
limit
)
&&
(
b
-
a
==
limit
))
{
stopTuning
=
true
;
if
(
DebugOutput
>=
2
)
{
printf
(
"
\t\t
Optimal GPU workload %d%% (rank %d)
\n
"
,
workload
,
mpi_rank
);
}
}
workload
=
x
;
rebalance
(
x
);
aut
.
Tune
(
compTime
);
if
(
aut
.
Finished
()
&&
DebugOutput
>=
2
)
printf
(
"
\t\t
Optimal GPU workload %d%% (rank %d)
\n
"
,
aut
.
Workload
(),
mpi_rank
);
rebalance
(
aut
.
Workload
());
}
}
if
(
DebugOutput
>=
1
)
...
...
bioem_cuda.cu
View file @
adefd46f
...
...
@@ -137,6 +137,7 @@ bioem_cuda::bioem_cuda()
GPUAlgo
=
getenv
(
"GPUALGO"
)
==
NULL
?
2
:
atoi
(
getenv
(
"GPUALGO"
));
GPUAsync
=
getenv
(
"GPUASYNC"
)
==
NULL
?
1
:
atoi
(
getenv
(
"GPUASYNC"
));
GPUWorkload
=
getenv
(
"GPUWORKLOAD"
)
==
NULL
?
100
:
atoi
(
getenv
(
"GPUWORKLOAD"
));
if
(
GPUWorkload
==
-
1
)
GPUWorkload
=
100
;
GPUDualStream
=
getenv
(
"GPUDUALSTREAM"
)
==
NULL
?
1
:
atoi
(
getenv
(
"GPUDUALSTREAM"
));
}
...
...
include/autotuner.h
0 → 100644
View file @
adefd46f
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
Volker Lindenstruth and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
#ifndef AUTOTUNER_H
#define AUTOTUNER_H
/* Chooses the workload value that is handed to rebalance() by timing
   comparisons and applying one of three algorithms (1 = AlgoSimple,
   2 = AlgoRatio, 3 = AlgoBisection). */
class Autotuner
{
public:
	/* A freshly constructed tuner is stopped. Every member receives a
	   defined value so that Workload(), Needed(), Finished() and Tune()
	   never read indeterminate data when they are called before
	   Initialize(). The values mirror the defaults of Initialize() and the
	   starting state set by Reset(), with a workload of 100. */
	Autotuner()
		: algo(3), stable(7), stopTuning(true), workload(100),
		  best_time(0.), best_workload(0),
		  a(1), b(50), c(100), x(50), limit(1),
		  fb(0.), fx(0.)
	{
	}

	/* Setting variables to initial values.
	   alg: algorithm to use (1, 2 or 3)
	   st:  index of the comparison iteration used for sampling, see Needed() */
	inline void Initialize(int alg = 3, int st = 7)
	{
		algo = alg;
		stable = st;
		Reset();
	}

	/* Resetting variables to initial values */
	void Reset();

	/* Check if autotuning is needed, depending on which comparison is finished */
	bool Needed(int iteration);

	/* Check if optimal workload value has been computed */
	bool Finished();

	/* Set a new workload value to test, depending on the algorithm */
	void Tune(double compTime);

	/* Return workload value */
	inline int Workload() const
	{
		return workload;
	}

private:
	int algo;        /* Selected algorithm (1, 2 or 3) */
	int stable;      /* Iteration index used by Needed() */
	bool stopTuning; /* True while no further tuning has to be done */
	int workload;    /* Workload value currently in use / being tested */

	/* Variables needed for AlgoSimple and AlgoRatio */
	double best_time;
	int best_workload;

	/* Variables needed for AlgoBisection */
	int a;
	int b;
	int c;
	int x;
	int limit;
	double fb, fx;

	/* Autotuning algorithms */
	void AlgoSimple(double compTime);
	void AlgoRatio(double compTime);
	void AlgoBisection(double compTime);
};
#endif
include/bioem.h
View file @
adefd46f
...
...
@@ -72,7 +72,7 @@ protected:
int
FFTAlgo
;
//Use the FFT Algorithm (Default 1)
int
DebugOutput
;
//Debug Output Level (Default 2)
int
nProjectionsAtOnce
;
//Number of projections to do at once via OpenMP (Default 1)
int
Autotuning
;
//Do the autotuning of the load-balancing between CPUs and GPUs
bool
Autotuning
;
//Do the autotuning of the load-balancing between CPUs and GPUs
};
#endif
include/defs.h
View file @
adefd46f
...
...
@@ -91,6 +91,18 @@ struct myfloat3_t
#define CUDA_FFTS_AT_ONCE 1024
//#define BIOEM_USE_NVTX
/* Autotuning
Autotuning algorithms:
1. AlgoSimple = 1; Testing workload values between 100 and 30, all multiples of 5. Taking the value with the best timing.
2. AlgoRatio = 2; Comparisons where GPU handles 100% or only 1% of the workload are timed, and then the optimal workload balance is computed.
3. AlgoBisection = 3; Based on bisection, multiple workload values are tested until the optimal one is found.
*/
#define AUTOTUNING_ALGORITHM 3
/* Recalibrate every X projections */
#define RECALIB_FACTOR 200
/* After how many comparison iterations, comparison duration becomes stable */
#define FIRST_STABLE 7
static
inline
void
*
mallocchk
(
size_t
size
)
{
void
*
ptr
=
malloc
(
size
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment