Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
B
BioEM
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
MPIBP-Hummer
BioEM
Commits
ad9a1e47
Commit
ad9a1e47
authored
Jun 16, 2017
by
Luka Stanisic
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
prototype of a GPUWORKLOAD autotuning
parent
1459580e
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
52 additions
and
4 deletions
+52
-4
bioem.cpp
bioem.cpp
+36
-4
bioem_cuda.cu
bioem_cuda.cu
+13
-0
include/bioem.h
include/bioem.h
+2
-0
include/bioem_cuda_internal.h
include/bioem_cuda_internal.h
+1
-0
No files found.
bioem.cpp
View file @
ad9a1e47
...
...
@@ -97,6 +97,7 @@ bioem::bioem()
FFTAlgo
=
getenv
(
"FFTALGO"
)
==
NULL
?
1
:
atoi
(
getenv
(
"FFTALGO"
));
DebugOutput
=
getenv
(
"BIOEM_DEBUG_OUTPUT"
)
==
NULL
?
2
:
atoi
(
getenv
(
"BIOEM_DEBUG_OUTPUT"
));
nProjectionsAtOnce
=
getenv
(
"BIOEM_PROJECTIONS_AT_ONCE"
)
==
NULL
?
1
:
atoi
(
getenv
(
"BIOEM_PROJECTIONS_AT_ONCE"
));
Autotuning
=
getenv
(
"BIOEM_AUTOTUNING"
)
==
NULL
?
0
:
atoi
(
getenv
(
"BIOEM_AUTOTUNING"
));
}
bioem
::~
bioem
()
...
...
@@ -520,6 +521,12 @@ int bioem::run()
HighResTimer
timer
,
timer2
;
/* These variables are used for autotuning */
double
best_time
=
0
;
int
workload
=
getenv
(
"GPUWORKLOAD"
)
==
NULL
?
100
:
atoi
(
getenv
(
"GPUWORKLOAD"
));
int
best_workload
=
workload
;
bool
stopTuning
=
false
;
if
(
DebugOutput
>=
1
&&
mpi_rank
==
0
)
printf
(
"
\t
Main Loop GridAngles %d, CTFs %d, RefMaps %d, Shifts (%d/%d)², Pixels %d², OMP Threads %d, MPI Ranks %d
\n
"
,
param
.
nTotGridAngles
,
param
.
nTotCTFs
,
RefMap
.
ntotRefMap
,
2
*
param
.
param_device
.
maxDisplaceCenter
+
param
.
param_device
.
GridSpaceCenter
,
param
.
param_device
.
GridSpaceCenter
,
param
.
param_device
.
NumberPixels
,
omp_get_max_threads
(),
mpi_size
);
...
...
@@ -559,6 +566,7 @@ int bioem::run()
{
// *** Calculating convolutions of projection map and crosscorrelations ***
if
(
Autotuning
&&
!
stopTuning
)
timer
.
ResetStart
();
if
(
DebugOutput
>=
2
)
timer
.
ResetStart
();
createConvolutedProjectionMap
(
iOrient
,
iConv
,
proj_mapFFT
,
conv_map
,
conv_mapFFT
,
sumCONV
,
sumsquareCONV
);
if
(
DebugOutput
>=
2
)
printf
(
"
\t\t
Time Convolution %d %d: %f (rank %d)
\n
"
,
iOrient
,
iConv
,
timer
.
GetCurrentElapsedTime
(),
mpi_rank
);
...
...
@@ -576,9 +584,10 @@ int bioem::run()
compareRefMaps
(
iOrient
,
iConv
,
amp
,
pha
,
env
,
conv_map
,
conv_mapFFT
,
sumCONV
,
sumsquareCONV
);
double
compTime
=
0.
;
if
(
DebugOutput
>=
2
)
{
co
nst
double
co
mpTime
=
timer
.
GetCurrentElapsedTime
();
compTime
=
timer
.
GetCurrentElapsedTime
();
const
int
nShifts
=
2
*
param
.
param_device
.
maxDisplaceCenter
/
param
.
param_device
.
GridSpaceCenter
+
1
;
const
double
nFlops
=
(
double
)
RefMap
.
ntotRefMap
*
(
double
)
nShifts
*
(
double
)
nShifts
*
(((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
((
double
)
param
.
param_device
.
NumberPixels
-
(
double
)
param
.
param_device
.
maxDisplaceCenter
/
2.
)
*
5.
+
25.
)
/
compTime
;
...
...
@@ -588,12 +597,33 @@ int bioem::run()
printf
(
"
\t\t
Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)
\n
"
,
iOrient
,
iConv
,
compTime
,
nFlops
/
1000000000.
,
nGBs
/
1000000000.
,
nGBs2
/
1000000000.
,
mpi_rank
);
}
if
(
Autotuning
&&
!
stopTuning
&&
(
iConv
%
5
==
4
))
{
if
(
compTime
==
0.
)
compTime
=
timer
.
GetCurrentElapsedTime
();
if
(
best_time
==
0
||
compTime
<
best_time
)
{
best_time
=
compTime
;
best_workload
=
workload
;
}
workload
-=
5
;
if
(
workload
<
30
)
{
stopTuning
=
true
;
workload
=
best_workload
;
}
deviceFinishRun
();
rebalance
(
workload
);
deviceStartRun
();
}
}
if
(
DebugOutput
>=
1
)
{
printf
(
"
\t
Total time for projection %d: %f (rank %d)
\n
"
,
iOrient
,
timer2
.
GetCurrentElapsedTime
(),
mpi_rank
);
timer2
.
ResetStart
();
}
timer2
.
ResetStart
();
}
}
}
//deallocating fftw_complex vector
...
...
@@ -1395,7 +1425,7 @@ int bioem::deviceInit()
}
int
bioem
::
deviceStartRun
()
{
{
deviceInit
();
return
(
0
);
}
...
...
@@ -1413,3 +1443,5 @@ void bioem::free_device_host(void* ptr)
{
free
(
ptr
);
}
// Base-class rebalance hook: performs no work and leaves all state untouched.
// The requested workload value is accepted and ignored.
void bioem::rebalance(int workload)
{
	(void) workload; // intentionally unused
}
bioem_cuda.cu
View file @
ad9a1e47
...
...
@@ -572,6 +572,19 @@ void bioem_cuda::free_device_host(void* ptr)
cudaFreeHost
(
ptr
);
}
// Applies a new GPU workload percentage and recomputes maxRef from it.
//
// workload: requested percentage. The call is a no-op when the value lies
//           outside [0, 100] or is greater than the current GPUWorkload, so
//           this routine can only keep or lower the stored percentage.
//
// When DebugOutput >= 1 the accepted value is printed to stdout.
void bioem_cuda::rebalance(int workload)
{
	const bool isPercentage = (workload >= 0) && (workload <= 100);
	if (!isPercentage || workload > GPUWorkload)
	{
		return;
	}

	if (DebugOutput >= 1)
	{
		printf("\t\tSetting GPU workload to %d%%\n", workload);
	}

	GPUWorkload = workload;

	// Integer arithmetic in size_t: the result is rounded down.
	const size_t totalRefMaps = (size_t) RefMap.ntotRefMap;
	const size_t percentage = (size_t) GPUWorkload;
	maxRef = totalRefMaps * percentage / 100;
}
bioem
*
bioem_cuda_create
()
{
int
count
;
...
...
include/bioem.h
View file @
ad9a1e47
...
...
@@ -42,6 +42,7 @@ public:
virtual
void
*
malloc_device_host
(
size_t
size
);
virtual
void
free_device_host
(
void
*
ptr
);
virtual
void
rebalance
(
int
workload
);
int
createProjection
(
int
iMap
,
mycomplex_t
*
map
);
int
calcross_cor
(
myfloat_t
*
localmap
,
myfloat_t
&
sum
,
myfloat_t
&
sumsquare
);
...
...
@@ -71,6 +72,7 @@ protected:
int
FFTAlgo
;
//Use the FFT Algorithm (Default 1)
int
DebugOutput
;
//Debug Output Level (Default 2)
int
nProjectionsAtOnce
;
//Number of projections to do at once via OpenMP (Default 1)
int
Autotuning
;
//Do the autotuning of the load-balancing between CPUs and GPUs
};
#endif
include/bioem_cuda_internal.h
View file @
ad9a1e47
...
...
@@ -33,6 +33,7 @@ public:
virtual
int
compareRefMaps
(
int
iOrient
,
int
iConv
,
myfloat_t
amp
,
myfloat_t
pha
,
myfloat_t
env
,
const
myfloat_t
*
conv_map
,
mycomplex_t
*
localmultFFT
,
myfloat_t
sumC
,
myfloat_t
sumsquareC
,
const
int
startMap
=
0
);
virtual
void
*
malloc_device_host
(
size_t
size
);
virtual
void
free_device_host
(
void
*
ptr
);
virtual
void
rebalance
(
int
workload
);
protected:
virtual
int
deviceInit
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment