Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MPIBP-Hummer
BioEM
Commits
02123bb1
Commit
02123bb1
authored
May 14, 2014
by
David Rohr
Browse files
improve async cuda transfer, improve cuda tracing
parent
ba4e992e
Changes
4
Hide whitespace changes
Inline
Side-by-side
bioem.cpp
View file @
02123bb1
...
...
@@ -2,7 +2,7 @@
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
...
...
@@ -27,6 +27,29 @@
#include
"model.h"
#include
"map.h"
#ifdef BIOEM_USE_NVTX
#include "nvToolsExt.h"

// Palette used for the NVTX ranges; entries are handed to NVTX as
// NVTX_COLOR_ARGB values and selected by index modulo num_colors.
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff};
const int num_colors = sizeof(colors) / sizeof(colors[0]);

// cuda_custom_timeslot(name, cid)
//   Opens an NVTX range labelled with the ASCII string `name`, coloured with
//   colors[cid modulo num_colors]. Must be paired with cuda_custom_timeslot_end.
//   - The index is normalised to 0 .. num_colors - 1 even for negative `cid`
//     (C++ '%' keeps the sign of the dividend, which would otherwise index
//     before the start of colors[]).
//   - Temporaries carry a bioem_nvtx_ prefix so that an argument expression
//     using a common name such as `color_id` is not shadowed inside the macro.
//   - Arguments are parenthesised so that compound expressions expand safely.
#define cuda_custom_timeslot(name,cid) { \
	int bioem_nvtx_color_id = (cid); \
	bioem_nvtx_color_id = bioem_nvtx_color_id % num_colors; \
	if (bioem_nvtx_color_id < 0) bioem_nvtx_color_id += num_colors; \
	nvtxEventAttributes_t bioem_nvtx_attrib = {0}; \
	bioem_nvtx_attrib.version = NVTX_VERSION; \
	bioem_nvtx_attrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \
	bioem_nvtx_attrib.colorType = NVTX_COLOR_ARGB; \
	bioem_nvtx_attrib.color = colors[bioem_nvtx_color_id]; \
	bioem_nvtx_attrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \
	bioem_nvtx_attrib.message.ascii = (name); \
	nvtxRangePushEx(&bioem_nvtx_attrib); \
}

// Closes the range most recently opened with cuda_custom_timeslot().
// The expansion already ends in ';', so a trailing ';' at the call site is optional.
#define cuda_custom_timeslot_end nvtxRangePop();
#else
// Tracing disabled: both markers expand to nothing.
#define cuda_custom_timeslot(name,cid)
#define cuda_custom_timeslot_end
#endif
#include
"bioem_algorithm.h"
using
namespace
boost
;
...
...
@@ -440,6 +463,8 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
// ********************* and turns projection into Fourier space ************************
// **************************************************************************************
cuda_custom_timeslot
(
"Projection"
,
0
);
myfloat3_t
RotatedPointsModel
[
Model
.
nPointsModel
];
myfloat_t
rotmat
[
3
][
3
];
myfloat_t
alpha
,
gam
,
beta
;
...
...
@@ -516,6 +541,8 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
// ********** Omp Critical is necessary with FFTW*******
myfftw_execute_dft_r2c
(
param
.
fft_plan_r2c_forward
,
localproj
,
mapFFT
);
cuda_custom_timeslot_end
;
return
(
0
);
}
...
...
@@ -527,6 +554,8 @@ int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj
// *************** and Backtransforming it to real Space ********************************
// **************************************************************************************
cuda_custom_timeslot
(
"Convolution"
,
1
);
mycomplex_t
*
tmp
=
localCCT
[
omp_get_thread_num
()];
// **** Multiplying FFTmap with corresponding kernel ****
...
...
@@ -559,6 +588,8 @@ int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj
sumC
=
sumC
/
norm2
;
sumsquareC
=
sumsquareC
/
norm4
;
cuda_custom_timeslot_end
;
return
(
0
);
}
...
...
bioem_cuda.cu
View file @
02123bb1
...
...
@@ -2,7 +2,7 @@
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
...
...
@@ -176,7 +176,12 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map
if
(
FFTAlgo
)
{
memcpy
(
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
localmultFFT
,
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
));
checkCudaErrors
(
cudaMemcpyAsync
(
&
pConvMapFFT
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
),
cudaMemcpyHostToDevice
,
cudaStream
[
0
]));
checkCudaErrors
(
cudaMemcpyAsync
(
&
pConvMapFFT
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
),
cudaMemcpyHostToDevice
,
cudaStream
[
GPUAsync
?
2
:
0
]));
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaEventRecord
(
cudaEvent
[
2
],
cudaStream
[
2
]));
checkCudaErrors
(
cudaStreamWaitEvent
(
cudaStream
[
0
],
cudaEvent
[
2
],
0
));
}
if
(
GPUDualStream
)
{
checkCudaErrors
(
cudaEventRecord
(
cudaFFTEvent
[
0
],
cudaStream
[
0
]));
...
...
@@ -310,6 +315,11 @@ int bioem_cuda::deviceInit()
checkCudaErrors
(
cudaEventCreate
(
&
cudaFFTEvent
[
i
]));
checkCudaErrors
(
cudaMalloc
(
&
pConvMap_device
[
i
],
sizeof
(
myfloat_t
)
*
RefMap
.
refMapSize
));
}
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaStreamCreate
(
&
cudaStream
[
2
]));
checkCudaErrors
(
cudaEventCreate
(
&
cudaEvent
[
2
]));
}
if
(
FFTAlgo
)
{
...
...
@@ -358,6 +368,12 @@ int bioem_cuda::deviceExit()
{
cudaFree
(
pRefMap_device_Mod
);
}
if
(
GPUAsync
)
{
cudaStreamDestroy
(
cudaStream
[
2
]);
cudaEventDestroy
(
cudaEvent
[
2
]);
}
delete
gpumap
;
cudaThreadExit
();
...
...
include/bioem_cuda_internal.h
View file @
02123bb1
...
...
@@ -29,8 +29,8 @@ protected:
int
deviceInitialized
;
cudaStream_t
cudaStream
[
2
];
cudaEvent_t
cudaEvent
[
2
];
cudaStream_t
cudaStream
[
3
];
cudaEvent_t
cudaEvent
[
3
];
cudaEvent_t
cudaFFTEvent
[
2
];
bioem_RefMap_Mod
*
pRefMap_device_Mod
;
bioem_RefMap
*
gpumap
;
...
...
include/defs.h
View file @
02123bb1
...
...
@@ -69,6 +69,7 @@ struct myfloat3_t
// Compile-time tuning constants for the CUDA code path.
// CUDA_BLOCK_COUNT is parenthesised so that it behaves as a single value inside
// larger expressions (e.g. `n / CUDA_BLOCK_COUNT` or `n % CUDA_BLOCK_COUNT`).
#define CUDA_BLOCK_COUNT (1024 * 16)
#define CUDA_MAX_SHIFT_REDUCE 1024
#define CUDA_FFTS_AT_ONCE 1024
// Uncomment to define BIOEM_USE_NVTX (left disabled by default).
//#define BIOEM_USE_NVTX
static
inline
void
*
mallocchk
(
size_t
size
)
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment