Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MPIBP-Hummer
BioEM
Commits
02123bb1
Commit
02123bb1
authored
May 14, 2014
by
David Rohr
Browse files
improve async cuda transfer, improve cuda tracing
parent
ba4e992e
Changes
4
Hide whitespace changes
Inline
Side-by-side
bioem.cpp
View file @
02123bb1
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
< BioEM software for Bayesian inference of Electron Microscopy images>
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
Max Planck Institute of Biophysics, Frankfurt, Germany.
See license statement for terms of distribution.
See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
...
@@ -27,6 +27,29 @@
...
@@ -27,6 +27,29 @@
#include
"model.h"
#include
"model.h"
#include
"map.h"
#include
"map.h"
#ifdef BIOEM_USE_NVTX
#include
"nvToolsExt.h"
const
uint32_t
colors
[]
=
{
0x0000ff00
,
0x000000ff
,
0x00ffff00
,
0x00ff00ff
,
0x0000ffff
,
0x00ff0000
,
0x00ffffff
};
const
int
num_colors
=
sizeof
(
colors
)
/
sizeof
(
colors
[
0
]);
#define cuda_custom_timeslot(name,cid) { \
int color_id = cid; \
color_id = color_id%num_colors;\
nvtxEventAttributes_t eventAttrib = {0}; \
eventAttrib.version = NVTX_VERSION; \
eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \
eventAttrib.colorType = NVTX_COLOR_ARGB; \
eventAttrib.color = colors[color_id]; \
eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \
eventAttrib.message.ascii = name; \
nvtxRangePushEx(&eventAttrib); \
}
#define cuda_custom_timeslot_end nvtxRangePop();
#else
#define cuda_custom_timeslot(name,cid)
#define cuda_custom_timeslot_end
#endif
#include
"bioem_algorithm.h"
#include
"bioem_algorithm.h"
using
namespace
boost
;
using
namespace
boost
;
...
@@ -440,6 +463,8 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
...
@@ -440,6 +463,8 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
// ********************* and turns projection into Fourier space ************************
// ********************* and turns projection into Fourier space ************************
// **************************************************************************************
// **************************************************************************************
cuda_custom_timeslot
(
"Projection"
,
0
);
myfloat3_t
RotatedPointsModel
[
Model
.
nPointsModel
];
myfloat3_t
RotatedPointsModel
[
Model
.
nPointsModel
];
myfloat_t
rotmat
[
3
][
3
];
myfloat_t
rotmat
[
3
][
3
];
myfloat_t
alpha
,
gam
,
beta
;
myfloat_t
alpha
,
gam
,
beta
;
...
@@ -516,6 +541,8 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
...
@@ -516,6 +541,8 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
// ********** Omp Critical is necessary with FFTW*******
// ********** Omp Critical is necessary with FFTW*******
myfftw_execute_dft_r2c
(
param
.
fft_plan_r2c_forward
,
localproj
,
mapFFT
);
myfftw_execute_dft_r2c
(
param
.
fft_plan_r2c_forward
,
localproj
,
mapFFT
);
cuda_custom_timeslot_end
;
return
(
0
);
return
(
0
);
}
}
...
@@ -527,6 +554,8 @@ int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj
...
@@ -527,6 +554,8 @@ int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj
// *************** and Backtransforming it to real Space ********************************
// *************** and Backtransforming it to real Space ********************************
// **************************************************************************************
// **************************************************************************************
cuda_custom_timeslot
(
"Convolution"
,
1
);
mycomplex_t
*
tmp
=
localCCT
[
omp_get_thread_num
()];
mycomplex_t
*
tmp
=
localCCT
[
omp_get_thread_num
()];
// **** Multiplying FFTmap with corresponding kernel ****
// **** Multiplying FFTmap with corresponding kernel ****
...
@@ -559,6 +588,8 @@ int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj
...
@@ -559,6 +588,8 @@ int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj
sumC
=
sumC
/
norm2
;
sumC
=
sumC
/
norm2
;
sumsquareC
=
sumsquareC
/
norm4
;
sumsquareC
=
sumsquareC
/
norm4
;
cuda_custom_timeslot_end
;
return
(
0
);
return
(
0
);
}
}
...
...
bioem_cuda.cu
View file @
02123bb1
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
< BioEM software for Bayesian inference of Electron Microscopy images>
< BioEM software for Bayesian inference of Electron Microscopy images>
Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
Max Planck Institute of Biophysics, Frankfurt, Germany.
Max Planck Institute of Biophysics, Frankfurt, Germany.
See license statement for terms of distribution.
See license statement for terms of distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
...
@@ -176,7 +176,12 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map
...
@@ -176,7 +176,12 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map
if
(
FFTAlgo
)
if
(
FFTAlgo
)
{
{
memcpy
(
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
localmultFFT
,
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
));
memcpy
(
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
localmultFFT
,
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
));
checkCudaErrors
(
cudaMemcpyAsync
(
&
pConvMapFFT
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
),
cudaMemcpyHostToDevice
,
cudaStream
[
0
]));
checkCudaErrors
(
cudaMemcpyAsync
(
&
pConvMapFFT
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
),
cudaMemcpyHostToDevice
,
cudaStream
[
GPUAsync
?
2
:
0
]));
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaEventRecord
(
cudaEvent
[
2
],
cudaStream
[
2
]));
checkCudaErrors
(
cudaStreamWaitEvent
(
cudaStream
[
0
],
cudaEvent
[
2
],
0
));
}
if
(
GPUDualStream
)
if
(
GPUDualStream
)
{
{
checkCudaErrors
(
cudaEventRecord
(
cudaFFTEvent
[
0
],
cudaStream
[
0
]));
checkCudaErrors
(
cudaEventRecord
(
cudaFFTEvent
[
0
],
cudaStream
[
0
]));
...
@@ -310,6 +315,11 @@ int bioem_cuda::deviceInit()
...
@@ -310,6 +315,11 @@ int bioem_cuda::deviceInit()
checkCudaErrors
(
cudaEventCreate
(
&
cudaFFTEvent
[
i
]));
checkCudaErrors
(
cudaEventCreate
(
&
cudaFFTEvent
[
i
]));
checkCudaErrors
(
cudaMalloc
(
&
pConvMap_device
[
i
],
sizeof
(
myfloat_t
)
*
RefMap
.
refMapSize
));
checkCudaErrors
(
cudaMalloc
(
&
pConvMap_device
[
i
],
sizeof
(
myfloat_t
)
*
RefMap
.
refMapSize
));
}
}
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaStreamCreate
(
&
cudaStream
[
2
]));
checkCudaErrors
(
cudaEventCreate
(
&
cudaEvent
[
2
]));
}
if
(
FFTAlgo
)
if
(
FFTAlgo
)
{
{
...
@@ -358,6 +368,12 @@ int bioem_cuda::deviceExit()
...
@@ -358,6 +368,12 @@ int bioem_cuda::deviceExit()
{
{
cudaFree
(
pRefMap_device_Mod
);
cudaFree
(
pRefMap_device_Mod
);
}
}
if
(
GPUAsync
)
{
cudaStreamDestroy
(
cudaStream
[
2
]);
cudaEventDestroy
(
cudaEvent
[
2
]);
}
delete
gpumap
;
delete
gpumap
;
cudaThreadExit
();
cudaThreadExit
();
...
...
include/bioem_cuda_internal.h
View file @
02123bb1
...
@@ -29,8 +29,8 @@ protected:
...
@@ -29,8 +29,8 @@ protected:
int
deviceInitialized
;
int
deviceInitialized
;
cudaStream_t
cudaStream
[
2
];
cudaStream_t
cudaStream
[
3
];
cudaEvent_t
cudaEvent
[
2
];
cudaEvent_t
cudaEvent
[
3
];
cudaEvent_t
cudaFFTEvent
[
2
];
cudaEvent_t
cudaFFTEvent
[
2
];
bioem_RefMap_Mod
*
pRefMap_device_Mod
;
bioem_RefMap_Mod
*
pRefMap_device_Mod
;
bioem_RefMap
*
gpumap
;
bioem_RefMap
*
gpumap
;
...
...
include/defs.h
View file @
02123bb1
...
@@ -69,6 +69,7 @@ struct myfloat3_t
...
@@ -69,6 +69,7 @@ struct myfloat3_t
#define CUDA_BLOCK_COUNT 1024 * 16
#define CUDA_BLOCK_COUNT 1024 * 16
#define CUDA_MAX_SHIFT_REDUCE 1024
#define CUDA_MAX_SHIFT_REDUCE 1024
#define CUDA_FFTS_AT_ONCE 1024
#define CUDA_FFTS_AT_ONCE 1024
//#define BIOEM_USE_NVTX
static
inline
void
*
mallocchk
(
size_t
size
)
static
inline
void
*
mallocchk
(
size_t
size
)
{
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment