Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
MPIBP-Hummer
BioEM
Commits
a6e19b48
Commit
a6e19b48
authored
Jun 30, 2017
by
Luka Stanisic
Browse files
additional information for debugging GPU performance
parent
861d145b
Changes
1
Show whitespace changes
Inline
Side-by-side
bioem_cuda.cu
View file @
a6e19b48
...
@@ -238,37 +238,34 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
...
@@ -238,37 +238,34 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
}
}
float
time
;
float
time
;
#ifdef DEBUG_GPU
cudaEvent_t
start
,
stop
;
cudaEvent_t
start
,
stop
;
if
(
DebugOutput
>=
4
)
{
checkCudaErrors
(
cudaEventCreate
(
&
start
));
checkCudaErrors
(
cudaEventCreate
(
&
start
));
checkCudaErrors
(
cudaEventCreate
(
&
stop
));
checkCudaErrors
(
cudaEventCreate
(
&
stop
));
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
}
#endif
if
(
GPUAsync
)
if
(
GPUAsync
)
{
{
checkCudaErrors
(
cudaEventSynchronize
(
cudaEvent
[
iConv
&
1
]));
checkCudaErrors
(
cudaEventSynchronize
(
cudaEvent
[
iConv
&
1
]));
}
}
if
(
DebugOutput
>=
4
)
#ifdef DEBUG_GPU
{
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
printf
(
"
\t\t\t
GPU: time to synch projections %1.6f sec
\n
"
,
time
/
1000
);
printf
(
"
\t\t\t
GPU: time to synch projections %1.6f sec
\n
"
,
time
/
1000
);
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
}
#endif
if
(
FFTAlgo
)
if
(
FFTAlgo
)
{
{
memcpy
(
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
localmultFFT
,
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
));
memcpy
(
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
localmultFFT
,
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
));
checkCudaErrors
(
cudaMemcpyAsync
(
&
pConvMapFFT
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
),
cudaMemcpyHostToDevice
,
cudaStream
[
GPUAsync
?
2
:
0
]));
checkCudaErrors
(
cudaMemcpyAsync
(
&
pConvMapFFT
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
),
cudaMemcpyHostToDevice
,
cudaStream
[
GPUAsync
?
2
:
0
]));
if
(
DebugOutput
>=
4
)
#ifdef DEBUG_GPU
{
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
printf
(
"
\t\t\t
GPU: time for memcpy %1.6f sec
\n
"
,
time
/
1000
);
printf
(
"
\t\t\t
GPU: time for memcpy %1.6f sec
\n
"
,
time
/
1000
);
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
}
#endif
if
(
GPUAsync
)
if
(
GPUAsync
)
{
{
checkCudaErrors
(
cudaEventRecord
(
cudaEvent
[
2
],
cudaStream
[
2
]));
checkCudaErrors
(
cudaEventRecord
(
cudaEvent
[
2
],
cudaStream
[
2
]));
...
@@ -302,14 +299,13 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
...
@@ -302,14 +299,13 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
else
else
{
{
checkCudaErrors
(
cudaMemcpyAsync
(
pConvMap_device
[
iConv
&
1
],
conv_map
,
sizeof
(
myfloat_t
)
*
RefMap
.
refMapSize
,
cudaMemcpyHostToDevice
,
cudaStream
[
0
]));
checkCudaErrors
(
cudaMemcpyAsync
(
pConvMap_device
[
iConv
&
1
],
conv_map
,
sizeof
(
myfloat_t
)
*
RefMap
.
refMapSize
,
cudaMemcpyHostToDevice
,
cudaStream
[
0
]));
if
(
DebugOutput
>=
4
)
#ifdef DEBUG_GPU
{
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
printf
(
"
\t\t\t
GPU: time for memcpy %1.6f sec
\n
"
,
time
/
1000
);
printf
(
"
\t\t\t
GPU: time for memcpy %1.6f sec
\n
"
,
time
/
1000
);
checkCudaErrors
(
cudaEventRecord
(
start
,
0
)
);
checkCudaErrors
(
cudaEventRecord
(
start
,
0
)
);
}
#endif
if
(
GPUAlgo
==
2
)
//Loop over shifts
if
(
GPUAlgo
==
2
)
//Loop over shifts
{
{
const
int
nShifts
=
2
*
param
.
param_device
.
maxDisplaceCenter
/
param
.
param_device
.
GridSpaceCenter
+
1
;
const
int
nShifts
=
2
*
param
.
param_device
.
maxDisplaceCenter
/
param
.
param_device
.
GridSpaceCenter
+
1
;
...
@@ -356,25 +352,23 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
...
@@ -356,25 +352,23 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
exit
(
1
);
exit
(
1
);
}
}
}
}
if
(
DebugOutput
>=
4
)
#ifdef DEBUG_GPU
{
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
printf
(
"
\t\t\t
GPU: time to run CUDA %1.6f sec
\n
"
,
time
/
1000
);
printf
(
"
\t\t\t
GPU: time to run CUDA %1.6f sec
\n
"
,
time
/
1000
);
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
}
#endif
if
(
GPUWorkload
<
100
)
if
(
GPUWorkload
<
100
)
{
{
bioem
::
compareRefMaps
(
iOrient
,
iConv
,
amp
,
pha
,
env
,
conv_map
,
localmultFFT
,
sumC
,
sumsquareC
,
maxRef
);
bioem
::
compareRefMaps
(
iOrient
,
iConv
,
amp
,
pha
,
env
,
conv_map
,
localmultFFT
,
sumC
,
sumsquareC
,
maxRef
);
}
}
if
(
DebugOutput
>=
4
)
#ifdef DEBUG_GPU
{
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
printf
(
"
\t\t\t
GPU: time to run OMP %1.6f sec
\n
"
,
time
/
1000
);
printf
(
"
\t\t\t
GPU: time to run OMP %1.6f sec
\n
"
,
time
/
1000
);
}
#endif
if
(
GPUAsync
)
if
(
GPUAsync
)
{
{
checkCudaErrors
(
cudaEventRecord
(
cudaEvent
[
iConv
&
1
],
cudaStream
[
0
]));
checkCudaErrors
(
cudaEventRecord
(
cudaEvent
[
iConv
&
1
],
cudaStream
[
0
]));
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment