Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
On Thursday, 7 July, from 1 pm to 3 pm, GitLab will undergo maintenance with a short downtime.
Open sidebar
MPIBP-Hummer
BioEM
Commits
92681f01
Commit
92681f01
authored
Jul 03, 2017
by
Luka Stanisic
Browse files
additional GPU debugging info
parent
25c278cf
Changes
1
Hide whitespace changes
Inline
Side-by-side
bioem_cuda.cu
View file @
92681f01
...
...
@@ -67,6 +67,70 @@ static const char *cufftGetErrorStrung(cufftResult error)
return
"UNKNOWN"
;
}
/* Handling CUDA Driver API errors.
 * On failure, prints the numeric code, its human-readable description
 * (via cuGetError), and the source location, then returns the CUresult
 * from the enclosing function -- so this macro may only be used inside
 * functions whose return type can accept a CUresult value. */
#define cuErrorCheck(call) \
	do { \
		CUresult cuErr_; /* renamed: identifiers like __error__ are reserved for the implementation */ \
		if ((cuErr_ = (call)) != CUDA_SUCCESS) { \
			printf("CUDA Driver Error %d / %s (%s %d)\n", (int) cuErr_, cuGetError(cuErr_), __FILE__, __LINE__); \
			return cuErr_; \
		} \
	} while (false)
/* Translate a CUDA Driver API status code into a human-readable string.
 * Any code not present in the table maps to "Unknown error code". */
static const char *cuGetError(CUresult result)
{
	/* Code/message pairs; scanned linearly (the list is short and this
	 * path is only taken when an error is being reported). */
	struct CuErrorEntry
	{
		CUresult code;
		const char *text;
	};
	static const CuErrorEntry knownErrors[] = {
		{ CUDA_SUCCESS, "No errors" },
		{ CUDA_ERROR_INVALID_VALUE, "Invalid value" },
		{ CUDA_ERROR_OUT_OF_MEMORY, "Out of memory" },
		{ CUDA_ERROR_NOT_INITIALIZED, "Driver not initialized" },
		{ CUDA_ERROR_DEINITIALIZED, "Driver deinitialized" },
		{ CUDA_ERROR_PROFILER_DISABLED, "Profiler disabled" },
		{ CUDA_ERROR_PROFILER_NOT_INITIALIZED, "Profiler not initialized" },
		{ CUDA_ERROR_PROFILER_ALREADY_STARTED, "Profiler already started" },
		{ CUDA_ERROR_PROFILER_ALREADY_STOPPED, "Profiler already stopped" },
		{ CUDA_ERROR_NO_DEVICE, "No CUDA-capable device available" },
		{ CUDA_ERROR_INVALID_DEVICE, "Invalid device" },
		{ CUDA_ERROR_INVALID_IMAGE, "Invalid kernel image" },
		{ CUDA_ERROR_INVALID_CONTEXT, "Invalid context" },
		{ CUDA_ERROR_CONTEXT_ALREADY_CURRENT, "Context already current" },
		{ CUDA_ERROR_MAP_FAILED, "Map failed" },
		{ CUDA_ERROR_UNMAP_FAILED, "Unmap failed" },
		{ CUDA_ERROR_ARRAY_IS_MAPPED, "Array is mapped" },
		{ CUDA_ERROR_ALREADY_MAPPED, "Already mapped" },
		{ CUDA_ERROR_NO_BINARY_FOR_GPU, "No binary for GPU" },
		{ CUDA_ERROR_ALREADY_ACQUIRED, "Already acquired" },
		{ CUDA_ERROR_NOT_MAPPED, "Not mapped" },
		{ CUDA_ERROR_NOT_MAPPED_AS_ARRAY, "Not mapped as array" },
		{ CUDA_ERROR_NOT_MAPPED_AS_POINTER, "Not mapped as pointer" },
		{ CUDA_ERROR_ECC_UNCORRECTABLE, "Uncorrectable ECC error" },
		{ CUDA_ERROR_UNSUPPORTED_LIMIT, "Unsupported CUlimit" },
		{ CUDA_ERROR_CONTEXT_ALREADY_IN_USE, "Context already in use" },
		{ CUDA_ERROR_INVALID_SOURCE, "Invalid source" },
		{ CUDA_ERROR_FILE_NOT_FOUND, "File not found" },
		{ CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, "Shared object symbol not found" },
		{ CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, "Shared object initialization failed" },
		{ CUDA_ERROR_OPERATING_SYSTEM, "Operating System call failed" },
		{ CUDA_ERROR_INVALID_HANDLE, "Invalid handle" },
		{ CUDA_ERROR_NOT_FOUND, "Not found" },
		{ CUDA_ERROR_NOT_READY, "CUDA not ready" },
		{ CUDA_ERROR_LAUNCH_FAILED, "Launch failed" },
		{ CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, "Launch exceeded resources" },
		{ CUDA_ERROR_LAUNCH_TIMEOUT, "Launch exceeded timeout" },
		{ CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, "Launch with incompatible texturing" },
		{ CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, "Peer access already enabled" },
		{ CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, "Peer access not enabled" },
		{ CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE, "Primary context active" },
		{ CUDA_ERROR_CONTEXT_IS_DESTROYED, "Context is destroyed" },
		{ CUDA_ERROR_ASSERT, "Device assert failed" },
		{ CUDA_ERROR_TOO_MANY_PEERS, "Too many peers" },
		{ CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED, "Host memory already registered" },
		{ CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED, "Host memory not registered" },
		{ CUDA_ERROR_UNKNOWN, "Unknown error" }
	};
	const int nKnown = (int) (sizeof(knownErrors) / sizeof(knownErrors[0]));
	for (int i = 0; i < nKnown; i++)
	{
		if (knownErrors[i].code == result)
			return knownErrors[i].text;
	}
	return "Unknown error code";
}
bioem_cuda
::
bioem_cuda
()
{
deviceInitialized
=
0
;
...
...
@@ -172,15 +236,35 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
cout
<<
"Error startMap not implemented for GPU Code
\n
"
;
exit
(
1
);
}
#ifdef DEBUG_GPU
float
time
;
cudaEvent_t
start
,
stop
;
checkCudaErrors
(
cudaEventCreate
(
&
start
));
checkCudaErrors
(
cudaEventCreate
(
&
stop
));
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
#endif
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaEventSynchronize
(
cudaEvent
[
iConv
&
1
]));
}
#ifdef DEBUG_GPU
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
printf
(
"
\t\t\t
GPU: time to synch projections %1.6f sec
\n
"
,
time
/
1000
);
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
#endif
if
(
FFTAlgo
)
{
memcpy
(
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
localmultFFT
,
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
));
checkCudaErrors
(
cudaMemcpyAsync
(
&
pConvMapFFT
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
&
pConvMapFFT_Host
[(
iConv
&
1
)
*
param
.
FFTMapSize
],
param
.
FFTMapSize
*
sizeof
(
mycomplex_t
),
cudaMemcpyHostToDevice
,
cudaStream
[
GPUAsync
?
2
:
0
]));
#ifdef DEBUG_GPU
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
printf
(
"
\t\t\t
GPU: time for memcpy %1.6f sec
\n
"
,
time
/
1000
);
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
#endif
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaEventRecord
(
cudaEvent
[
2
],
cudaStream
[
2
]));
...
...
@@ -204,7 +288,7 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
}
cuDoRefMapsFFT
<<<
divup
(
num
,
CUDA_THREAD_COUNT
),
CUDA_THREAD_COUNT
,
0
,
cudaStream
[
j
&
1
]
>>>
(
iOrient
,
iConv
,
amp
,
pha
,
env
,
pFFTtmp
[
j
&
1
],
sumC
,
sumsquareC
,
pProb_device
,
param
.
param_device
,
*
gpumap
,
num
,
i
);
}
checkCudaErrors
(
cuda
Ge
tLastError
());
checkCudaErrors
(
cuda
PeekA
tLastError
());
if
(
GPUDualStream
)
{
checkCudaErrors
(
cudaEventRecord
(
cudaFFTEvent
[
1
],
cudaStream
[
1
]));
...
...
@@ -214,7 +298,13 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
else
{
checkCudaErrors
(
cudaMemcpyAsync
(
pConvMap_device
[
iConv
&
1
],
conv_map
,
sizeof
(
myfloat_t
)
*
RefMap
.
refMapSize
,
cudaMemcpyHostToDevice
,
cudaStream
[
0
]));
#ifdef DEBUG_GPU
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
printf
(
"
\t\t\t
GPU: time for memcpy %1.6f sec
\n
"
,
time
/
1000
);
checkCudaErrors
(
cudaEventRecord
(
start
,
0
)
);
#endif
if
(
GPUAlgo
==
2
)
//Loop over shifts
{
const
int
nShifts
=
2
*
param
.
param_device
.
maxDisplaceCenter
/
param
.
param_device
.
GridSpaceCenter
+
1
;
...
...
@@ -261,10 +351,23 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
exit
(
1
);
}
}
#ifdef DEBUG_GPU
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
printf
(
"
\t\t\t
GPU: time to run CUDA %1.6f sec
\n
"
,
time
/
1000
);
checkCudaErrors
(
cudaEventRecord
(
start
,
0
));
#endif
if
(
GPUWorkload
<
100
)
{
bioem
::
compareRefMaps
(
iOrient
,
iConv
,
amp
,
pha
,
env
,
conv_map
,
localmultFFT
,
sumC
,
sumsquareC
,
maxRef
);
}
#ifdef DEBUG_GPU
checkCudaErrors
(
cudaEventRecord
(
stop
,
0
));
checkCudaErrors
(
cudaEventSynchronize
(
stop
));
checkCudaErrors
(
cudaEventElapsedTime
(
&
time
,
start
,
stop
));
printf
(
"
\t\t\t
GPU: time to run OMP %1.6f sec
\n
"
,
time
/
1000
);
#endif
if
(
GPUAsync
)
{
checkCudaErrors
(
cudaEventRecord
(
cudaEvent
[
iConv
&
1
],
cudaStream
[
0
]));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment