Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
BioEM
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MPIBP-Hummer
BioEM
Commits
c4d1dff7
Commit
c4d1dff7
authored
Jun 16, 2017
by
Luka Stanisic
Browse files
Options
Downloads
Patches
Plain Diff
checking CUDA driver errors
parent
dd70d174
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!2
minor fixes
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
bioem_cuda.cu
+4
-4
4 additions, 4 deletions
bioem_cuda.cu
include/bioem_cuda_internal.h
+71
-0
71 additions, 0 deletions
include/bioem_cuda_internal.h
with
75 additions
and
4 deletions
bioem_cuda.cu
+
4
−
4
View file @
c4d1dff7
...
@@ -297,13 +297,13 @@ int bioem_cuda::selectCudaDevice()
...
@@ -297,13 +297,13 @@ int bioem_cuda::selectCudaDevice()
#else
#else
unsigned
int
free
,
total
;
unsigned
int
free
,
total
;
#endif
#endif
cuInit
(
0
);
CU_ERROR_CHECK
(
cuInit
(
0
)
)
;
CUdevice
tmpDevice
;
CUdevice
tmpDevice
;
cuDeviceGet
(
&
tmpDevice
,
i
);
CU_ERROR_CHECK
(
cuDeviceGet
(
&
tmpDevice
,
i
)
)
;
CUcontext
tmpContext
;
CUcontext
tmpContext
;
cuCtxCreate
(
&
tmpContext
,
0
,
tmpDevice
);
CU_ERROR_CHECK
(
cuCtxCreate
(
&
tmpContext
,
0
,
tmpDevice
)
)
;
if
(
cuMemGetInfo
(
&
free
,
&
total
))
exit
(
1
);
if
(
cuMemGetInfo
(
&
free
,
&
total
))
exit
(
1
);
cuCtxDestroy
(
tmpContext
);
CU_ERROR_CHECK
(
cuCtxDestroy
(
tmpContext
)
)
;
checkCudaErrors
(
cudaGetDeviceProperties
(
&
deviceProp
,
i
));
checkCudaErrors
(
cudaGetDeviceProperties
(
&
deviceProp
,
i
));
if
(
DebugOutput
>=
2
&&
mpi_rank
==
0
)
printf
(
"CUDA Device %2d: %s (Rev: %d.%d - Mem Avail %lld / %lld)
\n
"
,
i
,
deviceProp
.
name
,
deviceProp
.
major
,
deviceProp
.
minor
,
(
long
long
int
)
free
,
(
long
long
int
)
deviceProp
.
totalGlobalMem
);
if
(
DebugOutput
>=
2
&&
mpi_rank
==
0
)
printf
(
"CUDA Device %2d: %s (Rev: %d.%d - Mem Avail %lld / %lld)
\n
"
,
i
,
deviceProp
.
name
,
deviceProp
.
major
,
deviceProp
.
minor
,
(
long
long
int
)
free
,
(
long
long
int
)
deviceProp
.
totalGlobalMem
);
...
...
...
...
This diff is collapsed.
Click to expand it.
include/bioem_cuda_internal.h
+
71
−
0
View file @
c4d1dff7
...
@@ -72,5 +72,76 @@ private:
...
@@ -72,5 +72,76 @@ private:
int
maxRef
;
int
maxRef
;
};
};
/* Handling CUDA Driver errors */
/* Inspired from: https://github.com/garymacindoe/cuda-cholesky */
// Expand and stringify argument.
// STRINGx(x) turns its argument into a string literal exactly as written.
// STRING(x) adds one level of indirection so that x is macro-expanded first
// and the expanded text is what gets stringified.
#define STRINGx(x) #x
#define STRING(x) STRINGx(x)
/**
 * Translate a CUDA driver API result code into a short human-readable text.
 *
 * Declared `inline` because the definition lives in a header: without it,
 * including this header from more than one translation unit produces
 * multiple-definition errors at link time.
 *
 * @param result  Result code returned by a CUDA driver API call.
 * @return Pointer to a string literal describing `result`; codes not listed
 *         below map to "Unknown error code". The caller must not free it.
 */
inline const char *cuGetError(CUresult result)
{
  switch (result)
  {
    case CUDA_SUCCESS:                              return "No errors";
    case CUDA_ERROR_INVALID_VALUE:                  return "Invalid value";
    case CUDA_ERROR_OUT_OF_MEMORY:                  return "Out of memory";
    case CUDA_ERROR_NOT_INITIALIZED:                return "Driver not initialized";
    case CUDA_ERROR_DEINITIALIZED:                  return "Driver deinitialized";
    case CUDA_ERROR_PROFILER_DISABLED:              return "Profiler disabled";
    case CUDA_ERROR_PROFILER_NOT_INITIALIZED:       return "Profiler not initialized";
    case CUDA_ERROR_PROFILER_ALREADY_STARTED:       return "Profiler already started";
    case CUDA_ERROR_PROFILER_ALREADY_STOPPED:       return "Profiler already stopped";
    case CUDA_ERROR_NO_DEVICE:                      return "No CUDA-capable device available";
    case CUDA_ERROR_INVALID_DEVICE:                 return "Invalid device";
    case CUDA_ERROR_INVALID_IMAGE:                  return "Invalid kernel image";
    case CUDA_ERROR_INVALID_CONTEXT:                return "Invalid context";
    case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:        return "Context already current";
    case CUDA_ERROR_MAP_FAILED:                     return "Map failed";
    case CUDA_ERROR_UNMAP_FAILED:                   return "Unmap failed";
    case CUDA_ERROR_ARRAY_IS_MAPPED:                return "Array is mapped";
    case CUDA_ERROR_ALREADY_MAPPED:                 return "Already mapped";
    case CUDA_ERROR_NO_BINARY_FOR_GPU:              return "No binary for GPU";
    case CUDA_ERROR_ALREADY_ACQUIRED:               return "Already acquired";
    case CUDA_ERROR_NOT_MAPPED:                     return "Not mapped";
    case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:            return "Not mapped as array";
    case CUDA_ERROR_NOT_MAPPED_AS_POINTER:          return "Not mapped as pointer";
    case CUDA_ERROR_ECC_UNCORRECTABLE:              return "Uncorrectable ECC error";
    case CUDA_ERROR_UNSUPPORTED_LIMIT:              return "Unsupported CUlimit";
    case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:         return "Context already in use";
    case CUDA_ERROR_INVALID_SOURCE:                 return "Invalid source";
    case CUDA_ERROR_FILE_NOT_FOUND:                 return "File not found";
    case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Shared object symbol not found";
    case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:      return "Shared object initialization failed";
    case CUDA_ERROR_OPERATING_SYSTEM:               return "Operating System call failed";
    case CUDA_ERROR_INVALID_HANDLE:                 return "Invalid handle";
    case CUDA_ERROR_NOT_FOUND:                      return "Not found";
    case CUDA_ERROR_NOT_READY:                      return "CUDA not ready";
    case CUDA_ERROR_LAUNCH_FAILED:                  return "Launch failed";
    case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:        return "Launch exceeded resources";
    case CUDA_ERROR_LAUNCH_TIMEOUT:                 return "Launch exceeded timeout";
    case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:  return "Launch with incompatible texturing";
    case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:    return "Peer access already enabled";
    case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:        return "Peer access not enabled";
    case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:         return "Primary context active";
    case CUDA_ERROR_CONTEXT_IS_DESTROYED:           return "Context is destroyed";
    case CUDA_ERROR_ASSERT:                         return "Device assert failed";
    case CUDA_ERROR_TOO_MANY_PEERS:                 return "Too many peers";
    case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "Host memory already registered";
    case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:     return "Host memory not registered";
    case CUDA_ERROR_UNKNOWN:                        return "Unknown error";
    default:                                        return "Unknown error code";
  }
}
/*
 * CU_ERROR_CHECK(call): evaluate a CUDA driver API call exactly once and, if
 * it does not return CUDA_SUCCESS, print a diagnostic and return the CUresult
 * from the enclosing function.
 *
 * The diagnostic contains the stringified call, the enclosing function, the
 * source file and line, the numeric result code and its cuGetError() text.
 * The call text is passed as a printf argument, never as the format string,
 * so a '%' inside the call expression cannot be read as a conversion.
 *
 * Only usable inside functions whose return type accepts a CUresult.
 * Wrapped in do { } while (false) so it behaves as a single statement.
 */
#define CU_ERROR_CHECK(call) \
  do { \
    const CUresult cuErrorCheckResult_ = (call); \
    if (cuErrorCheckResult_ != CUDA_SUCCESS) { \
      printf("CUDA driver error: %s failed in %s (%s:%d) with code %d: %s\n", \
             STRING(call), __func__, __FILE__, __LINE__, \
             static_cast<int>(cuErrorCheckResult_), \
             cuGetError(cuErrorCheckResult_)); \
      return cuErrorCheckResult_; \
    } \
  } while (false)
#endif
#endif
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
sign in
to comment