Commit c4d1dff7 authored by Luka Stanisic's avatar Luka Stanisic
Browse files

checking CUDA driver errors

parent dd70d174
...@@ -297,13 +297,13 @@ int bioem_cuda::selectCudaDevice() ...@@ -297,13 +297,13 @@ int bioem_cuda::selectCudaDevice()
#else #else
unsigned int free, total; unsigned int free, total;
#endif #endif
cuInit(0); CU_ERROR_CHECK(cuInit(0));
CUdevice tmpDevice; CUdevice tmpDevice;
cuDeviceGet(&tmpDevice, i); CU_ERROR_CHECK(cuDeviceGet(&tmpDevice, i));
CUcontext tmpContext; CUcontext tmpContext;
cuCtxCreate(&tmpContext, 0, tmpDevice); CU_ERROR_CHECK(cuCtxCreate(&tmpContext, 0, tmpDevice));
if(cuMemGetInfo(&free, &total)) exit(1); if(cuMemGetInfo(&free, &total)) exit(1);
cuCtxDestroy(tmpContext); CU_ERROR_CHECK(cuCtxDestroy(tmpContext));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, i)); checkCudaErrors(cudaGetDeviceProperties(&deviceProp, i));
if (DebugOutput >= 2 && mpi_rank == 0) printf("CUDA Device %2d: %s (Rev: %d.%d - Mem Avail %lld / %lld)\n", i,, deviceProp.major, deviceProp.minor, (long long int) free, (long long int) deviceProp.totalGlobalMem); if (DebugOutput >= 2 && mpi_rank == 0) printf("CUDA Device %2d: %s (Rev: %d.%d - Mem Avail %lld / %lld)\n", i,, deviceProp.major, deviceProp.minor, (long long int) free, (long long int) deviceProp.totalGlobalMem);
...@@ -72,5 +72,76 @@ private: ...@@ -72,5 +72,76 @@ private:
int maxRef; int maxRef;
}; };
/* Handling CUDA Driver errors */
/* Inspired from: */
/*
 * Two-step stringification helpers.
 *   STRINGx(arg) - turns its argument into a string literal exactly as written.
 *   STRING(arg)  - macro-expands its argument first, then stringifies the result
 *                  by forwarding to STRINGx.
 */
#define STRINGx(arg) #arg
#define STRING(arg) STRINGx(arg)
/**
 * Translate a CUDA driver API status code into a short, human-readable message.
 *
 * @param result  status code returned by a CUDA driver (cu*) call
 * @return pointer to a static string literal describing the code; any code
 *         not listed in the table below yields "Unknown error code".
 *         The returned pointer must not be freed or modified.
 *
 * Declared inline because this definition appears to sit in an include-guarded
 * header; a non-inline definition would produce multiple-definition link
 * errors as soon as the header is included from more than one translation unit.
 */
inline const char * cuGetError(CUresult result) {
  switch (result) {
    case CUDA_SUCCESS: return "No errors";
    case CUDA_ERROR_INVALID_VALUE: return "Invalid value";
    case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
    case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized";
    case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized";
    case CUDA_ERROR_PROFILER_DISABLED: return "Profiler disabled";
    case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "Profiler not initialized";
    case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "Profiler already started";
    case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "Profiler already stopped";
    case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available";
    case CUDA_ERROR_INVALID_DEVICE: return "Invalid device";
    case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image";
    case CUDA_ERROR_INVALID_CONTEXT: return "Invalid context";
    case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "Context already current";
    case CUDA_ERROR_MAP_FAILED: return "Map failed";
    case CUDA_ERROR_UNMAP_FAILED: return "Unmap failed";
    case CUDA_ERROR_ARRAY_IS_MAPPED: return "Array is mapped";
    case CUDA_ERROR_ALREADY_MAPPED: return "Already mapped";
    case CUDA_ERROR_NO_BINARY_FOR_GPU: return "No binary for GPU";
    case CUDA_ERROR_ALREADY_ACQUIRED: return "Already acquired";
    case CUDA_ERROR_NOT_MAPPED: return "Not mapped";
    case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "Not mapped as array";
    case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Not mapped as pointer";
    case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error";
    case CUDA_ERROR_UNSUPPORTED_LIMIT: return "Unsupported CUlimit";
    case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "Context already in use";
    case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
    case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
    case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Shared object symbol not found";
    case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed";
    case CUDA_ERROR_OPERATING_SYSTEM: return "Operating System call failed";
    case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle";
    case CUDA_ERROR_NOT_FOUND: return "Not found";
    case CUDA_ERROR_NOT_READY: return "CUDA not ready";
    case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
    case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources";
    case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout";
    case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing";
    case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "Peer access already enabled";
    case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "Peer access not enabled";
    case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "Primary context active";
    case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "Context is destroyed";
    case CUDA_ERROR_ASSERT: return "Device assert failed";
    case CUDA_ERROR_TOO_MANY_PEERS: return "Too many peers";
    case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "Host memory already registered";
    case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "Host memory not registered";
    case CUDA_ERROR_UNKNOWN: return "Unknown error";
    // Codes added by newer driver versions that are not listed above land here.
    default: return "Unknown error code";
  }
}
/*
 * CU_ERROR_CHECK(call)
 *
 * Evaluates a CUDA driver API expression exactly once. If the result is not
 * CUDA_SUCCESS, prints the stringified call, the enclosing function, the
 * source file and line, the numeric error code and the cuGetError()
 * description, then returns the CUresult from the ENCLOSING function.
 *
 * Because the macro contains a `return`, it may only be used inside functions
 * whose return type a CUresult converts to (for example int).
 *
 * The call text is passed as a "%s" argument rather than as the printf format
 * itself, so a '%' inside the expression cannot be misread as a conversion.
 */
#define CU_ERROR_CHECK(call) \
  do { \
    CUresult cuErrorCheckResult_; \
    if ((cuErrorCheckResult_ = (call)) != CUDA_SUCCESS) { \
      printf("CUDA driver error: %s failed in %s (%s:%d) with code %d: %s\n", \
             STRING(call), __func__, __FILE__, __LINE__, \
             (int) cuErrorCheckResult_, cuGetError(cuErrorCheckResult_)); \
      return cuErrorCheckResult_; \
    } \
  } while (false)
#endif #endif
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment