Commit 60fde8f9 authored by Luka Stanisic

fixing CUDA for EXCLUSIVE_PROCESS mode and improving GPU initialization

parent 92681f01
......@@ -363,19 +363,23 @@ int bioem::configure(int ac, char* av[])
printf("Time Precalculate %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart();
}
if(!param.printModel)pProb.init(RefMap.ntotRefMap, param.nTotGridAngles, param.nTotCC, *this);
// ****************** Initializing pointers *********************
deviceInit();
if (DebugOutput >= 2 && mpi_rank == 0)
{
printf("Time Init Probabilities %f\n", timer.GetCurrentElapsedTime());
printf("Time Device Init %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart();
}
// ****************** Initializng pointers *********************
deviceInit();
if(!param.printModel)pProb.init(RefMap.ntotRefMap, param.nTotGridAngles, param.nTotCC, *this);
if (DebugOutput >= 2 && mpi_rank == 0) printf("Time Device Init %f\n", timer.GetCurrentElapsedTime());
if (DebugOutput >= 2 && mpi_rank == 0)
{
printf("Time Init Probabilities %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart();
}
return(0);
}
......
......@@ -382,41 +382,34 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
int bioem_cuda::selectCudaDevice()
{
int count;
long long int bestDeviceSpeed = -1;
int bestDevice;
int bestDevice = 0;
cudaDeviceProp deviceProp;
/* Initializing CUDA driver API */
cuErrorCheck(cuInit(0));
/* Get number of available CUDA devices */
checkCudaErrors(cudaGetDeviceCount(&count));
if (count == 0)
{
printf("No CUDA device detected\n");
return(1);
}
for (int i = 0;i < count;i++)
/* Find the best GPU */
long long int bestDeviceSpeed = -1, deviceSpeed = -1;
for (int i = 0; i < count; i++)
{
#if CUDA_VERSION > 3010
size_t free, total;
#else
unsigned int free, total;
#endif
cuInit(0);
CUdevice tmpDevice;
cuDeviceGet(&tmpDevice, i);
CUcontext tmpContext;
cuCtxCreate(&tmpContext, 0, tmpDevice);
if(cuMemGetInfo(&free, &total)) exit(1);
cuCtxDestroy(tmpContext);
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, i));
if (DebugOutput >= 2 && mpi_rank == 0) printf("CUDA Device %2d: %s (Rev: %d.%d - Mem Avail %lld / %lld)\n", i, deviceProp.name, deviceProp.major, deviceProp.minor, (long long int) free, (long long int) deviceProp.totalGlobalMem);
long long int deviceSpeed = (long long int) deviceProp.multiProcessorCount * (long long int) deviceProp.clockRate * (long long int) deviceProp.warpSize;
cudaGetDeviceProperties(&deviceProp, i);
deviceSpeed = (long long int) deviceProp.multiProcessorCount * (long long int) deviceProp.clockRate * (long long int) deviceProp.warpSize;
if (deviceSpeed > bestDeviceSpeed)
{
bestDevice = i;
bestDeviceSpeed = deviceSpeed;
}
}
/* Get user-specified GPU choice */
if (getenv("GPUDEVICE"))
{
int device = atoi(getenv("GPUDEVICE"));
......@@ -434,13 +427,30 @@ int bioem_cuda::selectCudaDevice()
if (device < 0)
{
printf("Negative CUDA device specified: %d, invalid!\n", device);
exit(1);
}
bestDevice = device;
}
checkCudaErrors(cudaSetDevice(bestDevice));
cudaGetDeviceProperties(&deviceProp ,bestDevice);
/* Set CUDA processes to appropriate devices */
cudaGetDeviceProperties(&deviceProp, bestDevice);
if (deviceProp.computeMode == 0)
{
checkCudaErrors(cudaSetDevice(bestDevice));
}
else
{
if (DebugOutput >= 1)
{
printf("CUDA device %d is not set in DEFAULT mode, make sure that CUDA processes are pinned as planned!\n", bestDevice);
printf("Pinning process %d to CUDA device %d\n", mpi_rank, bestDevice);
}
checkCudaErrors(cudaSetDevice(bestDevice));
/* This synchronization is needed in order to detect bogus silent errors from cudaSetDevice call */
checkCudaErrors(cudaDeviceSynchronize());
}
/* Debugging information about CUDA devices used by the current process */
if (DebugOutput >= 3)
{
printf("Using CUDA Device %s with Properties:\n", deviceProp.name);
......@@ -459,13 +469,33 @@ int bioem_cuda::selectCudaDevice()
printf("memoryClockRate = %d\n", deviceProp.memoryClockRate);
printf("multiProcessorCount = %d\n", deviceProp.multiProcessorCount);
printf("textureAlignment = %lld\n", (unsigned long long int) deviceProp.textureAlignment);
printf("computeMode = %d\n", deviceProp.computeMode);
#if CUDA_VERSION > 3010
size_t free, total;
#else
unsigned int free, total;
#endif
if (deviceProp.computeMode == 0)
{
CUdevice tmpDevice;
cuErrorCheck(cuDeviceGet(&tmpDevice, bestDevice));
CUcontext tmpContext;
cuErrorCheck(cuCtxCreate(&tmpContext, 0, tmpDevice));
cuErrorCheck(cuMemGetInfo(&free, &total));
cuErrorCheck(cuCtxDestroy(tmpContext));
}
else
{
cuErrorCheck(cuMemGetInfo(&free, &total));
}
printf("free memory = %lld; total memory = %lld\n", free, total);
}
if (DebugOutput >= 1)
{
printf("BioEM for CUDA initialized (MPI Rank %d), %d GPUs found, using GPU %d\n", mpi_rank, count, bestDevice);
}
return(0);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment