Commit 861d145b authored by Luka Stanisic

improving GPU initialization

parent d8564df0
...@@ -368,19 +368,24 @@ int bioem::configure(int ac, char* av[]) ...@@ -368,19 +368,24 @@ int bioem::configure(int ac, char* av[])
printf("Time Precalculate %f\n", timer.GetCurrentElapsedTime()); printf("Time Precalculate %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart(); timer.ResetStart();
} }
if(!param.printModel)pProb.init(RefMap.ntotRefMap, param.nTotGridAngles, param.nTotCC, *this);
// ****************** Initializing pointers *********************
deviceInit();
if (DebugOutput >= 2 && mpi_rank == 0) if (DebugOutput >= 2 && mpi_rank == 0)
{ {
printf("Time Init Probabilities %f\n", timer.GetCurrentElapsedTime()); printf("Time Device Init %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart(); timer.ResetStart();
} }
// ****************** Initializng pointers ********************* if(!param.printModel)pProb.init(RefMap.ntotRefMap, param.nTotGridAngles, param.nTotCC, *this);
deviceInit();
if (DebugOutput >= 2 && mpi_rank == 0) printf("Time Device Init %f\n", timer.GetCurrentElapsedTime()); if (DebugOutput >= 2 && mpi_rank == 0)
{
printf("Time Init Probabilities %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart();
}
return(0); return(0);
} }
......
...@@ -389,41 +389,34 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t ...@@ -389,41 +389,34 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
int bioem_cuda::selectCudaDevice() int bioem_cuda::selectCudaDevice()
{ {
int count; int count;
int bestDevice = 0;
long long int bestDeviceSpeed = -1;
int bestDevice;
cudaDeviceProp deviceProp; cudaDeviceProp deviceProp;
/* Initializing CUDA driver API */
cuErrorCheck(cuInit(0));
/* Get number of available CUDA devices */
checkCudaErrors(cudaGetDeviceCount(&count)); checkCudaErrors(cudaGetDeviceCount(&count));
if (count == 0) if (count == 0)
{ {
printf("No CUDA device detected\n"); printf("No CUDA device detected\n");
return(1); return(1);
} }
for (int i = 0;i < count;i++)
/* Find the best GPU */
long long int bestDeviceSpeed = -1, deviceSpeed = -1;
for (int i = 0; i < count; i++)
{ {
#if CUDA_VERSION > 3010 cudaGetDeviceProperties(&deviceProp, i);
size_t free, total; deviceSpeed = (long long int) deviceProp.multiProcessorCount * (long long int) deviceProp.clockRate * (long long int) deviceProp.warpSize;
#else
unsigned int free, total;
#endif
cuErrorCheck(cuInit(0));
CUdevice tmpDevice;
cuErrorCheck(cuDeviceGet(&tmpDevice, i));
CUcontext tmpContext;
cuErrorCheck(cuCtxCreate(&tmpContext, 0, tmpDevice));
if(cuMemGetInfo(&free, &total)) exit(1);
cuErrorCheck(cuCtxDestroy(tmpContext));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, i));
if (DebugOutput >= 2 && mpi_rank == 0) printf("CUDA Device %2d: %s (Rev: %d.%d - Mem Avail %lld / %lld)\n", i, deviceProp.name, deviceProp.major, deviceProp.minor, (long long int) free, (long long int) deviceProp.totalGlobalMem);
long long int deviceSpeed = (long long int) deviceProp.multiProcessorCount * (long long int) deviceProp.clockRate * (long long int) deviceProp.warpSize;
if (deviceSpeed > bestDeviceSpeed) if (deviceSpeed > bestDeviceSpeed)
{ {
bestDevice = i; bestDevice = i;
bestDeviceSpeed = deviceSpeed; bestDeviceSpeed = deviceSpeed;
} }
} }
/* Get user-specified GPU choice */
if (getenv("GPUDEVICE")) if (getenv("GPUDEVICE"))
{ {
int device = atoi(getenv("GPUDEVICE")); int device = atoi(getenv("GPUDEVICE"));
...@@ -441,13 +434,30 @@ int bioem_cuda::selectCudaDevice() ...@@ -441,13 +434,30 @@ int bioem_cuda::selectCudaDevice()
if (device < 0) if (device < 0)
{ {
printf("Negative CUDA device specified: %d, invalid!\n", device); printf("Negative CUDA device specified: %d, invalid!\n", device);
exit(1);
} }
bestDevice = device; bestDevice = device;
} }
checkCudaErrors(cudaSetDevice(bestDevice));
cudaGetDeviceProperties(&deviceProp ,bestDevice); /* Set CUDA processes to appropriate devices */
cudaGetDeviceProperties(&deviceProp, bestDevice);
if (deviceProp.computeMode == 0)
{
checkCudaErrors(cudaSetDevice(bestDevice));
}
else
{
if (DebugOutput >= 1)
{
printf("CUDA device %d is not set in DEFAULT mode, make sure that CUDA processes are pinned as planned!\n", bestDevice);
printf("Pinning process %d to CUDA device %d\n", mpi_rank, bestDevice);
}
checkCudaErrors(cudaSetDevice(bestDevice));
/* This synchronization is needed in order to detect bogus silent errors from cudaSetDevice call */
checkCudaErrors(cudaDeviceSynchronize());
}
/* Debugging information about CUDA devices used by the current process */
if (DebugOutput >= 3) if (DebugOutput >= 3)
{ {
printf("Using CUDA Device %s with Properties:\n", deviceProp.name); printf("Using CUDA Device %s with Properties:\n", deviceProp.name);
...@@ -466,13 +476,33 @@ int bioem_cuda::selectCudaDevice() ...@@ -466,13 +476,33 @@ int bioem_cuda::selectCudaDevice()
printf("memoryClockRate = %d\n", deviceProp.memoryClockRate); printf("memoryClockRate = %d\n", deviceProp.memoryClockRate);
printf("multiProcessorCount = %d\n", deviceProp.multiProcessorCount); printf("multiProcessorCount = %d\n", deviceProp.multiProcessorCount);
printf("textureAlignment = %lld\n", (unsigned long long int) deviceProp.textureAlignment); printf("textureAlignment = %lld\n", (unsigned long long int) deviceProp.textureAlignment);
printf("computeMode = %d\n", deviceProp.computeMode);
#if CUDA_VERSION > 3010
size_t free, total;
#else
unsigned int free, total;
#endif
if (deviceProp.computeMode == 0)
{
CUdevice tmpDevice;
cuErrorCheck(cuDeviceGet(&tmpDevice, bestDevice));
CUcontext tmpContext;
cuErrorCheck(cuCtxCreate(&tmpContext, 0, tmpDevice));
cuErrorCheck(cuMemGetInfo(&free, &total));
cuErrorCheck(cuCtxDestroy(tmpContext));
}
else
{
cuErrorCheck(cuMemGetInfo(&free, &total));
}
printf("free memory = %lld; total memory = %lld\n", free, total);
} }
if (DebugOutput >= 1) if (DebugOutput >= 1)
{ {
printf("BioEM for CUDA initialized (MPI Rank %d), %d GPUs found, using GPU %d\n", mpi_rank, count, bestDevice); printf("BioEM for CUDA initialized (MPI Rank %d), %d GPUs found, using GPU %d\n", mpi_rank, count, bestDevice);
} }
return(0); return(0);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment