Commit 093227f8 authored by David Rohr's avatar David Rohr

add check for cuda device

parent cd4d4d5d
......@@ -113,6 +113,7 @@ endif()
###Add Libraries
# Only when CUDA was found: add cuFFT to the bioEM target and additionally link
# the library referenced by CUDA_CUDA_LIBRARY.
if (CUDA_FOUND)
cuda_add_cufft_to_target(bioEM)
# NOTE(review): presumably CUDA_CUDA_LIBRARY is the CUDA driver library, which
# would be required by direct driver API calls (cuInit, cuCtxCreate, ...) - confirm.
target_link_libraries(bioEM ${CUDA_CUDA_LIBRARY})
endif()
target_link_libraries(bioEM -L${FFTW_LIBDIR} -lfftw3 -lfftw3f)
......
......@@ -272,9 +272,78 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map
return(0);
}
int bioem_cuda::selectCudaDevice()
{
int count;
long long int bestDeviceSpeed = -1;
int bestDevice;
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceCount(&count));
if (count == 0)
{
printf("No CUDA device detected\n");
return(1);
}
for (int i = 0;i < count;i++)
{
printf("CUDA device %d\n", i);
#if CUDA_VERSION > 3010
size_t free, total;
#else
unsigned int free, total;
#endif
cuInit(0);
CUdevice tmpDevice;
cuDeviceGet(&tmpDevice, i);
CUcontext tmpContext;
cuCtxCreate(&tmpContext, 0, tmpDevice);
if(cuMemGetInfo(&free, &total)) exit(1);
cuCtxDestroy(tmpContext);
if (DebugOutput >= 1) printf("Obtained current memory usage for device %d\n", i);
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, i));
if (DebugOutput >= 1) printf("Obtained device properties for device %d\n", i);
if (DebugOutput >= 1) printf("%2d: %s (Rev: %d.%d - Mem Avail %lld / %lld)", i, deviceProp.name, deviceProp.major, deviceProp.minor, (long long int) free, (long long int) deviceProp.totalGlobalMem);
long long int deviceSpeed = (long long int) deviceProp.multiProcessorCount * (long long int) deviceProp.clockRate * (long long int) deviceProp.warpSize;
if (deviceSpeed > bestDeviceSpeed)
{
bestDevice = i;
bestDeviceSpeed = deviceSpeed;
}
}
cudaGetDeviceProperties(&deviceProp ,bestDevice);
if (DebugOutput >= 1)
{
printf("Using CUDA Device %s with Properties:", deviceProp.name);
printf("totalGlobalMem = %lld", (unsigned long long int) deviceProp.totalGlobalMem);
printf("sharedMemPerBlock = %lld", (unsigned long long int) deviceProp.sharedMemPerBlock);
printf("regsPerBlock = %d", deviceProp.regsPerBlock);
printf("warpSize = %d", deviceProp.warpSize);
printf("memPitch = %lld", (unsigned long long int) deviceProp.memPitch);
printf("maxThreadsPerBlock = %d", deviceProp.maxThreadsPerBlock);
printf("maxThreadsDim = %d %d %d", deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1], deviceProp.maxThreadsDim[2]);
printf("maxGridSize = %d %d %d", deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], deviceProp.maxGridSize[2]);
printf("totalConstMem = %lld", (unsigned long long int) deviceProp.totalConstMem);
printf("major = %d", deviceProp.major);
printf("minor = %d", deviceProp.minor);
printf("clockRate = %d", deviceProp.clockRate);
printf("memoryClockRate = %d", deviceProp.memoryClockRate);
printf("multiProcessorCount = %d", deviceProp.multiProcessorCount);
printf("textureAlignment = %lld", (unsigned long long int) deviceProp.textureAlignment);
}
return(0);
}
int bioem_cuda::deviceInit()
{
deviceExit();
if (FFTAlgo) GPUAlgo = 2;
......@@ -471,5 +540,14 @@ void bioem_cuda::free_device_host(void* ptr)
// Factory for the compute backend: yields a bioem_cuda instance when the CUDA
// runtime reports a non-zero device count, and a plain bioem instance (after
// printing a notice) when the query fails or reports zero devices.
bioem* bioem_cuda_create()
{
	int numDevices = 0;
	const cudaError_t queryStatus = cudaGetDeviceCount(&numDevices);
	const bool cudaUsable = (queryStatus == cudaSuccess) && (numDevices != 0);

	if (cudaUsable)
	{
		return new bioem_cuda;
	}

	printf("No CUDA device available, using fallback to CPU version\n");
	return new bioem;
}
......@@ -26,6 +26,9 @@ protected:
virtual int deviceStartRun();
virtual int deviceFinishRun();
int deviceExit();
private:
int selectCudaDevice();
int deviceInitialized;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment