diff --git a/bioem_cuda.cu b/bioem_cuda.cu index e5ccf87bc6ec27f12d3c5a86cde00e84f59f3084..64145117c41337e51260818f62fe13459684ba05 100644 --- a/bioem_cuda.cu +++ b/bioem_cuda.cu @@ -288,7 +288,6 @@ int bioem_cuda::selectCudaDevice() } for (int i = 0;i < count;i++) { - printf("CUDA device %d\n", i); #if CUDA_VERSION > 3010 size_t free, total; #else @@ -301,11 +300,9 @@ int bioem_cuda::selectCudaDevice() cuCtxCreate(&tmpContext, 0, tmpDevice); if(cuMemGetInfo(&free, &total)) exit(1); cuCtxDestroy(tmpContext); - if (DebugOutput >= 1) printf("Obtained current memory usage for device %d\n", i); checkCudaErrors(cudaGetDeviceProperties(&deviceProp, i)); - if (DebugOutput >= 1) printf("Obtained device properties for device %d\n", i); - if (DebugOutput >= 1) printf("%2d: %s (Rev: %d.%d - Mem Avail %lld / %lld)", i, deviceProp.name, deviceProp.major, deviceProp.minor, (long long int) free, (long long int) deviceProp.totalGlobalMem); + if (DebugOutput >= 1) printf("CUDA Device %2d: %s (Rev: %d.%d - Mem Avail %lld / %lld)\n", i, deviceProp.name, deviceProp.major, deviceProp.minor, (long long int) free, (long long int) deviceProp.totalGlobalMem); long long int deviceSpeed = (long long int) deviceProp.multiProcessorCount * (long long int) deviceProp.clockRate * (long long int) deviceProp.warpSize; if (deviceSpeed > bestDeviceSpeed) { @@ -316,24 +313,24 @@ int bioem_cuda::selectCudaDevice() cudaGetDeviceProperties(&deviceProp ,bestDevice); - if (DebugOutput >= 1) + if (DebugOutput >= 2) { - printf("Using CUDA Device %s with Properties:", deviceProp.name); - printf("totalGlobalMem = %lld", (unsigned long long int) deviceProp.totalGlobalMem); - printf("sharedMemPerBlock = %lld", (unsigned long long int) deviceProp.sharedMemPerBlock); - printf("regsPerBlock = %d", deviceProp.regsPerBlock); - printf("warpSize = %d", deviceProp.warpSize); - printf("memPitch = %lld", (unsigned long long int) deviceProp.memPitch); - printf("maxThreadsPerBlock = %d", deviceProp.maxThreadsPerBlock); - printf("maxThreadsDim = %d %d %d", deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1], deviceProp.maxThreadsDim[2]); - printf("maxGridSize = %d %d %d", deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], deviceProp.maxGridSize[2]); - printf("totalConstMem = %lld", (unsigned long long int) deviceProp.totalConstMem); - printf("major = %d", deviceProp.major); - printf("minor = %d", deviceProp.minor); - printf("clockRate = %d", deviceProp.clockRate); - printf("memoryClockRate = %d", deviceProp.memoryClockRate); - printf("multiProcessorCount = %d", deviceProp.multiProcessorCount); - printf("textureAlignment = %lld", (unsigned long long int) deviceProp.textureAlignment); + printf("Using CUDA Device %s with Properties:\n", deviceProp.name); + printf("totalGlobalMem = %lld\n", (unsigned long long int) deviceProp.totalGlobalMem); + printf("sharedMemPerBlock = %lld\n", (unsigned long long int) deviceProp.sharedMemPerBlock); + printf("regsPerBlock = %d\n", deviceProp.regsPerBlock); + printf("warpSize = %d\n", deviceProp.warpSize); + printf("memPitch = %lld\n", (unsigned long long int) deviceProp.memPitch); + printf("maxThreadsPerBlock = %d\n", deviceProp.maxThreadsPerBlock); + printf("maxThreadsDim = %d %d %d\n", deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1], deviceProp.maxThreadsDim[2]); + printf("maxGridSize = %d %d %d\n", deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], deviceProp.maxGridSize[2]); + printf("totalConstMem = %lld\n", (unsigned long long int) deviceProp.totalConstMem); + printf("major = %d\n", deviceProp.major); + printf("minor = %d\n", deviceProp.minor); + printf("clockRate = %d\n", deviceProp.clockRate); + printf("memoryClockRate = %d\n", deviceProp.memoryClockRate); + printf("multiProcessorCount = %d\n", deviceProp.multiProcessorCount); + printf("textureAlignment = %lld\n", (unsigned long long int) deviceProp.textureAlignment); } if (DebugOutput >= 1)