diff --git a/bioem.cpp b/bioem.cpp
index 3e62798d8b8e2a5b001c0e4d0b105509228f69aa..51156b14116d0d987d90fe18c165340a29956a1a 100644
--- a/bioem.cpp
+++ b/bioem.cpp
@@ -417,6 +417,7 @@ int bioem::run()
 		MPI_Reduce(tmp1, tmp3, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
 
 		//Find MaxProb
+		MPI_Status mpistatus;
 		{	
 			int* tmpi1 = new int[RefMap.ntotRefMap];
 			int* tmpi2 = new int[RefMap.ntotRefMap];
@@ -436,7 +437,7 @@ int bioem::run()
 				{
 					if (mpi_rank == 0)
 					{
-						MPI_Recv(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, tmpi2[i], i, MPI_COMM_WORLD, NULL);
+						MPI_Recv(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, tmpi2[i], i, MPI_COMM_WORLD, &mpistatus);
 					}
 					else if (mpi_rank == tmpi2[i])
 					{
diff --git a/bioem_cuda.cu b/bioem_cuda.cu
index 4157725a68a4025d64097979bf811288ed2281ab..4860c28e599b352f3b45bab1372a13797b7f35e5 100644
--- a/bioem_cuda.cu
+++ b/bioem_cuda.cu
@@ -310,6 +310,27 @@ int bioem_cuda::selectCudaDevice()
 			bestDeviceSpeed = deviceSpeed;
 		}
 	}
+	// GPUDEVICE overrides the automatically selected device; accepted
+	// device numbers are 0 to count - 1.
+	if (getenv("GPUDEVICE"))
+	{
+		int device = atoi(getenv("GPUDEVICE"));
+#ifdef WITH_MPI
+		// -1 requests round-robin assignment of devices by MPI rank.
+		if (device == -1)
+		{
+			device = mpi_rank % count;
+		}
+#endif
+		// Abort on out-of-range values rather than passing them to CUDA.
+		if (device < 0 || device >= count)
+		{
+			printf("Invalid CUDA device specified: %d, valid device numbers are 0 to %d\n", device, count - 1);
+			exit(1);
+		}
+		bestDevice = device;
+	}
+	checkCudaErrors(cudaSetDevice(bestDevice));
 
 	cudaGetDeviceProperties(&deviceProp ,bestDevice);