diff --git a/bioem_cuda.cu b/bioem_cuda.cu
index 01e6d982d87423b40c9119bcd317e6c98770da7b..9d66797e9998821e7db7c24d2210d770123fbee6 100644
--- a/bioem_cuda.cu
+++ b/bioem_cuda.cu
@@ -305,9 +305,7 @@ int bioem_cuda::deviceInit()
 	gpumap->sum_RefMap = sum;
 	gpumap->sumsquare_RefMap = sumsquare;
 
-	pProb_device = pProb;
-	checkCudaErrors(cudaMalloc(&pProb_device.ptr, pProb_device.get_size(RefMap.ntotRefMap, param.nTotGridAngles)));
-	pProb_device.set_pointers();
+	checkCudaErrors(cudaMalloc(&pProb_memory, pProb_device.get_size(RefMap.ntotRefMap, param.nTotGridAngles)));
 	for (int i = 0; i < 2; i++)
 	{
 		checkCudaErrors(cudaStreamCreate(&cudaStream[i]));
@@ -342,7 +340,7 @@ int bioem_cuda::deviceExit()
 	if (deviceInitialized == 0) return(0);
 
 
-	cudaFree(pProb_device.ptr);
+	cudaFree(pProb_memory);
 	cudaFree(sum);
 	cudaFree(sumsquare);
 	for (int i = 0; i < 2; i++)
@@ -383,9 +381,27 @@ int bioem_cuda::deviceExit()
 
 int bioem_cuda::deviceStartRun()
 {
-	maxRef = GPUWorkload >= 100 ? RefMap.ntotRefMap : ((size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100);
+	// Choose the host buffer that is exchanged with the device for this run.
+	if (GPUWorkload >= 100)
+	{
+		maxRef = RefMap.ntotRefMap;
+		pProb_host = &pProb;
+	}
+	else
+	{
+		// Only maxRef maps are exchanged: stage them in a temporary buffer that
+		// deviceFinishRun() copies back into pProb and releases.
+		maxRef = (size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100;
+		pProb_host = new bioem_Probability;
+		pProb_host->init(maxRef, param.nTotGridAngles, *this);
+		pProb_host->copyFrom(&pProb, *this);
+	}
 
-	cudaMemcpyAsync(pProb_device.ptr, pProb.ptr, pProb.get_size(RefMap.ntotRefMap, param.nTotGridAngles), cudaMemcpyHostToDevice, cudaStream[0]);
+	// Device descriptor: a copy of the host descriptor redirected to pProb_memory.
+	pProb_device = *pProb_host;
+	pProb_device.ptr = pProb_memory;
+	pProb_device.set_pointers();
+	checkCudaErrors(cudaMemcpyAsync(pProb_device.ptr, pProb_host->ptr, pProb_host->get_size(maxRef, param.nTotGridAngles), cudaMemcpyHostToDevice, cudaStream[0]));
 
 	if (FFTAlgo)
 	{
@@ -420,7 +436,7 @@ int bioem_cuda::deviceStartRun()
 int bioem_cuda::deviceFinishRun()
 {
 	if (GPUAsync) cudaStreamSynchronize(cudaStream[0]);
-	cudaMemcpyAsync(pProb.ptr, pProb_device.ptr, pProb.get_size(RefMap.ntotRefMap, param.nTotGridAngles), cudaMemcpyDeviceToHost, cudaStream[0]);
+	checkCudaErrors(cudaMemcpyAsync(pProb_host->ptr, pProb_device.ptr, pProb_host->get_size(maxRef, param.nTotGridAngles), cudaMemcpyDeviceToHost, cudaStream[0]));
 
 	if (FFTAlgo)
 	{
@@ -434,6 +450,15 @@ int bioem_cuda::deviceFinishRun()
 			if (!GPUDualStream) break;
 		}
 	}
+	// Wait for the device-to-host copy issued above before the CPU reads pProb_host.
+	checkCudaErrors(cudaDeviceSynchronize());
+	if (GPUWorkload < 100)
+	{
+		pProb.copyFrom(pProb_host, *this);
+		// Created with scalar new in deviceStartRun(), so it needs scalar delete (not delete[]).
+		delete pProb_host;
+		pProb_host = NULL;
+	}
 
 	return(0);
 }
diff --git a/include/bioem.h b/include/bioem.h
index cae10fc4254c326945c5dfb698ab80ce6bbc9599..675f0bbcc9a5c6f767c95baa84b5a3f3d9f3bdf5 100644
--- a/include/bioem.h
+++ b/include/bioem.h
@@ -13,6 +13,7 @@
 class bioem
 {
 	friend class bioem_RefMap;
+	friend class bioem_Probability;
 
 public:
 	bioem();
diff --git a/include/bioem_cuda_internal.h b/include/bioem_cuda_internal.h
index e3fec066c4611b0271e92b40a1e71af5469c4236..67d1206fd8e4dc184e3183401f486f3f88f76d21 100644
--- a/include/bioem_cuda_internal.h
+++ b/include/bioem_cuda_internal.h
@@ -34,7 +34,9 @@ protected:
 	cudaEvent_t cudaFFTEvent[2];
 	bioem_RefMap_Mod* pRefMap_device_Mod;
 	bioem_RefMap* gpumap;
+	bioem_Probability* pProb_host;	// host buffer exchanged with the device: &pProb or a temporary made in deviceStartRun()
 	bioem_Probability pProb_device;
+	void* pProb_memory;	// device allocation made in deviceInit(), released in deviceExit()
 	myfloat_t* pConvMap_device[2];
 
 	mycomplex_t* pRefMapsFFT;
diff --git a/include/map.h b/include/map.h
index 05a11ed894f01802734daf375619ed4785e7232c..fc0ce1aaf8e74c4bcadf2e4a7f4c179cec81b693 100644
--- a/include/map.h
+++ b/include/map.h
@@ -107,12 +107,13 @@ public:
 	bioem_Probability_map* ptr_map;
 	bioem_Probability_angle* ptr_angle;
 
-	size_t get_size(size_t maps, size_t angles)
+	static size_t get_size(size_t maps, size_t angles)
 	{
 		return(maps * (angles * sizeof(bioem_Probability_angle) + sizeof(bioem_Probability_map)));
 	}
 
 	void init(size_t maps, size_t angles, bioem& bio);
+	void copyFrom(bioem_Probability* from, bioem& bio);
 
 	void set_pointers()
 	{
diff --git a/map.cpp b/map.cpp
index 630caa1c7b559888b8b36ba3f30762c07ebe10f5..63d94659c7fe60a456d0cb7b58c316fff3baf81c 100644
--- a/map.cpp
+++ b/map.cpp
@@ -2,7 +2,7 @@
    < BioEM software for Bayesian inference of Electron Microscopy images>
    Copyright (C) 2014 Pilar Cossio, David Rohr and Gerhard Hummer.
    Max Planck Institute of Biophysics, Frankfurt, Germany.
-   
+
    See license statement for terms of distribution.
 
    ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
@@ -72,15 +72,15 @@ int bioem_RefMap::readRefMaps(bioem_param& param)
 			sscanf (tmpVals,"%99c",mapname);
 
 			// Check for last line
-			strncpy (tmpm,mapname,3); 
+			strncpy (tmpm,mapname,3);
 			if(strcmp(tmpm,"XXX")!=0)
 			{
-				indifile = mapname; 
+				indifile = mapname;
 				//Reading Multiple MRC
 				read_MRC(indifile,param);
 			}
-			for(int i=0;i<3;i++)mapname[i] = 'X'; 
-			for(int i=3;i<100;i++)mapname[i] = {0}; 
+			for(int i=0;i<3;i++)mapname[i] = 'X';
+			for(int i=3;i<100;i++)mapname[i] = {0};
 		}
 
 		cout << "\n+++++++++++++++++++++++++++++++++++++++++++ \n";
@@ -185,7 +185,7 @@ int bioem_RefMap::readRefMaps(bioem_param& param)
 			fclose(fp);
 		}
 	}
-	
+
 	if (getenv("BIOEM_DEBUG_NMAPS"))
 	{
 		ntotRefMap = atoi(getenv("BIOEM_DEBUG_NMAPS"));
@@ -280,9 +280,36 @@ void bioem_Probability::init(size_t maps, size_t angles, bioem& bio)
 	set_pointers();
 }
 
+// Copy the map entries and, for every orientation, the angle entries of
+// `from` into this object, starting at map 0.
+// The two objects may hold different numbers of maps (deviceStartRun() stages
+// a reduced buffer when GPUWorkload < 100), so the copy is limited to the maps
+// both of them hold; copying from->nMaps unconditionally would overrun the
+// smaller destination.
+// NOTE(review): assumes nMaps is the map count given to init() - confirm.
+void bioem_Probability::copyFrom(bioem_Probability* from, bioem& bio)
+{
+	// Never copy more maps than the smaller of the two buffers holds.
+	const size_t nCopyMaps = (size_t) from->nMaps < (size_t) nMaps ? (size_t) from->nMaps : (size_t) nMaps;
+	if (nCopyMaps == 0) return;
+
+	bioem_Probability_map& pProbMap = getProbMap(0);
+	bioem_Probability_map& pProbMapFrom = from->getProbMap(0);
+	memcpy(&pProbMap, &pProbMapFrom, nCopyMaps * sizeof(bioem_Probability_map));
+
+	// Angle entries are copied one orientation at a time: nCopyMaps entries
+	// beginning at the slot of map 0 for that orientation.
+	for (int iOrient = 0; iOrient < bio.param.nTotGridAngles; iOrient ++)
+	{
+		bioem_Probability_angle& pProbAngle = getProbAngle(0, iOrient);
+		bioem_Probability_angle& pProbAngleFrom = from->getProbAngle(0, iOrient);
+		memcpy(&pProbAngle, &pProbAngleFrom, nCopyMaps * sizeof(bioem_Probability_angle));
+	}
+}
+
 int bioem_RefMap::read_MRC(const char* filename,bioem_param& param)
 {
-	myfloat_t st,st2; 
+	myfloat_t st,st2;
 	unsigned long count;
 	FILE *fin;
 	float currfloat;
@@ -302,7 +329,7 @@ int bioem_RefMap::read_MRC(const char* filename,bioem_param& param)
 	n_range_viol0 = test_mrc(filename,0);
 	n_range_viol1 = test_mrc(filename,1);
 
-	if (n_range_viol0 < n_range_viol1) { //* guess endianism 
+	if (n_range_viol0 < n_range_viol1) { //* guess endianism
 		swap = 0;
 		if (n_range_viol0 > 0) {
 			printf(" Warning: %i header field range violations detected in file %s \n", n_range_viol0,filename);
@@ -396,7 +423,7 @@ int bioem_RefMap::read_MRC(const char* filename,bioem_param& param)
 		exit(1);
 	}
 	else
-	{ 
+	{
 		rewind (fin);
 		for (count=0; count<256; ++count) if (read_float_empty(fin)==0) {
 			cout << "ERROR Converting Data: " << filename;
@@ -405,7 +432,7 @@ int bioem_RefMap::read_MRC(const char* filename,bioem_param& param)
 
 		for (count=0; count<(unsigned long)nsymbt; ++count) if (read_char_float(&currfloat,fin)==0) {
 			cout << "ERROR Converting Data: " << filename;
-			exit(1); 
+			exit(1);
 		}
 
 		for ( int nmap = 0 ; nmap < ns ; nmap ++ )
@@ -426,14 +453,14 @@ int bioem_RefMap::read_MRC(const char* filename,bioem_param& param)
 					st += currfloat;
 					st2 += currfloat*currfloat;
 				}
-			} 
+			}
 			//Normaling maps to zero mean and unit standard deviation
 			st /= float(nr*nc);
-			st2 = sqrt(st2 / float(nr * nc) - st * st); 
+			st2 = sqrt(st2 / float(nr * nc) - st * st);
 			for ( int j = 0 ; j < nr ; j ++ )
 				for ( int i = 0 ; i < nc ; i ++ ){
 					maps[(nmap + ntotRefMap) * refMapSize + i * numPixels + j] = maps[(nmap + ntotRefMap) * refMapSize + i * numPixels + j] / st2 - st/st2;
-					//if(nmap+ntotRefMap==300) cout << i << " " << j << " " << nmap+ntotRefMap << " " << Ref[nmap+ntotRefMap].points[i][j] << "\n"; 
+					//if(nmap+ntotRefMap==300) cout << i << " " << j << " " << nmap+ntotRefMap << " " << Ref[nmap+ntotRefMap].points[i][j] << "\n";
 				}
 		}
 		ntotRefMap += ns ;
@@ -506,7 +533,7 @@ int bioem_RefMap::test_mrc (const char *vol_file, int swap) {
 		exit(1);
 	}
 
-	//* read header info 
+	//* read header info
 	header_ok *= read_int(&nc,fin,swap);
 	header_ok *= read_int(&nr,fin,swap);
 	header_ok *= read_int(&ns,fin,swap);