From 73602d4ba289c89c12aefc7edce9897bf474c3ad Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2014 14:05:12 +0200 Subject: [PATCH] merge compareRefMaps functions, create calc_logpro and update_prob function to remvoe code duplication --- bioem.cpp | 120 +++++++++++++++------------------- bioem_algorithm.h | 116 +++++++++++++++++--------------- bioem_cuda.cu | 24 +++---- include/bioem.h | 8 +-- include/bioem_cuda_internal.h | 2 +- 5 files changed, 129 insertions(+), 141 deletions(-) diff --git a/bioem.cpp b/bioem.cpp index 24fadd3..b42a35d 100644 --- a/bioem.cpp +++ b/bioem.cpp @@ -269,14 +269,7 @@ int bioem::run() /***************************************************************************************/ /*** Comparing each calculated convoluted map with all experimental maps ***/ timer.ResetStart(); - if (FFTAlgo == 0) - { - compareRefMaps(iProjectionOut, iConv, conv_map); - } - else - { - compareRefMaps2(iProjectionOut, iConv,conv_mapFFT,sumCONV,sumsquareCONV); - } + compareRefMaps(iProjectionOut, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); const double compTime = timer.GetCurrentElapsedTime(); const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1; @@ -363,39 +356,39 @@ int bioem::run() return(0); } -int bioem::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap) +int bioem::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap) { -#pragma omp parallel for - for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) + if (FFTAlgo) { - compareRefMapShifted<-1>(iRefMap,iProjectionOut,iConv,conv_map, pProb, param.param_device, RefMap); - } - return(0); -} - -int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myfloat_t sumC,myfloat_t sumsquareC) -{ #pragma omp parallel + { + mycomplex_t *localCCT; + myfloat_t *lCC; + localCCT= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param.param_device.NumberPixels*param.param_device.NumberFFTPixels1D); + lCC= (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) *param.param_device.NumberPixels*param.param_device.NumberPixels); + + const int num_threads = omp_get_num_threads(); + const int thread_id = omp_get_thread_num(); + const int mapsPerThread = (RefMap.ntotRefMap - startMap + num_threads - 1) / num_threads; + const int iStart = startMap + thread_id * mapsPerThread; + const int iEnd = min(RefMap.ntotRefMap, startMap + (thread_id + 1) * mapsPerThread); + + for (int iRefMap = iStart; iRefMap < iEnd; iRefMap ++) + { + calculateCCFFT(iRefMap,iProjectionOut, iConv, sumC,sumsquareC, localmultFFT, localCCT,lCC); + } + myfftw_free(localCCT); + myfftw_free(lCC); + } + } + else { - mycomplex_t *localCCT; - myfloat_t *lCC; - localCCT= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param.param_device.NumberPixels*param.param_device.NumberFFTPixels1D); - lCC= (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) *param.param_device.NumberPixels*param.param_device.NumberPixels); - - const int num_threads = omp_get_num_threads(); - const int thread_id = omp_get_thread_num(); - const int mapsPerThread = (RefMap.ntotRefMap + num_threads - 1) / num_threads; - const int iStart = thread_id * mapsPerThread; - const int iEnd = min(RefMap.ntotRefMap, (thread_id + 1) * mapsPerThread); - - for (int iRefMap = iStart; iRefMap < iEnd; iRefMap ++) +#pragma omp parallel for + for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) { - calculateCCFFT(iRefMap,iOrient, iConv, sumC,sumsquareC, localConvFFT, localCCT,lCC); + compareRefMapShifted<-1>(iRefMap,iProjectionOut,iConv,conv_map, pProb, param.param_device, RefMap); } - myfftw_free(localCCT); - myfftw_free(lCC); } - return(0); } @@ -437,51 +430,40 @@ inline int bioem::calculateCCFFT(int iRefMap, int iOrient, int iConv, myfloat_t return (0); } -inline int bioem::calProb(int iRefMap,int iOrient, int iConv,myfloat_t sumC,myfloat_t sumsquareC, int value, int disx, int disy) +inline int bioem::calProb(int iRefMap,int iOrient, int iConv,myfloat_t sumC,myfloat_t sumsquareC, float value, int disx, int disy) { /********************************************************/ /*********** Calculates the BioEM probability ***********/ /********************************************************/ - const myfloat_t ForLogProb = (sumsquareC * param.param_device.Ntotpi - sumC * sumC); + const myfloat_t logpro = calc_logpro(param.param_device, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]); - // Products of different cross-correlations (first element in formula) - const myfloat_t firstele = param.param_device.Ntotpi * (RefMap.sumsquare_RefMap[iRefMap] * sumsquareC - value * value) + - 2 * RefMap.sum_RefMap[iRefMap] * sumC * value - RefMap.sumsquare_RefMap[iRefMap] * sumC * sumC - RefMap.sum_RefMap[iRefMap] * RefMap.sum_RefMap[iRefMap] * sumsquareC; + //update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb); + //GCC is too stupid to inline properly, so the code is copied here + if(pProb[iRefMap].Constoadd < logpro) + { + pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd); + pProb[iRefMap].Constoadd = logpro; + } + pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd); - //******* Calculating log of Prob*********/ - // As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1); - const myfloat_t logpro = (3 - param.param_device.Ntotpi) * 0.5 * log(firstele) + (param.param_device.Ntotpi * 0.5 - 2) * log((param.param_device.Ntotpi - 2) * ForLogProb); -// cout << n <<" " << firstele << " "<< logpro << "\n"; - { - /******* Summing total Probabilities *************/ - /******* Need a constant because of numerical divergence*****/ - if(pProb[iRefMap].Constoadd < logpro) - { - pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd); - pProb[iRefMap].Constoadd = logpro; - } - pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd); + if(pProb[iRefMap].ConstAngle[iOrient] < logpro) + { + pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]); + pProb[iRefMap].ConstAngle[iOrient] = logpro; + } + pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]); - //Summing probabilities for each orientation - if(pProb[iRefMap].ConstAngle[iOrient] < logpro) - { - pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]); - pProb[iRefMap].ConstAngle[iOrient] = logpro; - } - pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]); + if(pProb[iRefMap].max_prob < logpro) + { + pProb[iRefMap].max_prob = logpro; + pProb[iRefMap].max_prob_cent_x = disx; + pProb[iRefMap].max_prob_cent_y = disy; + pProb[iRefMap].max_prob_orient = iOrient; + pProb[iRefMap].max_prob_conv = iConv; + } - /********** Getting parameters that maximize the probability ***********/ - if(pProb[iRefMap].max_prob < logpro) - { - pProb[iRefMap].max_prob = logpro; - pProb[iRefMap].max_prob_cent_x = disx; - pProb[iRefMap].max_prob_cent_y = disy; - pProb[iRefMap].max_prob_orient = iOrient; - pProb[iRefMap].max_prob_conv = iConv; - } - } return (0); } diff --git a/bioem_algorithm.h b/bioem_algorithm.h index fdec590..e4f401b 100644 --- a/bioem_algorithm.h +++ b/bioem_algorithm.h @@ -11,6 +11,65 @@ #include #endif +template +__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability* pProb, myfloat_t* buf3 = NULL, int* bufint = NULL) +{ + /******* Summing total Probabilities *************/ + /******* Need a constant because of numerical divergence*****/ + if(pProb[iRefMap].Constoadd < logpro) + { + pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd); + pProb[iRefMap].Constoadd = logpro; + } + + //Summing probabilities for each orientation + if(pProb[iRefMap].ConstAngle[iOrient] < logpro) + { + pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]); + pProb[iRefMap].ConstAngle[iOrient] = logpro; + } + + if (GPUAlgo != 2) + { + pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd); + pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]); + } + + /********** Getting parameters that maximize the probability ***********/ + if(pProb[iRefMap].max_prob < logpro) + { + pProb[iRefMap].max_prob = logpro; + + if (GPUAlgo == 2) + { + bufint[0] = 1; + buf3[1] = logpro; + } + else + { + pProb[iRefMap].max_prob_cent_x = cent_x; + pProb[iRefMap].max_prob_cent_y = cent_y; + } + pProb[iRefMap].max_prob_orient = iOrient; + pProb[iRefMap].max_prob_conv = iConv; + } +} + +__device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, const myfloat_t sum, const myfloat_t sumsquare, const myfloat_t crossproMapConv, const myfloat_t sumref, const myfloat_t sumsquareref) +{ + // Related to Reference calculated Projection + const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum); + + // Products of different cross-correlations (first element in formula) + const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare-crossproMapConv * crossproMapConv) + + 2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare; + + //******* Calculating log of Prob*********/ + // As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1); + const myfloat_t logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb); + return(logpro); +} + template __device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const bioem_map& Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap, const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true) @@ -99,16 +158,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient #endif /********** Calculating elements in BioEM Probability formula ********/ - // Related to Reference calculated Projection - const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum); - - // Products of different cross-correlations (first element in formula) - const myfloat_t firstele = param.Ntotpi * (RefMap.sumsquare_RefMap[iRefMap] * sumsquare-crossproMapConv * crossproMapConv) + - 2 * RefMap.sum_RefMap[iRefMap] * sum * crossproMapConv - RefMap.sumsquare_RefMap[iRefMap] * sum * sum - RefMap.sum_RefMap[iRefMap] * RefMap.sum_RefMap[iRefMap] * sumsquare; - - //******* Calculating log of Prob*********/ - // As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1); - logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb); + logpro = calc_logpro(param, sum, sumsquare, crossproMapConv, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]); } else { @@ -166,24 +216,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient if (myShift == 0 && iRefMap < RefMap.ntotRefMap) { const myfloat_t logpro_max = vbuf[myThreadIdxX]; - if(pProb[iRefMap].Constoadd < logpro_max) - { - pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro_max + pProb[iRefMap].Constoadd); - pProb[iRefMap].Constoadd = logpro_max; - } - if(pProb[iRefMap].ConstAngle[iOrient] < logpro_max) - { - pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro_max + pProb[iRefMap].ConstAngle[iOrient]); - pProb[iRefMap].ConstAngle[iOrient] = logpro_max; - } - if(pProb[iRefMap].max_prob < logpro_max) - { - pProb[iRefMap].max_prob = logpro_max; - pProb[iRefMap].max_prob_orient = iOrient; - pProb[iRefMap].max_prob_conv = iConv; - bufint[0] = 1; - buf3[1] = logpro_max; - } + update_prob(logpro_max, iRefMap, iOrient, iConv, -1, -1, pProb, buf3, bufint); } } @@ -289,32 +322,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient /***** Summing & Storing total/Orientation Probabilites for each map ************/ { - /******* Summing total Probabilities *************/ - /******* Need a constant because of numerical divergence*****/ - if(pProb[iRefMap].Constoadd < logpro) - { - pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd); - pProb[iRefMap].Constoadd = logpro; - } - pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd); - - //Summing probabilities for each orientation - if(pProb[iRefMap].ConstAngle[iOrient] < logpro) - { - pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]); - pProb[iRefMap].ConstAngle[iOrient] = logpro; - } - pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]); - - /********** Getting parameters that maximize the probability ***********/ - if(pProb[iRefMap].max_prob < logpro) - { - pProb[iRefMap].max_prob = logpro; - pProb[iRefMap].max_prob_cent_x = cent_x; - pProb[iRefMap].max_prob_cent_y = cent_y; - pProb[iRefMap].max_prob_orient = iOrient; - pProb[iRefMap].max_prob_conv = iConv; - } + update_prob<-1>(logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb); } } diff --git a/bioem_cuda.cu b/bioem_cuda.cu index 3ec37c9..d125710 100644 --- a/bioem_cuda.cu +++ b/bioem_cuda.cu @@ -62,9 +62,9 @@ __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iCon const int myShift = myid & (nShifts * nShifts - 1); const int cent_x = myShiftIdx * param.GridSpaceCenter - param.maxDisplaceCenter; const int cent_y = myShiftIdy * param.GridSpaceCenter - param.maxDisplaceCenter; - + const bool threadActive = myShiftIdx < nShifts && myShiftIdy < nShifts && iRefMap < maxRef; - + compareRefMap<2>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap, cent_x, cent_y, myShift, nShifts * nShifts, myRef, threadActive); } @@ -81,7 +81,7 @@ static inline int ilog2 (int value) static inline int ilog2(int value) {return 31 - __builtin_clz(value);} #endif -int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap) +int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap) { if (startMap) { @@ -94,7 +94,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c checkCudaErrors(cudaEventSynchronize(cudaEvent[iConv & 1])); } checkCudaErrors(cudaMemcpyAsync(pConvMap_device[iConv & 1], &conv_map, sizeof(bioem_map), cudaMemcpyHostToDevice, cudaStream)); - + if (GPUAlgo == 2) //Loop over shifts { const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1; @@ -129,7 +129,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c { compareRefMap_kernel <<>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, cent_x, cent_y, maxRef); } - } + } } else if (GPUAlgo == 0) //All shifts in one kernel { @@ -142,7 +142,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c } if (GPUWorkload < 100) { - bioem::compareRefMaps(iProjectionOut, iConv, conv_map, maxRef); + bioem::compareRefMaps(iProjectionOut, iConv, conv_map, localmultFFT, sumC, sumsquareC, maxRef); } if (GPUAsync) { @@ -158,7 +158,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c int bioem_cuda::deviceInit() { deviceExit(); - + checkCudaErrors(cudaStreamCreate(&cudaStream)); checkCudaErrors(cudaMalloc(&pRefMap_device, sizeof(bioem_RefMap))); checkCudaErrors(cudaMalloc(&pProb_device, sizeof(bioem_Probability) * RefMap.ntotRefMap)); @@ -168,7 +168,7 @@ int bioem_cuda::deviceInit() checkCudaErrors(cudaMalloc(&pConvMap_device[i], sizeof(bioem_map))); } pRefMap_device_Mod = (bioem_RefMap_Mod*) pRefMap_device; - + if (GPUAlgo == 0 || GPUAlgo == 1) { bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod(RefMap); @@ -186,7 +186,7 @@ int bioem_cuda::deviceInit() int bioem_cuda::deviceExit() { if (deviceInitialized == 0) return(0); - + cudaStreamDestroy(cudaStream); cudaFree(pRefMap_device); cudaFree(pProb_device); @@ -196,7 +196,7 @@ int bioem_cuda::deviceExit() cudaFree(pConvMap_device); } cudaThreadExit(); - + deviceInitialized = 0; return(0); } @@ -204,7 +204,7 @@ int bioem_cuda::deviceExit() int bioem_cuda::deviceStartRun() { maxRef = GPUWorkload >= 100 ? RefMap.ntotRefMap : ((size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100); - + cudaMemcpy(pProb_device, pProb, sizeof(bioem_Probability) * maxRef, cudaMemcpyHostToDevice); return(0); } @@ -213,7 +213,7 @@ int bioem_cuda::deviceFinishRun() { if (GPUAsync) cudaStreamSynchronize(cudaStream); cudaMemcpy(pProb, pProb_device, sizeof(bioem_Probability) * maxRef, cudaMemcpyDeviceToHost); - + return(0); } diff --git a/include/bioem.h b/include/bioem.h index a832f0b..510b7a4 100644 --- a/include/bioem.h +++ b/include/bioem.h @@ -23,14 +23,12 @@ public: int doProjections(int iMap); int createConvolutedProjectionMap(int iOreint,int iMap, mycomplex_t* lproj,bioem_map& Mapconv,mycomplex_t* localmultFFT,myfloat_t& sumC,myfloat_t& sumsquareC); - - virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap = 0); - int compareRefMaps2(int iProjectionOut, int iConv,mycomplex_t* localmultFFT,myfloat_t sumC,myfloat_t sumsquareC); + virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); int createProjection(int iMap, mycomplex_t* map); int calcross_cor(bioem_map& localmap,myfloat_t& sum,myfloat_t& sumsquare); - int calProb(int iRefMap,int iOrient, int iConv,myfloat_t sumC,myfloat_t sumsquareC, int value, int disx, int disy); - int calculateCCFFT(int iMap, int iOrient, int iConv, myfloat_t sumC,myfloat_t sumsquareC, mycomplex_t* localConvFFT,mycomplex_t* localCCT,myfloat_t* lCC); + int calProb(int iRefMap,int iOrient, int iConv,myfloat_t sumC,myfloat_t sumsquareC, float value, int disx, int disy); + int calculateCCFFT(int iMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT,mycomplex_t* localCCT,myfloat_t* lCC); bioem_Probability* pProb; diff --git a/include/bioem_cuda_internal.h b/include/bioem_cuda_internal.h index 38b8e6d..c553288 100644 --- a/include/bioem_cuda_internal.h +++ b/include/bioem_cuda_internal.h @@ -16,7 +16,7 @@ public: bioem_cuda(); virtual ~bioem_cuda(); - virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap = 0); + virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); protected: virtual int deviceInit(); -- GitLab