diff --git a/bioem.cpp b/bioem.cpp index cbe2871af3ac7c01687a09fc5002ea441374442c..879023beece231e5e937318df1d5b17a6e622d02 100644 --- a/bioem.cpp +++ b/bioem.cpp @@ -18,10 +18,8 @@ #include "model.h" #include "map.h" - #include "bioem_algorithm.h" - using namespace boost; namespace po = boost::program_options; @@ -170,11 +168,20 @@ int bioem::configure(int ac, char* av[]) param.nTotCTFs = atoi(getenv("BIOEM_DEBUG_BREAK")); } + pProb.init(RefMap.ntotRefMap, param.nTotGridAngles); + deviceInit(); return(0); } +void bioem::cleanup() +{ + //Deleting allocated pointers + free(pProb.ptr); + RefMap.freePointers(); +} + int bioem::precalculate() { // ************************************************************************************** @@ -184,32 +191,15 @@ int bioem::precalculate() // Generating Grids of orientations param.CalculateGridsParam(); - myfloat_t sum, sumsquare; - - //Precalculating cross-correlations of maps - for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap ; iRefMap++) - { - calcross_cor(RefMap.getmap(iRefMap), sum, sumsquare); - //Storing Crosscorrelations in Map class - RefMap.sum_RefMap[iRefMap] = sum; - RefMap.sumsquare_RefMap[iRefMap] = sumsquare; - } - // Precalculating CTF Kernels stored in class Param param.CalculateRefCTF(); - // Precalculating Maps in Fourier space - if (FFTAlgo) - { - RefMap.PreCalculateMapsFFT(param); - free(RefMap.maps); - RefMap.maps = NULL; - } + //Precalculate Maps + RefMap.precalculate(param, *this); return(0); } - int bioem::run() { // ************************************************************************************** @@ -218,23 +208,36 @@ int bioem::run() // **** If we want to control the number of threads -> omp_set_num_threads(XX); ****** // ****************** Declarying class of Probability Pointer ************************* - pProb = new bioem_Probability[RefMap.ntotRefMap]; - printf("\tInitializing\n"); + printf("\tInitializing Probabilities\n"); // Inizialzing Probabilites to zero and constant to -Infinity for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++) { - pProb[iRefMap].Total = 0.0; - pProb[iRefMap].Constoadd = -9999999; - pProb[iRefMap].max_prob = -9999999; + bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap); + + pProbMap.Total = 0.0; + pProbMap.Constoadd = -9999999; + pProbMap.max_prob = -9999999; for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient ++) { - pProb[iRefMap].forAngles[iOrient] = 0.0; - pProb[iRefMap].ConstAngle[iOrient] = -99999999; + bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient); + + pProbAngle.forAngles = 0.0; + pProbAngle.ConstAngle = -99999999; } } // ************************************************************************************** deviceStartRun(); + { + const int count = omp_get_max_threads(); + localCCT = new mycomplex_t*[count]; + lCC = new myfloat_t*[count]; + for (int i = 0;i < count;i++) + { + localCCT[i] = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); + lCC[i] = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels); + } + } // ******************************** MAIN CYCLE ****************************************** @@ -252,29 +255,29 @@ int bioem::run() printf("\tMain Loop (GridAngles %d, CTFs %d, RefMaps %d, Shifts (%d/%d)²), Pixels %d²\n", param.nTotGridAngles, param.nTotCTFs, RefMap.ntotRefMap, 2 * param.param_device.maxDisplaceCenter + param.param_device.GridSpaceCenter, param.param_device.GridSpaceCenter, param.param_device.NumberPixels); printf("\tInner Loop Count (%d %d %d) %lld\n", param.param_device.maxDisplaceCenter, param.param_device.GridSpaceCenter, param.param_device.NumberPixels, (long long int) (param.param_device.NumberPixels * param.param_device.NumberPixels * (2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1) * (2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1))); - for (int iProjectionOut = 0; iProjectionOut < param.nTotGridAngles; iProjectionOut++) + for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient++) { // *************************************************************************************** // ***** Creating Projection for given orientation and transforming to Fourier space ***** timer.ResetStart(); - createProjection(iProjectionOut, proj_mapFFT); - printf("Time Projection %d: %f\n", iProjectionOut, timer.GetCurrentElapsedTime()); + createProjection(iOrient, proj_mapFFT); + printf("Time Projection %d: %f\n", iOrient, timer.GetCurrentElapsedTime()); // *************************************************************************************** // ***** **** Internal Loop over convolutions **** ***** for (int iConv = 0; iConv < param.nTotCTFs; iConv++) { - printf("\t\tConvolution %d %d\n", iProjectionOut, iConv); + printf("\t\tConvolution %d %d\n", iOrient, iConv); // *** Calculating convolutions of projection map and crosscorrelations *** timer.ResetStart(); - createConvolutedProjectionMap(iProjectionOut, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); - printf("Time Convolution %d %d: %f\n", iProjectionOut, iConv, timer.GetCurrentElapsedTime()); + createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); + printf("Time Convolution %d %d: %f\n", iOrient, iConv, timer.GetCurrentElapsedTime()); // *************************************************************************************** // *** Comparing each calculated convoluted map with all experimental maps *** timer.ResetStart(); - compareRefMaps(iProjectionOut, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); + compareRefMaps(iOrient, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); const double compTime = timer.GetCurrentElapsedTime(); const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1; @@ -284,7 +287,7 @@ int bioem::run() (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime; const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime; - printf("Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s)\n", iProjectionOut, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000.); + printf("Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000.); } } //deallocating fftw_complex vector @@ -293,6 +296,16 @@ int bioem::run() delete[] conv_map; deviceFinishRun(); + { + const int count = omp_get_max_threads(); + for (int i = 0;i < count;i++) + { + myfftw_free(localCCT[i]); + myfftw_free(lCC[i]); + } + delete[] localCCT; + delete[] lCC; + } // ************* Writing Out Probabilities *************** @@ -305,34 +318,36 @@ int bioem::run() ofstream outputProbFile; outputProbFile.open ("Output_Probabilities"); - for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++) { // **** Total Probability *** - outputProbFile << "RefMap " << iRefMap << " Probability " << log(pProb[iRefMap].Total) + pProb[iRefMap].Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " Constant " << pProb[iRefMap].Constoadd << "\n"; + bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap); + + outputProbFile << "RefMap " << iRefMap << " Probability " << log(pProbMap.Total) + pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " Constant " << pProbMap.Constoadd << "\n"; outputProbFile << "RefMap " << iRefMap << " Maximizing Param: "; // *** Param that maximize probability**** - outputProbFile << (pProb[iRefMap].max_prob + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5) * (log(2 * M_PI) + 1) + log(param.param_device.volu)) << " "; - outputProbFile << param.angles[pProb[iRefMap].max_prob_orient].pos[0] << " "; - outputProbFile << param.angles[pProb[iRefMap].max_prob_orient].pos[1] << " "; - outputProbFile << param.angles[pProb[iRefMap].max_prob_orient].pos[2] << " "; - outputProbFile << param.CtfParam[pProb[iRefMap].max_prob_conv].pos[0] << " "; - outputProbFile << param.CtfParam[pProb[iRefMap].max_prob_conv].pos[1] << " "; - outputProbFile << param.CtfParam[pProb[iRefMap].max_prob_conv].pos[2] << " "; - outputProbFile << pProb[iRefMap].max_prob_cent_x << " "; - outputProbFile << pProb[iRefMap].max_prob_cent_y; + outputProbFile << (pProbMap.max_prob + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5) * (log(2 * M_PI) + 1) + log(param.param_device.volu)) << " "; + outputProbFile << param.angles[pProbMap.max_prob_orient].pos[0] << " "; + outputProbFile << param.angles[pProbMap.max_prob_orient].pos[1] << " "; + outputProbFile << param.angles[pProbMap.max_prob_orient].pos[2] << " "; + outputProbFile << param.CtfParam[pProbMap.max_prob_conv].pos[0] << " "; + outputProbFile << param.CtfParam[pProbMap.max_prob_conv].pos[1] << " "; + outputProbFile << param.CtfParam[pProbMap.max_prob_conv].pos[2] << " "; + outputProbFile << pProbMap.max_prob_cent_x << " "; + outputProbFile << pProbMap.max_prob_cent_y; outputProbFile << "\n"; // *** For individual files*** //angProbfile.open ("ANG_PROB_"iRefMap); if(param.writeAngles) { - for (int iProjectionOut = 0; iProjectionOut < param.nTotGridAngles; iProjectionOut++) + for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient++) { - angProbfile << " " << iRefMap << " " << param.angles[iProjectionOut].pos[0] << " " << param.angles[iProjectionOut].pos[1] << " " << param.angles[iProjectionOut].pos[2] << " " << log(pProb[iRefMap].forAngles[iProjectionOut]) + pProb[iRefMap].ConstAngle[iProjectionOut] + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " " << log(param.param_device.volu) << "\n"; + bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient); + angProbfile << " " << iRefMap << " " << param.angles[iOrient].pos[0] << " " << param.angles[iOrient].pos[1] << " " << param.angles[iOrient].pos[2] << " " << log(pProbAngle.forAngles) + pProbAngle.ConstAngle + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " " << log(param.param_device.volu) << "\n"; } } } @@ -340,25 +355,10 @@ int bioem::run() angProbfile.close(); outputProbFile.close(); - //Deleting allocated pointers - - if (pProb) - { - delete[] pProb; - pProb = NULL; - } - - if (param.refCTF) - { - delete[] param.refCTF; - param.refCTF = NULL; - } - - RefMap.freePointers(); return(0); } -int bioem::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap) +int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap) { if (FFTAlgo) { @@ -366,7 +366,7 @@ int bioem::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* conv_m for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) { const int num = omp_get_thread_num(); - calculateCCFFT(iRefMap, iProjectionOut, iConv, sumC, sumsquareC, localmultFFT, localCCT[num], lCC[num]); + calculateCCFFT(iRefMap, iOrient, iConv, sumC, sumsquareC, localmultFFT, localCCT[num], lCC[num]); } } else @@ -374,7 +374,7 @@ int bioem::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* conv_m #pragma omp parallel for for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) { - compareRefMapShifted < -1 > (iRefMap, iProjectionOut, iConv, conv_map, pProb, param.param_device, RefMap); + compareRefMapShifted < -1 > (iRefMap, iOrient, iConv, conv_map, pProb, param.param_device, RefMap); } } return(0); @@ -416,7 +416,6 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT) // **** To see how things are going: cout << "Id " << omp_get_thread_num() << " Angs: " << alpha << " " << beta << " " << gam << "\n"; *** // ********** Creat Rotation with pre-defiend grid of orientations********** - rotmat[0][0] = cos(gam) * cos(alpha) - cos(beta) * sin(alpha) * sin(gam); rotmat[0][1] = cos(gam) * sin(alpha) + cos(beta) * cos(alpha) * sin(gam); rotmat[0][2] = sin(gam) * sin(beta); @@ -550,32 +549,10 @@ int bioem::deviceInit() int bioem::deviceStartRun() { - if (FFTAlgo) - { - const int count = omp_get_max_threads(); - localCCT = new mycomplex_t*[count]; - lCC = new myfloat_t*[count]; - for (int i = 0;i < count;i++) - { - localCCT[i] = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); - lCC[i] = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels); - } - } return(0); } int bioem::deviceFinishRun() { - if (FFTAlgo) - { - const int count = omp_get_max_threads(); - for (int i = 0;i < count;i++) - { - myfftw_free(localCCT[i]); - myfftw_free(lCC[i]); - } - delete[] localCCT; - delete[] lCC; - } return(0); } diff --git a/bioem_algorithm.h b/bioem_algorithm.h index 2bd499f57f713495673e70ead3cf1bea15abc9fb..58baf794e81051a7fa153e5961fc15efce2934d1 100644 --- a/bioem_algorithm.h +++ b/bioem_algorithm.h @@ -12,33 +12,36 @@ #endif template <int GPUAlgo> -__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability* pProb, myfloat_t* buf3 = NULL, int* bufint = NULL) +__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability& pProb, myfloat_t* buf3 = NULL, int* bufint = NULL) { // ******* Summing total Probabilities ************* // ******* Need a constant because of numerical divergence***** - if(pProb[iRefMap].Constoadd < logpro) + bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap); + bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient); + + if(pProbMap.Constoadd < logpro) { - pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd); - pProb[iRefMap].Constoadd = logpro; + pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd); + pProbMap.Constoadd = logpro; } //Summing probabilities for each orientation - if(pProb[iRefMap].ConstAngle[iOrient] < logpro) + if(pProbAngle.ConstAngle < logpro) { - pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]); - pProb[iRefMap].ConstAngle[iOrient] = logpro; + pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle); + pProbAngle.ConstAngle = logpro; } if (GPUAlgo != 2) { - pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd); - pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]); + pProbMap.Total += exp(logpro - pProbMap.Constoadd); + pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle); } // ********** Getting parameters that maximize the probability *********** - if(pProb[iRefMap].max_prob < logpro) + if(pProbMap.max_prob < logpro) { - pProb[iRefMap].max_prob = logpro; + pProbMap.max_prob = logpro; if (GPUAlgo == 2) { @@ -47,11 +50,11 @@ __device__ static inline void update_prob(const myfloat_t logpro, const int iRef } else { - pProb[iRefMap].max_prob_cent_x = cent_x; - pProb[iRefMap].max_prob_cent_y = cent_y; + pProbMap.max_prob_cent_x = cent_x; + pProbMap.max_prob_cent_y = cent_y; } - pProb[iRefMap].max_prob_orient = iOrient; - pProb[iRefMap].max_prob_conv = iConv; + pProbMap.max_prob_orient = iOrient; + pProbMap.max_prob_conv = iConv; } } @@ -70,7 +73,7 @@ __device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, return(logpro); } -__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap) +__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap) { // ******************************************************** // *********** Calculates the BioEM probability *********** @@ -78,33 +81,36 @@ __device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myflo const myfloat_t logpro = calc_logpro(param, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]); - //update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb); + bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap); + bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient); + //GCC is too stupid to inline properly, so the code is copied here - if(pProb[iRefMap].Constoadd < logpro) + //update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb); + if(pProbMap.Constoadd < logpro) { - pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd); - pProb[iRefMap].Constoadd = logpro; + pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd); + pProbMap.Constoadd = logpro; } - pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd); + pProbMap.Total += exp(logpro - pProbMap.Constoadd); - if(pProb[iRefMap].ConstAngle[iOrient] < logpro) + if(pProbAngle.ConstAngle < logpro) { - pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]); - pProb[iRefMap].ConstAngle[iOrient] = logpro; + pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle); + pProbAngle.ConstAngle = logpro; } - pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]); + pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle); - if(pProb[iRefMap].max_prob < logpro) + if(pProbMap.max_prob < logpro) { - pProb[iRefMap].max_prob = logpro; - pProb[iRefMap].max_prob_cent_x = disx; - pProb[iRefMap].max_prob_cent_y = disy; - pProb[iRefMap].max_prob_orient = iOrient; - pProb[iRefMap].max_prob_conv = iConv; + pProbMap.max_prob = logpro; + pProbMap.max_prob_cent_x = disx; + pProbMap.max_prob_cent_y = disy; + pProbMap.max_prob_orient = iOrient; + pProbMap.max_prob_conv = iConv; } } -__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap) +__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap) { for (int cent_x = 0; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter) { @@ -131,7 +137,7 @@ __device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, } template <int GPUAlgo, class RefT> -__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap, +__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap, const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true) { @@ -281,18 +287,22 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient } __syncthreads(); + + bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap); + bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient); + if (bufint[0] == 1 && buf3[1] == logpro && iRefMap < RefMap.ntotRefMap && atomicAdd(&bufint[0], 1) == 1) { - pProb[iRefMap].max_prob_cent_x = cent_x; - pProb[iRefMap].max_prob_cent_y = cent_y; + pProbMap.max_prob_cent_x = cent_x; + pProbMap.max_prob_cent_y = cent_y; } __syncthreads(); if (iRefMap < RefMap.ntotRefMap) { - buf[myThreadIdxX] = exp(logpro - pProb[iRefMap].Constoadd); - buf2[myThreadIdxX] = exp(logpro - pProb[iRefMap].ConstAngle[iOrient]); + buf[myThreadIdxX] = exp(logpro - pProbMap.Constoadd); + buf2[myThreadIdxX] = exp(logpro - pProbAngle.ConstAngle); } __syncthreads(); @@ -372,8 +382,8 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient } if (myShift == 0 && iRefMap < RefMap.ntotRefMap) { - pProb[iRefMap].Total += vbuf[myThreadIdxX]; - pProb[iRefMap].forAngles[iOrient] += vbuf2[myThreadIdxX]; + pProbMap.Total += vbuf[myThreadIdxX]; + pProbAngle.forAngles += vbuf2[myThreadIdxX]; } } } @@ -385,7 +395,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient } template <int GPUAlgo, class RefT> -__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap) +__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap) { for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter) { diff --git a/bioem_cuda.cu b/bioem_cuda.cu index 28943dc0b4b2076fd3c727bc6640ff7e53a51dc3..724d3255551a2c43df826b8e443f4d4c4615b2b1 100644 --- a/bioem_cuda.cu +++ b/bioem_cuda.cu @@ -69,7 +69,7 @@ bioem_cuda::~bioem_cuda() deviceExit(); } -__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int cent_x, const int cent_y, const int maxRef) +__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int cent_x, const int cent_y, const int maxRef) { const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; if (iRefMap < maxRef) @@ -78,7 +78,7 @@ __global__ void compareRefMap_kernel(const int iOrient, const int iConv, const m } } -__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int maxRef) +__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int maxRef) { const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; if (iRefMap < maxRef) @@ -96,7 +96,7 @@ __global__ void cudaZeroMem(void* ptr, size_t size) for (int i = myid; i < mysize; i += mygrid) myptr[i] = 0; } -__global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int blockoffset, const int nShifts, const int nShiftBits, const int maxRef) +__global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int blockoffset, const int nShifts, const int nShiftBits, const int maxRef) { const size_t myid = (myBlockIdxX + blockoffset) * myBlockDimX + myThreadIdxX; const int iRefMap = myid >> (nShiftBits << 1); @@ -124,7 +124,7 @@ __global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* re } } -__global__ void cuDoRefMapsFFT(const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int maxRef, const int Offset) +__global__ void cuDoRefMapsFFT(const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int maxRef, const int Offset) { const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX + Offset; const myfloat_t* mylCC = &lCC[(myBlockIdxX * myBlockDimX + myThreadIdxX) * param.NumberPixels * param.NumberPixels]; @@ -145,7 +145,7 @@ static inline int ilog2 (int value) static inline int ilog2(int value) {return 31 - __builtin_clz(value);} #endif -int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap) +int bioem_cuda::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap) { if (startMap) { @@ -170,7 +170,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c cout << "Error running CUFFT " << cufftGetErrorStrung(err) << "\n"; exit(1); } - cuDoRefMapsFFT<<<divup(num, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>>(iProjectionOut, iConv, pFFTtmp, sumC, sumsquareC, pProb_device, param.param_device, *gpumap, num, i); + cuDoRefMapsFFT<<<divup(num, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>>(iOrient, iConv, pFFTtmp, sumC, sumsquareC, pProb_device, param.param_device, *gpumap, num, i); } checkCudaErrors(cudaGetLastError()); } @@ -201,7 +201,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c size_t nBlocks = CUDA_BLOCK_COUNT; for (size_t i = 0; i < totalBlocks; i += nBlocks) { - compareRefMapLoopShifts_kernel<<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream >>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *gpumap, i, nShifts, nShiftBits, maxRef); + compareRefMapLoopShifts_kernel<<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream >>> (iOrient, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *gpumap, i, nShifts, nShiftBits, maxRef); } } else if (GPUAlgo == 1) //Split shifts in multiple kernels @@ -210,13 +210,13 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c { for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y = cent_y + param.param_device.GridSpaceCenter) { - compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, cent_x, cent_y, maxRef); + compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iOrient, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, cent_x, cent_y, maxRef); } } } else if (GPUAlgo == 0) //All shifts in one kernel { - compareRefMapShifted_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, maxRef); + compareRefMapShifted_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iOrient, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, maxRef); } else { @@ -226,7 +226,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c } if (GPUWorkload < 100) { - bioem::compareRefMaps(iProjectionOut, iConv, conv_map, localmultFFT, sumC, sumsquareC, maxRef); + bioem::compareRefMaps(iOrient, iConv, conv_map, localmultFFT, sumC, sumsquareC, maxRef); } if (GPUAsync) { @@ -273,7 +273,9 @@ int bioem_cuda::deviceInit() gpumap->sumsquare_RefMap = sumsquare; checkCudaErrors(cudaStreamCreate(&cudaStream)); - checkCudaErrors(cudaMalloc(&pProb_device, sizeof(bioem_Probability) * RefMap.ntotRefMap)); + pProb_device = pProb; + checkCudaErrors(cudaMalloc(&pProb_device.ptr, pProb_device.get_size(RefMap.ntotRefMap, param.nTotGridAngles))); + pProb_device.set_pointers(); for (int i = 0; i < 2; i++) { checkCudaErrors(cudaEventCreate(&cudaEvent[i])); @@ -298,7 +300,7 @@ int bioem_cuda::deviceExit() if (deviceInitialized == 0) return(0); cudaStreamDestroy(cudaStream); - cudaFree(pProb_device); + cudaFree(pProb_device.ptr); cudaFree(sum); cudaFree(sumsquare); for (int i = 0; i < 2; i++) @@ -332,7 +334,7 @@ int bioem_cuda::deviceStartRun() { maxRef = GPUWorkload >= 100 ? RefMap.ntotRefMap : ((size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100); - cudaMemcpy(pProb_device, pProb, sizeof(bioem_Probability) * maxRef, cudaMemcpyHostToDevice); + cudaMemcpy(pProb_device.ptr, pProb.ptr, pProb.get_size(RefMap.ntotRefMap, param.nTotGridAngles), cudaMemcpyHostToDevice); if (FFTAlgo) { @@ -362,7 +364,7 @@ int bioem_cuda::deviceStartRun() int bioem_cuda::deviceFinishRun() { if (GPUAsync) cudaStreamSynchronize(cudaStream); - cudaMemcpy(pProb, pProb_device, sizeof(bioem_Probability) * maxRef, cudaMemcpyDeviceToHost); + cudaMemcpy(pProb.ptr, pProb_device.ptr, pProb.get_size(RefMap.ntotRefMap, param.nTotGridAngles), cudaMemcpyDeviceToHost); if (FFTAlgo) { diff --git a/include/bioem.h b/include/bioem.h index c71c3e56dafc3cab84d4f18df9abfa8993e3cf52..0eab8a05118150dc04822164f42eaed633d293f9 100644 --- a/include/bioem.h +++ b/include/bioem.h @@ -12,24 +12,28 @@ class bioem { + friend class bioem_RefMap; + public: bioem(); virtual ~bioem(); int configure(int ac, char* av[]); + void cleanup(); //Cleanup everything happening during configure + int precalculate(); // Is it better to pass directly the input File names? int dopreCalCrossCorrelation(int iRefMap, int iRefMapLocal); int run(); int doProjections(int iMap); int createConvolutedProjectionMap(int iOreint, int iMap, mycomplex_t* lproj, myfloat_t* Mapconv, mycomplex_t* localmultFFT, myfloat_t& sumC, myfloat_t& sumsquareC); - virtual int compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); + virtual int compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); int createProjection(int iMap, mycomplex_t* map); int calcross_cor(myfloat_t* localmap, myfloat_t& sum, myfloat_t& sumsquare); void calculateCCFFT(int iMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT, mycomplex_t* localCCT, myfloat_t* lCC); - bioem_Probability* pProb; + bioem_Probability pProb; protected: virtual int deviceInit(); diff --git a/include/bioem_cuda_internal.h b/include/bioem_cuda_internal.h index 204df1bf2c666772dca6c59b0730f298b84a510e..d87e7c63d37a0fdd974349f05aeaaa6a37b66a77 100644 --- a/include/bioem_cuda_internal.h +++ b/include/bioem_cuda_internal.h @@ -17,7 +17,7 @@ public: bioem_cuda(); virtual ~bioem_cuda(); - virtual int compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); + virtual int compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); protected: virtual int deviceInit(); @@ -31,7 +31,7 @@ protected: cudaEvent_t cudaEvent[2]; bioem_RefMap_Mod* pRefMap_device_Mod; bioem_RefMap* gpumap; - bioem_Probability* pProb_device; + bioem_Probability pProb_device; myfloat_t* pConvMap_device[2]; mycomplex_t* pRefMapsFFT; diff --git a/include/defs.h b/include/defs.h index 9e2b68e7be3da6a05fa060a9fbf534ead0a9c750..83ba1bfec722bdecd284f83cea6fa185890efcce 100644 --- a/include/defs.h +++ b/include/defs.h @@ -40,7 +40,6 @@ typedef myfloat_t mycomplex_t[2]; #define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU #define BIOEM_MODEL_SIZE 120000 -#define MAX_ORIENT 20000 struct myfloat3_t { diff --git a/include/map.h b/include/map.h index 25b0da2a032b75f759cfb80c3b7a6421239d57e1..01f3a5279f4fed518c17146aef38be91155c2727 100644 --- a/include/map.h +++ b/include/map.h @@ -6,6 +6,7 @@ #include <math.h> class bioem_param; +class bioem; class bioem_RefMap { @@ -30,6 +31,7 @@ public: RefMapsFFT = NULL; } int readRefMaps(bioem_param& param); + int precalculate(bioem_param& param, bioem& bio); int PreCalculateMapsFFT(bioem_param& param); mycomplex_t* RefMapsFFT; @@ -68,15 +70,52 @@ public: } }; -class bioem_Probability +class bioem_Probability_map { public: - myfloat_t forAngles[MAX_ORIENT]; myfloat_t Total; myfloat_t Constoadd; - myfloat_t ConstAngle[MAX_ORIENT]; myfloat_t max_prob; int max_prob_cent_x, max_prob_cent_y, max_prob_orient, max_prob_conv; }; +class bioem_Probability_angle +{ +public: + myfloat_t forAngles; + myfloat_t ConstAngle; +}; + +class bioem_Probability +{ +public: + int nMaps; + int nAngles; + __device__ __host__ bioem_Probability_map& getProbMap(int map) {return(ptr_map[map]);} + __device__ __host__ bioem_Probability_angle& getProbAngle(int map, int angle) {return(ptr_angle[angle * nMaps + map]);} + + void* ptr; + bioem_Probability_map* ptr_map; + bioem_Probability_angle* ptr_angle; + + size_t get_size(size_t maps, size_t angles) + { + return(maps * (angles * sizeof(bioem_Probability_angle) + sizeof(bioem_Probability_map))); + } + + void init(size_t maps, size_t angles) + { + nMaps = maps; + nAngles = angles; + ptr = mallocchk(get_size(maps, angles)); + set_pointers(); + } + + void set_pointers() + { + ptr_map = (bioem_Probability_map*) ptr; + ptr_angle = (bioem_Probability_angle*) (&ptr_map[nMaps]); + } +}; + #endif diff --git a/main.cpp b/main.cpp index b589405999d92132ebb27bacc24b1427b18c02f9..0aa776b31702eb41fd19f065b74785b5917ab544 100644 --- a/main.cpp +++ b/main.cpp @@ -55,6 +55,7 @@ int main(int argc, char* argv[]) // ************************************ End ********************************** printf ("The code ran for %f seconds.\n", timer.GetElapsedTime()); + bio->cleanup(); delete bio; return(0); diff --git a/map.cpp b/map.cpp index 7848d21d39d5f5ba76cde5c5d46feb1e9dd2ee55..20e42cee9807a9ff0dcb488aac8b09c35f429f3e 100644 --- a/map.cpp +++ b/map.cpp @@ -8,6 +8,7 @@ #include "map.h" #include "param.h" +#include "bioem.h" using namespace std; @@ -178,3 +179,31 @@ int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param) return(0); } + +int bioem_RefMap::precalculate(bioem_param& param, bioem& bio) +{ + // ************************************************************************************** + // *******************************Precalculating Routine for Maps************************ + // ************************************************************************************** + + myfloat_t sum, sumsquare; + + //Precalculating cross-correlations of maps + for (int iRefMap = 0; iRefMap < ntotRefMap ; iRefMap++) + { + bio.calcross_cor(getmap(iRefMap), sum, sumsquare); + //Storing Crosscorrelations in Map class + sum_RefMap[iRefMap] = sum; + sumsquare_RefMap[iRefMap] = sumsquare; + } + + // Precalculating Maps in Fourier space + if (bio.FFTAlgo) + { + PreCalculateMapsFFT(param); + free(maps); + maps = NULL; + } + + return(0); +} diff --git a/param.cpp b/param.cpp index e17de97a1108f406e00a01877dcf943356465d23..c25352096e10523439f3a874adc4f4737709907d 100644 --- a/param.cpp +++ b/param.cpp @@ -246,6 +246,7 @@ int bioem_param::CalculateGridsParam() //TO DO FOR QUATERNIONS cos_grid_beta = 2.f / (myfloat_t) angleGridPointsBeta; // Euler Angle Array + delete[] angles; angles = new myfloat3_t[angleGridPointsAlpha * angleGridPointsBeta * angleGridPointsAlpha]; for (int ialpha = 0; ialpha < angleGridPointsAlpha; ialpha ++) {