Commit 73602d4b authored by David Rohr's avatar David Rohr

merge compareRefMaps functions, create calc_logpro and update_prob function to...

merge compareRefMaps functions, create calc_logpro and update_prob function to remvoe code duplication
parent 8f4d17a7
......@@ -269,14 +269,7 @@ int bioem::run()
/***************************************************************************************/
/*** Comparing each calculated convoluted map with all experimental maps ***/
timer.ResetStart();
if (FFTAlgo == 0)
{
compareRefMaps(iProjectionOut, iConv, conv_map);
}
else
{
compareRefMaps2(iProjectionOut, iConv,conv_mapFFT,sumCONV,sumsquareCONV);
}
compareRefMaps(iProjectionOut, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
const double compTime = timer.GetCurrentElapsedTime();
const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1;
......@@ -363,39 +356,39 @@ int bioem::run()
return(0);
}
int bioem::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap)
int bioem::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap)
{
#pragma omp parallel for
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
if (FFTAlgo)
{
compareRefMapShifted<-1>(iRefMap,iProjectionOut,iConv,conv_map, pProb, param.param_device, RefMap);
}
return(0);
}
int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myfloat_t sumC,myfloat_t sumsquareC)
{
#pragma omp parallel
{
mycomplex_t *localCCT;
myfloat_t *lCC;
localCCT= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param.param_device.NumberPixels*param.param_device.NumberFFTPixels1D);
lCC= (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) *param.param_device.NumberPixels*param.param_device.NumberPixels);
const int num_threads = omp_get_num_threads();
const int thread_id = omp_get_thread_num();
const int mapsPerThread = (RefMap.ntotRefMap - startMap + num_threads - 1) / num_threads;
const int iStart = startMap + thread_id * mapsPerThread;
const int iEnd = min(RefMap.ntotRefMap, startMap + (thread_id + 1) * mapsPerThread);
for (int iRefMap = iStart; iRefMap < iEnd; iRefMap ++)
{
calculateCCFFT(iRefMap,iProjectionOut, iConv, sumC,sumsquareC, localmultFFT, localCCT,lCC);
}
myfftw_free(localCCT);
myfftw_free(lCC);
}
}
else
{
mycomplex_t *localCCT;
myfloat_t *lCC;
localCCT= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param.param_device.NumberPixels*param.param_device.NumberFFTPixels1D);
lCC= (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) *param.param_device.NumberPixels*param.param_device.NumberPixels);
const int num_threads = omp_get_num_threads();
const int thread_id = omp_get_thread_num();
const int mapsPerThread = (RefMap.ntotRefMap + num_threads - 1) / num_threads;
const int iStart = thread_id * mapsPerThread;
const int iEnd = min(RefMap.ntotRefMap, (thread_id + 1) * mapsPerThread);
for (int iRefMap = iStart; iRefMap < iEnd; iRefMap ++)
#pragma omp parallel for
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{
calculateCCFFT(iRefMap,iOrient, iConv, sumC,sumsquareC, localConvFFT, localCCT,lCC);
compareRefMapShifted<-1>(iRefMap,iProjectionOut,iConv,conv_map, pProb, param.param_device, RefMap);
}
myfftw_free(localCCT);
myfftw_free(lCC);
}
return(0);
}
......@@ -437,51 +430,40 @@ inline int bioem::calculateCCFFT(int iRefMap, int iOrient, int iConv, myfloat_t
return (0);
}
inline int bioem::calProb(int iRefMap,int iOrient, int iConv,myfloat_t sumC,myfloat_t sumsquareC, int value, int disx, int disy)
inline int bioem::calProb(int iRefMap,int iOrient, int iConv,myfloat_t sumC,myfloat_t sumsquareC, float value, int disx, int disy)
{
/********************************************************/
/*********** Calculates the BioEM probability ***********/
/********************************************************/
const myfloat_t ForLogProb = (sumsquareC * param.param_device.Ntotpi - sumC * sumC);
const myfloat_t logpro = calc_logpro(param.param_device, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);
// Products of different cross-correlations (first element in formula)
const myfloat_t firstele = param.param_device.Ntotpi * (RefMap.sumsquare_RefMap[iRefMap] * sumsquareC - value * value) +
2 * RefMap.sum_RefMap[iRefMap] * sumC * value - RefMap.sumsquare_RefMap[iRefMap] * sumC * sumC - RefMap.sum_RefMap[iRefMap] * RefMap.sum_RefMap[iRefMap] * sumsquareC;
//update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb);
//GCC is too stupid to inline properly, so the code is copied here
if(pProb[iRefMap].Constoadd < logpro)
{
pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd);
pProb[iRefMap].Constoadd = logpro;
}
pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd);
//******* Calculating log of Prob*********/
// As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1);
const myfloat_t logpro = (3 - param.param_device.Ntotpi) * 0.5 * log(firstele) + (param.param_device.Ntotpi * 0.5 - 2) * log((param.param_device.Ntotpi - 2) * ForLogProb);
// cout << n <<" " << firstele << " "<< logpro << "\n";
{
/******* Summing total Probabilities *************/
/******* Need a constant because of numerical divergence*****/
if(pProb[iRefMap].Constoadd < logpro)
{
pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd);
pProb[iRefMap].Constoadd = logpro;
}
pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd);
if(pProb[iRefMap].ConstAngle[iOrient] < logpro)
{
pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]);
pProb[iRefMap].ConstAngle[iOrient] = logpro;
}
pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]);
//Summing probabilities for each orientation
if(pProb[iRefMap].ConstAngle[iOrient] < logpro)
{
pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]);
pProb[iRefMap].ConstAngle[iOrient] = logpro;
}
pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]);
if(pProb[iRefMap].max_prob < logpro)
{
pProb[iRefMap].max_prob = logpro;
pProb[iRefMap].max_prob_cent_x = disx;
pProb[iRefMap].max_prob_cent_y = disy;
pProb[iRefMap].max_prob_orient = iOrient;
pProb[iRefMap].max_prob_conv = iConv;
}
/********** Getting parameters that maximize the probability ***********/
if(pProb[iRefMap].max_prob < logpro)
{
pProb[iRefMap].max_prob = logpro;
pProb[iRefMap].max_prob_cent_x = disx;
pProb[iRefMap].max_prob_cent_y = disy;
pProb[iRefMap].max_prob_orient = iOrient;
pProb[iRefMap].max_prob_conv = iConv;
}
}
return (0);
}
......
......@@ -11,6 +11,65 @@
#include <smmintrin.h>
#endif
template <int GPUAlgo>
__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability* pProb, myfloat_t* buf3 = NULL, int* bufint = NULL)
{
/******* Summing total Probabilities *************/
/******* Need a constant because of numerical divergence*****/
if(pProb[iRefMap].Constoadd < logpro)
{
pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd);
pProb[iRefMap].Constoadd = logpro;
}
//Summing probabilities for each orientation
if(pProb[iRefMap].ConstAngle[iOrient] < logpro)
{
pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]);
pProb[iRefMap].ConstAngle[iOrient] = logpro;
}
if (GPUAlgo != 2)
{
pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd);
pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]);
}
/********** Getting parameters that maximize the probability ***********/
if(pProb[iRefMap].max_prob < logpro)
{
pProb[iRefMap].max_prob = logpro;
if (GPUAlgo == 2)
{
bufint[0] = 1;
buf3[1] = logpro;
}
else
{
pProb[iRefMap].max_prob_cent_x = cent_x;
pProb[iRefMap].max_prob_cent_y = cent_y;
}
pProb[iRefMap].max_prob_orient = iOrient;
pProb[iRefMap].max_prob_conv = iConv;
}
}
__device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, const myfloat_t sum, const myfloat_t sumsquare, const myfloat_t crossproMapConv, const myfloat_t sumref, const myfloat_t sumsquareref)
{
// Related to Reference calculated Projection
const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum);
// Products of different cross-correlations (first element in formula)
const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare-crossproMapConv * crossproMapConv) +
2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare;
//******* Calculating log of Prob*********/
// As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1);
const myfloat_t logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb);
return(logpro);
}
template <int GPUAlgo, class RefT>
__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const bioem_map& Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap,
const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true)
......@@ -99,16 +158,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
#endif
/********** Calculating elements in BioEM Probability formula ********/
// Related to Reference calculated Projection
const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum);
// Products of different cross-correlations (first element in formula)
const myfloat_t firstele = param.Ntotpi * (RefMap.sumsquare_RefMap[iRefMap] * sumsquare-crossproMapConv * crossproMapConv) +
2 * RefMap.sum_RefMap[iRefMap] * sum * crossproMapConv - RefMap.sumsquare_RefMap[iRefMap] * sum * sum - RefMap.sum_RefMap[iRefMap] * RefMap.sum_RefMap[iRefMap] * sumsquare;
//******* Calculating log of Prob*********/
// As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1);
logpro = (3 - param.Ntotpi) * 0.5 * log(firstele) + (param.Ntotpi * 0.5 - 2) * log((param.Ntotpi - 2) * ForLogProb);
logpro = calc_logpro(param, sum, sumsquare, crossproMapConv, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);
}
else
{
......@@ -166,24 +216,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
{
const myfloat_t logpro_max = vbuf[myThreadIdxX];
if(pProb[iRefMap].Constoadd < logpro_max)
{
pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro_max + pProb[iRefMap].Constoadd);
pProb[iRefMap].Constoadd = logpro_max;
}
if(pProb[iRefMap].ConstAngle[iOrient] < logpro_max)
{
pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro_max + pProb[iRefMap].ConstAngle[iOrient]);
pProb[iRefMap].ConstAngle[iOrient] = logpro_max;
}
if(pProb[iRefMap].max_prob < logpro_max)
{
pProb[iRefMap].max_prob = logpro_max;
pProb[iRefMap].max_prob_orient = iOrient;
pProb[iRefMap].max_prob_conv = iConv;
bufint[0] = 1;
buf3[1] = logpro_max;
}
update_prob<GPUAlgo>(logpro_max, iRefMap, iOrient, iConv, -1, -1, pProb, buf3, bufint);
}
}
......@@ -289,32 +322,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
/***** Summing & Storing total/Orientation Probabilites for each map ************/
{
/******* Summing total Probabilities *************/
/******* Need a constant because of numerical divergence*****/
if(pProb[iRefMap].Constoadd < logpro)
{
pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd);
pProb[iRefMap].Constoadd = logpro;
}
pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd);
//Summing probabilities for each orientation
if(pProb[iRefMap].ConstAngle[iOrient] < logpro)
{
pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]);
pProb[iRefMap].ConstAngle[iOrient] = logpro;
}
pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]);
/********** Getting parameters that maximize the probability ***********/
if(pProb[iRefMap].max_prob < logpro)
{
pProb[iRefMap].max_prob = logpro;
pProb[iRefMap].max_prob_cent_x = cent_x;
pProb[iRefMap].max_prob_cent_y = cent_y;
pProb[iRefMap].max_prob_orient = iOrient;
pProb[iRefMap].max_prob_conv = iConv;
}
update_prob<-1>(logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb);
}
}
......
......@@ -62,9 +62,9 @@ __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iCon
const int myShift = myid & (nShifts * nShifts - 1);
const int cent_x = myShiftIdx * param.GridSpaceCenter - param.maxDisplaceCenter;
const int cent_y = myShiftIdy * param.GridSpaceCenter - param.maxDisplaceCenter;
const bool threadActive = myShiftIdx < nShifts && myShiftIdy < nShifts && iRefMap < maxRef;
compareRefMap<2>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap, cent_x, cent_y, myShift, nShifts * nShifts, myRef, threadActive);
}
......@@ -81,7 +81,7 @@ static inline int ilog2 (int value)
static inline int ilog2(int value) {return 31 - __builtin_clz(value);}
#endif
int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap)
int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap)
{
if (startMap)
{
......@@ -94,7 +94,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c
checkCudaErrors(cudaEventSynchronize(cudaEvent[iConv & 1]));
}
checkCudaErrors(cudaMemcpyAsync(pConvMap_device[iConv & 1], &conv_map, sizeof(bioem_map), cudaMemcpyHostToDevice, cudaStream));
if (GPUAlgo == 2) //Loop over shifts
{
const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1;
......@@ -129,7 +129,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c
{
compareRefMap_kernel <<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, cent_x, cent_y, maxRef);
}
}
}
}
else if (GPUAlgo == 0) //All shifts in one kernel
{
......@@ -142,7 +142,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c
}
if (GPUWorkload < 100)
{
bioem::compareRefMaps(iProjectionOut, iConv, conv_map, maxRef);
bioem::compareRefMaps(iProjectionOut, iConv, conv_map, localmultFFT, sumC, sumsquareC, maxRef);
}
if (GPUAsync)
{
......@@ -158,7 +158,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c
int bioem_cuda::deviceInit()
{
deviceExit();
checkCudaErrors(cudaStreamCreate(&cudaStream));
checkCudaErrors(cudaMalloc(&pRefMap_device, sizeof(bioem_RefMap)));
checkCudaErrors(cudaMalloc(&pProb_device, sizeof(bioem_Probability) * RefMap.ntotRefMap));
......@@ -168,7 +168,7 @@ int bioem_cuda::deviceInit()
checkCudaErrors(cudaMalloc(&pConvMap_device[i], sizeof(bioem_map)));
}
pRefMap_device_Mod = (bioem_RefMap_Mod*) pRefMap_device;
if (GPUAlgo == 0 || GPUAlgo == 1)
{
bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod(RefMap);
......@@ -186,7 +186,7 @@ int bioem_cuda::deviceInit()
int bioem_cuda::deviceExit()
{
if (deviceInitialized == 0) return(0);
cudaStreamDestroy(cudaStream);
cudaFree(pRefMap_device);
cudaFree(pProb_device);
......@@ -196,7 +196,7 @@ int bioem_cuda::deviceExit()
cudaFree(pConvMap_device);
}
cudaThreadExit();
deviceInitialized = 0;
return(0);
}
......@@ -204,7 +204,7 @@ int bioem_cuda::deviceExit()
int bioem_cuda::deviceStartRun()
{
maxRef = GPUWorkload >= 100 ? RefMap.ntotRefMap : ((size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100);
cudaMemcpy(pProb_device, pProb, sizeof(bioem_Probability) * maxRef, cudaMemcpyHostToDevice);
return(0);
}
......@@ -213,7 +213,7 @@ int bioem_cuda::deviceFinishRun()
{
if (GPUAsync) cudaStreamSynchronize(cudaStream);
cudaMemcpy(pProb, pProb_device, sizeof(bioem_Probability) * maxRef, cudaMemcpyDeviceToHost);
return(0);
}
......
......@@ -23,14 +23,12 @@ public:
int doProjections(int iMap);
int createConvolutedProjectionMap(int iOreint,int iMap, mycomplex_t* lproj,bioem_map& Mapconv,mycomplex_t* localmultFFT,myfloat_t& sumC,myfloat_t& sumsquareC);
virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap = 0);
int compareRefMaps2(int iProjectionOut, int iConv,mycomplex_t* localmultFFT,myfloat_t sumC,myfloat_t sumsquareC);
virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0);
int createProjection(int iMap, mycomplex_t* map);
int calcross_cor(bioem_map& localmap,myfloat_t& sum,myfloat_t& sumsquare);
int calProb(int iRefMap,int iOrient, int iConv,myfloat_t sumC,myfloat_t sumsquareC, int value, int disx, int disy);
int calculateCCFFT(int iMap, int iOrient, int iConv, myfloat_t sumC,myfloat_t sumsquareC, mycomplex_t* localConvFFT,mycomplex_t* localCCT,myfloat_t* lCC);
int calProb(int iRefMap,int iOrient, int iConv,myfloat_t sumC,myfloat_t sumsquareC, float value, int disx, int disy);
int calculateCCFFT(int iMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT,mycomplex_t* localCCT,myfloat_t* lCC);
bioem_Probability* pProb;
......
......@@ -16,7 +16,7 @@ public:
bioem_cuda();
virtual ~bioem_cuda();
virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap = 0);
virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0);
protected:
virtual int deviceInit();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment