Commit a65e74a6 authored by David Rohr's avatar David Rohr

Fix allocation of FFTW scratch space, move deallocation of memory allocated in...

Fix allocation of FFTW scratch space, move deallocation of memory allocated in configuration to cleanup function, get rid of MAX_ORIENT constant
parent 6b401955
......@@ -18,10 +18,8 @@
#include "model.h"
#include "map.h"
#include "bioem_algorithm.h"
using namespace boost;
namespace po = boost::program_options;
......@@ -170,11 +168,20 @@ int bioem::configure(int ac, char* av[])
param.nTotCTFs = atoi(getenv("BIOEM_DEBUG_BREAK"));
}
pProb.init(RefMap.ntotRefMap, param.nTotGridAngles);
deviceInit();
return(0);
}
void bioem::cleanup()
{
//Deleting allocated pointers
free(pProb.ptr);
RefMap.freePointers();
}
int bioem::precalculate()
{
// **************************************************************************************
......@@ -184,32 +191,15 @@ int bioem::precalculate()
// Generating Grids of orientations
param.CalculateGridsParam();
myfloat_t sum, sumsquare;
//Precalculating cross-correlations of maps
for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap ; iRefMap++)
{
calcross_cor(RefMap.getmap(iRefMap), sum, sumsquare);
//Storing Crosscorrelations in Map class
RefMap.sum_RefMap[iRefMap] = sum;
RefMap.sumsquare_RefMap[iRefMap] = sumsquare;
}
// Precalculating CTF Kernels stored in class Param
param.CalculateRefCTF();
// Precalculating Maps in Fourier space
if (FFTAlgo)
{
RefMap.PreCalculateMapsFFT(param);
free(RefMap.maps);
RefMap.maps = NULL;
}
//Precalculate Maps
RefMap.precalculate(param, *this);
return(0);
}
int bioem::run()
{
// **************************************************************************************
......@@ -218,23 +208,36 @@ int bioem::run()
// **** If we want to control the number of threads -> omp_set_num_threads(XX); ******
// ****************** Declarying class of Probability Pointer *************************
pProb = new bioem_Probability[RefMap.ntotRefMap];
printf("\tInitializing\n");
printf("\tInitializing Probabilities\n");
// Inizialzing Probabilites to zero and constant to -Infinity
for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{
pProb[iRefMap].Total = 0.0;
pProb[iRefMap].Constoadd = -9999999;
pProb[iRefMap].max_prob = -9999999;
bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
pProbMap.Total = 0.0;
pProbMap.Constoadd = -9999999;
pProbMap.max_prob = -9999999;
for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient ++)
{
pProb[iRefMap].forAngles[iOrient] = 0.0;
pProb[iRefMap].ConstAngle[iOrient] = -99999999;
bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
pProbAngle.forAngles = 0.0;
pProbAngle.ConstAngle = -99999999;
}
}
// **************************************************************************************
deviceStartRun();
{
const int count = omp_get_max_threads();
localCCT = new mycomplex_t*[count];
lCC = new myfloat_t*[count];
for (int i = 0;i < count;i++)
{
localCCT[i] = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
lCC[i] = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
}
}
// ******************************** MAIN CYCLE ******************************************
......@@ -252,29 +255,29 @@ int bioem::run()
printf("\tMain Loop (GridAngles %d, CTFs %d, RefMaps %d, Shifts (%d/%d)²), Pixels %d²\n", param.nTotGridAngles, param.nTotCTFs, RefMap.ntotRefMap, 2 * param.param_device.maxDisplaceCenter + param.param_device.GridSpaceCenter, param.param_device.GridSpaceCenter, param.param_device.NumberPixels);
printf("\tInner Loop Count (%d %d %d) %lld\n", param.param_device.maxDisplaceCenter, param.param_device.GridSpaceCenter, param.param_device.NumberPixels, (long long int) (param.param_device.NumberPixels * param.param_device.NumberPixels * (2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1) * (2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1)));
for (int iProjectionOut = 0; iProjectionOut < param.nTotGridAngles; iProjectionOut++)
for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient++)
{
// ***************************************************************************************
// ***** Creating Projection for given orientation and transforming to Fourier space *****
timer.ResetStart();
createProjection(iProjectionOut, proj_mapFFT);
printf("Time Projection %d: %f\n", iProjectionOut, timer.GetCurrentElapsedTime());
createProjection(iOrient, proj_mapFFT);
printf("Time Projection %d: %f\n", iOrient, timer.GetCurrentElapsedTime());
// ***************************************************************************************
// ***** **** Internal Loop over convolutions **** *****
for (int iConv = 0; iConv < param.nTotCTFs; iConv++)
{
printf("\t\tConvolution %d %d\n", iProjectionOut, iConv);
printf("\t\tConvolution %d %d\n", iOrient, iConv);
// *** Calculating convolutions of projection map and crosscorrelations ***
timer.ResetStart();
createConvolutedProjectionMap(iProjectionOut, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
printf("Time Convolution %d %d: %f\n", iProjectionOut, iConv, timer.GetCurrentElapsedTime());
createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
printf("Time Convolution %d %d: %f\n", iOrient, iConv, timer.GetCurrentElapsedTime());
// ***************************************************************************************
// *** Comparing each calculated convoluted map with all experimental maps ***
timer.ResetStart();
compareRefMaps(iProjectionOut, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
compareRefMaps(iOrient, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
const double compTime = timer.GetCurrentElapsedTime();
const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1;
......@@ -284,7 +287,7 @@ int bioem::run()
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime;
const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime;
printf("Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s)\n", iProjectionOut, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000.);
printf("Time Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000.);
}
}
//deallocating fftw_complex vector
......@@ -293,6 +296,16 @@ int bioem::run()
delete[] conv_map;
deviceFinishRun();
{
const int count = omp_get_max_threads();
for (int i = 0;i < count;i++)
{
myfftw_free(localCCT[i]);
myfftw_free(lCC[i]);
}
delete[] localCCT;
delete[] lCC;
}
// ************* Writing Out Probabilities ***************
......@@ -305,34 +318,36 @@ int bioem::run()
ofstream outputProbFile;
outputProbFile.open ("Output_Probabilities");
for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{
// **** Total Probability ***
outputProbFile << "RefMap " << iRefMap << " Probability " << log(pProb[iRefMap].Total) + pProb[iRefMap].Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " Constant " << pProb[iRefMap].Constoadd << "\n";
bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
outputProbFile << "RefMap " << iRefMap << " Probability " << log(pProbMap.Total) + pProbMap.Constoadd + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " Constant " << pProbMap.Constoadd << "\n";
outputProbFile << "RefMap " << iRefMap << " Maximizing Param: ";
// *** Param that maximize probability****
outputProbFile << (pProb[iRefMap].max_prob + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5) * (log(2 * M_PI) + 1) + log(param.param_device.volu)) << " ";
outputProbFile << param.angles[pProb[iRefMap].max_prob_orient].pos[0] << " ";
outputProbFile << param.angles[pProb[iRefMap].max_prob_orient].pos[1] << " ";
outputProbFile << param.angles[pProb[iRefMap].max_prob_orient].pos[2] << " ";
outputProbFile << param.CtfParam[pProb[iRefMap].max_prob_conv].pos[0] << " ";
outputProbFile << param.CtfParam[pProb[iRefMap].max_prob_conv].pos[1] << " ";
outputProbFile << param.CtfParam[pProb[iRefMap].max_prob_conv].pos[2] << " ";
outputProbFile << pProb[iRefMap].max_prob_cent_x << " ";
outputProbFile << pProb[iRefMap].max_prob_cent_y;
outputProbFile << (pProbMap.max_prob + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5) * (log(2 * M_PI) + 1) + log(param.param_device.volu)) << " ";
outputProbFile << param.angles[pProbMap.max_prob_orient].pos[0] << " ";
outputProbFile << param.angles[pProbMap.max_prob_orient].pos[1] << " ";
outputProbFile << param.angles[pProbMap.max_prob_orient].pos[2] << " ";
outputProbFile << param.CtfParam[pProbMap.max_prob_conv].pos[0] << " ";
outputProbFile << param.CtfParam[pProbMap.max_prob_conv].pos[1] << " ";
outputProbFile << param.CtfParam[pProbMap.max_prob_conv].pos[2] << " ";
outputProbFile << pProbMap.max_prob_cent_x << " ";
outputProbFile << pProbMap.max_prob_cent_y;
outputProbFile << "\n";
// *** For individual files*** //angProbfile.open ("ANG_PROB_"iRefMap);
if(param.writeAngles)
{
for (int iProjectionOut = 0; iProjectionOut < param.nTotGridAngles; iProjectionOut++)
for (int iOrient = 0; iOrient < param.nTotGridAngles; iOrient++)
{
angProbfile << " " << iRefMap << " " << param.angles[iProjectionOut].pos[0] << " " << param.angles[iProjectionOut].pos[1] << " " << param.angles[iProjectionOut].pos[2] << " " << log(pProb[iRefMap].forAngles[iProjectionOut]) + pProb[iRefMap].ConstAngle[iProjectionOut] + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " " << log(param.param_device.volu) << "\n";
bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
angProbfile << " " << iRefMap << " " << param.angles[iOrient].pos[0] << " " << param.angles[iOrient].pos[1] << " " << param.angles[iOrient].pos[2] << " " << log(pProbAngle.forAngles) + pProbAngle.ConstAngle + 0.5 * log(M_PI) + (1 - param.param_device.Ntotpi * 0.5)*(log(2 * M_PI) + 1) + log(param.param_device.volu) << " " << log(param.param_device.volu) << "\n";
}
}
}
......@@ -340,25 +355,10 @@ int bioem::run()
angProbfile.close();
outputProbFile.close();
//Deleting allocated pointers
if (pProb)
{
delete[] pProb;
pProb = NULL;
}
if (param.refCTF)
{
delete[] param.refCTF;
param.refCTF = NULL;
}
RefMap.freePointers();
return(0);
}
int bioem::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap)
int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap)
{
if (FFTAlgo)
{
......@@ -366,7 +366,7 @@ int bioem::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* conv_m
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{
const int num = omp_get_thread_num();
calculateCCFFT(iRefMap, iProjectionOut, iConv, sumC, sumsquareC, localmultFFT, localCCT[num], lCC[num]);
calculateCCFFT(iRefMap, iOrient, iConv, sumC, sumsquareC, localmultFFT, localCCT[num], lCC[num]);
}
}
else
......@@ -374,7 +374,7 @@ int bioem::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* conv_m
#pragma omp parallel for
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{
compareRefMapShifted < -1 > (iRefMap, iProjectionOut, iConv, conv_map, pProb, param.param_device, RefMap);
compareRefMapShifted < -1 > (iRefMap, iOrient, iConv, conv_map, pProb, param.param_device, RefMap);
}
}
return(0);
......@@ -416,7 +416,6 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
// **** To see how things are going: cout << "Id " << omp_get_thread_num() << " Angs: " << alpha << " " << beta << " " << gam << "\n"; ***
// ********** Creat Rotation with pre-defiend grid of orientations**********
rotmat[0][0] = cos(gam) * cos(alpha) - cos(beta) * sin(alpha) * sin(gam);
rotmat[0][1] = cos(gam) * sin(alpha) + cos(beta) * cos(alpha) * sin(gam);
rotmat[0][2] = sin(gam) * sin(beta);
......@@ -550,32 +549,10 @@ int bioem::deviceInit()
int bioem::deviceStartRun()
{
if (FFTAlgo)
{
const int count = omp_get_max_threads();
localCCT = new mycomplex_t*[count];
lCC = new myfloat_t*[count];
for (int i = 0;i < count;i++)
{
localCCT[i] = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
lCC[i] = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
}
}
return(0);
}
int bioem::deviceFinishRun()
{
if (FFTAlgo)
{
const int count = omp_get_max_threads();
for (int i = 0;i < count;i++)
{
myfftw_free(localCCT[i]);
myfftw_free(lCC[i]);
}
delete[] localCCT;
delete[] lCC;
}
return(0);
}
......@@ -12,33 +12,36 @@
#endif
template <int GPUAlgo>
__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability* pProb, myfloat_t* buf3 = NULL, int* bufint = NULL)
__device__ static inline void update_prob(const myfloat_t logpro, const int iRefMap, const int iOrient, const int iConv, const int cent_x, const int cent_y, bioem_Probability& pProb, myfloat_t* buf3 = NULL, int* bufint = NULL)
{
// ******* Summing total Probabilities *************
// ******* Need a constant because of numerical divergence*****
if(pProb[iRefMap].Constoadd < logpro)
bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
if(pProbMap.Constoadd < logpro)
{
pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd);
pProb[iRefMap].Constoadd = logpro;
pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
pProbMap.Constoadd = logpro;
}
//Summing probabilities for each orientation
if(pProb[iRefMap].ConstAngle[iOrient] < logpro)
if(pProbAngle.ConstAngle < logpro)
{
pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]);
pProb[iRefMap].ConstAngle[iOrient] = logpro;
pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
pProbAngle.ConstAngle = logpro;
}
if (GPUAlgo != 2)
{
pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd);
pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]);
pProbMap.Total += exp(logpro - pProbMap.Constoadd);
pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
}
// ********** Getting parameters that maximize the probability ***********
if(pProb[iRefMap].max_prob < logpro)
if(pProbMap.max_prob < logpro)
{
pProb[iRefMap].max_prob = logpro;
pProbMap.max_prob = logpro;
if (GPUAlgo == 2)
{
......@@ -47,11 +50,11 @@ __device__ static inline void update_prob(const myfloat_t logpro, const int iRef
}
else
{
pProb[iRefMap].max_prob_cent_x = cent_x;
pProb[iRefMap].max_prob_cent_y = cent_y;
pProbMap.max_prob_cent_x = cent_x;
pProbMap.max_prob_cent_y = cent_y;
}
pProb[iRefMap].max_prob_orient = iOrient;
pProb[iRefMap].max_prob_conv = iConv;
pProbMap.max_prob_orient = iOrient;
pProbMap.max_prob_conv = iConv;
}
}
......@@ -70,7 +73,7 @@ __device__ static inline myfloat_t calc_logpro(const bioem_param_device& param,
return(logpro);
}
__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
__device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, myfloat_t value, int disx, int disy, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
{
// ********************************************************
// *********** Calculates the BioEM probability ***********
......@@ -78,33 +81,36 @@ __device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myflo
const myfloat_t logpro = calc_logpro(param, sumC, sumsquareC, value, RefMap.sum_RefMap[iRefMap], RefMap.sumsquare_RefMap[iRefMap]);
//update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb);
bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
//GCC is too stupid to inline properly, so the code is copied here
if(pProb[iRefMap].Constoadd < logpro)
//update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb);
if(pProbMap.Constoadd < logpro)
{
pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd);
pProb[iRefMap].Constoadd = logpro;
pProbMap.Total = pProbMap.Total * exp(-logpro + pProbMap.Constoadd);
pProbMap.Constoadd = logpro;
}
pProb[iRefMap].Total += exp(logpro - pProb[iRefMap].Constoadd);
pProbMap.Total += exp(logpro - pProbMap.Constoadd);
if(pProb[iRefMap].ConstAngle[iOrient] < logpro)
if(pProbAngle.ConstAngle < logpro)
{
pProb[iRefMap].forAngles[iOrient] = pProb[iRefMap].forAngles[iOrient] * exp(-logpro + pProb[iRefMap].ConstAngle[iOrient]);
pProb[iRefMap].ConstAngle[iOrient] = logpro;
pProbAngle.forAngles = pProbAngle.forAngles * exp(-logpro + pProbAngle.ConstAngle);
pProbAngle.ConstAngle = logpro;
}
pProb[iRefMap].forAngles[iOrient] += exp(logpro - pProb[iRefMap].ConstAngle[iOrient]);
pProbAngle.forAngles += exp(logpro - pProbAngle.ConstAngle);
if(pProb[iRefMap].max_prob < logpro)
if(pProbMap.max_prob < logpro)
{
pProb[iRefMap].max_prob = logpro;
pProb[iRefMap].max_prob_cent_x = disx;
pProb[iRefMap].max_prob_cent_y = disy;
pProb[iRefMap].max_prob_orient = iOrient;
pProb[iRefMap].max_prob_conv = iConv;
pProbMap.max_prob = logpro;
pProbMap.max_prob_cent_x = disx;
pProbMap.max_prob_cent_y = disy;
pProbMap.max_prob_orient = iOrient;
pProbMap.max_prob_conv = iConv;
}
}
__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability& pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
{
for (int cent_x = 0; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
{
......@@ -131,7 +137,7 @@ __device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient,
}
template <int GPUAlgo, class RefT>
__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap,
__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap,
const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true)
{
......@@ -281,18 +287,22 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
}
__syncthreads();
bioem_Probability_map& pProbMap = pProb.getProbMap(iRefMap);
bioem_Probability_angle& pProbAngle = pProb.getProbAngle(iRefMap, iOrient);
if (bufint[0] == 1 && buf3[1] == logpro && iRefMap < RefMap.ntotRefMap && atomicAdd(&bufint[0], 1) == 1)
{
pProb[iRefMap].max_prob_cent_x = cent_x;
pProb[iRefMap].max_prob_cent_y = cent_y;
pProbMap.max_prob_cent_x = cent_x;
pProbMap.max_prob_cent_y = cent_y;
}
__syncthreads();
if (iRefMap < RefMap.ntotRefMap)
{
buf[myThreadIdxX] = exp(logpro - pProb[iRefMap].Constoadd);
buf2[myThreadIdxX] = exp(logpro - pProb[iRefMap].ConstAngle[iOrient]);
buf[myThreadIdxX] = exp(logpro - pProbMap.Constoadd);
buf2[myThreadIdxX] = exp(logpro - pProbAngle.ConstAngle);
}
__syncthreads();
......@@ -372,8 +382,8 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
}
if (myShift == 0 && iRefMap < RefMap.ntotRefMap)
{
pProb[iRefMap].Total += vbuf[myThreadIdxX];
pProb[iRefMap].forAngles[iOrient] += vbuf2[myThreadIdxX];
pProbMap.Total += vbuf[myThreadIdxX];
pProbAngle.forAngles += vbuf2[myThreadIdxX];
}
}
}
......@@ -385,7 +395,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
}
template <int GPUAlgo, class RefT>
__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap)
__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* Mapconv, bioem_Probability& pProb, const bioem_param_device& param, const RefT& RefMap)
{
for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
{
......
......@@ -69,7 +69,7 @@ bioem_cuda::~bioem_cuda()
deviceExit();
}
__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int cent_x, const int cent_y, const int maxRef)
__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int cent_x, const int cent_y, const int maxRef)
{
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < maxRef)
......@@ -78,7 +78,7 @@ __global__ void compareRefMap_kernel(const int iOrient, const int iConv, const m
}
}
__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int maxRef)
__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int maxRef)
{
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < maxRef)
......@@ -96,7 +96,7 @@ __global__ void cudaZeroMem(void* ptr, size_t size)
for (int i = myid; i < mysize; i += mygrid) myptr[i] = 0;
}
__global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int blockoffset, const int nShifts, const int nShiftBits, const int maxRef)
__global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int blockoffset, const int nShifts, const int nShiftBits, const int maxRef)
{
const size_t myid = (myBlockIdxX + blockoffset) * myBlockDimX + myThreadIdxX;
const int iRefMap = myid >> (nShiftBits << 1);
......@@ -124,7 +124,7 @@ __global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* re
}
}
__global__ void cuDoRefMapsFFT(const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int maxRef, const int Offset)
__global__ void cuDoRefMapsFFT(const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int maxRef, const int Offset)
{
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX + Offset;
const myfloat_t* mylCC = &lCC[(myBlockIdxX * myBlockDimX + myThreadIdxX) * param.NumberPixels * param.NumberPixels];
......@@ -145,7 +145,7 @@ static inline int ilog2 (int value)
static inline int ilog2(int value) {return 31 - __builtin_clz(value);}
#endif
int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap)
int bioem_cuda::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap)
{
if (startMap)
{
......@@ -170,7 +170,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c
cout << "Error running CUFFT " << cufftGetErrorStrung(err) << "\n";
exit(1);
}
cuDoRefMapsFFT<<<divup(num, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>>(iProjectionOut, iConv, pFFTtmp, sumC, sumsquareC, pProb_device, param.param_device, *gpumap, num, i);
cuDoRefMapsFFT<<<divup(num, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>>(iOrient, iConv, pFFTtmp, sumC, sumsquareC, pProb_device, param.param_device, *gpumap, num, i);
}
checkCudaErrors(cudaGetLastError());
}
......@@ -201,7 +201,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c
size_t nBlocks = CUDA_BLOCK_COUNT;
for (size_t i = 0; i < totalBlocks; i += nBlocks)
{
compareRefMapLoopShifts_kernel<<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream >>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *gpumap, i, nShifts, nShiftBits, maxRef);
compareRefMapLoopShifts_kernel<<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream >>> (iOrient, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *gpumap, i, nShifts, nShiftBits, maxRef);
}
}
else if (GPUAlgo == 1) //Split shifts in multiple kernels
......@@ -210,13 +210,13 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c
{
for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y = cent_y + param.param_device.GridSpaceCenter)
{
compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, cent_x, cent_y, maxRef);
compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iOrient, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, cent_x, cent_y, maxRef);
}
}
}
else if (GPUAlgo == 0) //All shifts in one kernel
{
compareRefMapShifted_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, maxRef);
compareRefMapShifted_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iOrient, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, maxRef);
}
else
{
......@@ -226,7 +226,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c
}
if (GPUWorkload < 100)
{
bioem::compareRefMaps(iProjectionOut, iConv, conv_map, localmultFFT, sumC, sumsquareC, maxRef);
bioem::compareRefMaps(iOrient, iConv, conv_map, localmultFFT, sumC, sumsquareC, maxRef);
}
if (GPUAsync)
{
......@@ -273,7 +273,9 @@ int bioem_cuda::deviceInit()
gpumap->sumsquare_RefMap = sumsquare;
checkCudaErrors(cudaStreamCreate(&cudaStream));
checkCudaErrors(cudaMalloc(&pProb_device, sizeof(bioem_Probability) * RefMap.ntotRefMap));
pProb_device = pProb;
checkCudaErrors(cudaMalloc(&pProb_device.ptr, pProb_device.get_size(RefMap.ntotRefMap, param.nTotGridAngles)));
pProb_device.set_pointers();
for (int i = 0; i < 2; i++)
{
checkCudaErrors(cudaEventCreate(&cudaEvent[i]));
......@@ -298,7 +300,7 @@ int bioem_cuda::deviceExit()
if (deviceInitialized == 0) return(0);
cudaStreamDestroy(cudaStream);
cudaFree(pProb_device);
cudaFree(pProb_device.ptr);
cudaFree(sum);
cudaFree(sumsquare);