Commit 1e3203f7 authored by David Rohr
Browse files

convert indent spaces to tabs, unify indentation

parent 87ad9163
This diff is collapsed.
...@@ -61,8 +61,8 @@ __device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, ...@@ -61,8 +61,8 @@ __device__ static inline myfloat_t calc_logpro(const bioem_param_device& param,
const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum); const myfloat_t ForLogProb = (sumsquare * param.Ntotpi - sum * sum);
// Products of different cross-correlations (first element in formula) // Products of different cross-correlations (first element in formula)
const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare-crossproMapConv * crossproMapConv) + const myfloat_t firstele = param.Ntotpi * (sumsquareref * sumsquare - crossproMapConv * crossproMapConv) +
2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare; 2 * sumref * sum * crossproMapConv - sumsquareref * sum * sum - sumref * sumref * sumsquare;
//******* Calculating log of Prob*********/ //******* Calculating log of Prob*********/
// As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1); // As in fortran code: logpro=(3-Ntotpi)*0.5*log(firstele/pConvMap[iOrient].ForLogProbfromConv[iConv])+(Ntotpi*0.5-2)*log(Ntotpi-2)-0.5*log(pConvMap[iOrient].ForLogProbfromConv[iConv])+0.5*log(PI)+(1-Ntotpi*0.5)*(log(2*PI)+1);
...@@ -70,7 +70,7 @@ __device__ static inline myfloat_t calc_logpro(const bioem_param_device& param, ...@@ -70,7 +70,7 @@ __device__ static inline myfloat_t calc_logpro(const bioem_param_device& param,
return(logpro); return(logpro);
} }
__device__ static inline void calProb(int iRefMap,int iOrient, int iConv,myfloat_t sumC,myfloat_t sumsquareC, float value, int disx, int disy, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap) __device__ static inline void calProb(int iRefMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, float value, int disx, int disy, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
{ {
/********************************************************/ /********************************************************/
/*********** Calculates the BioEM probability ***********/ /*********** Calculates the BioEM probability ***********/
...@@ -80,8 +80,8 @@ __device__ static inline void calProb(int iRefMap,int iOrient, int iConv,myfloat ...@@ -80,8 +80,8 @@ __device__ static inline void calProb(int iRefMap,int iOrient, int iConv,myfloat
//update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb); //update_prob<-1>(logpro, iRefMap, iOrient, iConv, disx, disy, pProb);
//GCC is too stupid to inline properly, so the code is copied here //GCC is too stupid to inline properly, so the code is copied here
if(pProb[iRefMap].Constoadd < logpro) if(pProb[iRefMap].Constoadd < logpro)
{ {
pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd); pProb[iRefMap].Total = pProb[iRefMap].Total * exp(-logpro + pProb[iRefMap].Constoadd);
pProb[iRefMap].Constoadd = logpro; pProb[iRefMap].Constoadd = logpro;
} }
...@@ -106,33 +106,33 @@ __device__ static inline void calProb(int iRefMap,int iOrient, int iConv,myfloat ...@@ -106,33 +106,33 @@ __device__ static inline void calProb(int iRefMap,int iOrient, int iConv,myfloat
// Evaluates the BioEM probability of reference map iRefMap for every centre
// displacement on the search grid, taking the cross-correlation values from lCC.
//
// lCC is read as a row-major NumberPixels x NumberPixels array, indexed as
// cent_x * NumberPixels + cent_y. Along each axis two index ranges are visited,
// both in steps of GridSpaceCenter:
//   [0, maxDisplaceCenter]                        -> displacement = index
//   [NumberPixels - maxDisplaceCenter, NumberPixels) -> displacement = index - NumberPixels
// i.e. the upper end of the array holds the negative displacements (wrap-around).
//
// Every sample is divided by NumberPixels^2 before it is handed to calProb
// (presumably to normalise an unnormalised inverse FFT -- confirm against the caller).
//
// Parameters:
//   iRefMap, iOrient, iConv - indices forwarded unchanged to calProb
//   lCC                     - cross-correlation samples (see layout above)
//   sumC, sumsquareC        - forwarded unchanged to calProb
//   pProb                   - probability accumulator updated by calProb
//   param                   - supplies NumberPixels, maxDisplaceCenter, GridSpaceCenter
//   RefMap                  - forwarded unchanged to calProb
//
// Fix relative to the previous version: in the negative-x half the y loops used
// "cent_y < maxDisplaceCenter" and "cent_y <= NumberPixels". The former skipped
// the +maxDisplaceCenter displacement; the latter read lCC[(cent_x + 1) * NumberPixels]
// (the next row, or one element past the array for the last row) and reported
// it as displacement 0. Both halves now use identical y bounds.
__device__ static inline void doRefMapFFT(const int iRefMap, const int iOrient, const int iConv, const myfloat_t* lCC, const myfloat_t sumC, const myfloat_t sumsquareC, bioem_Probability* pProb, const bioem_param_device& param, const bioem_RefMap& RefMap)
{
	const int nPixels = param.NumberPixels;
	const int maxDisp = param.maxDisplaceCenter;
	const int gridStep = param.GridSpaceCenter;
	// Same divisor as before; kept as a division so the numerical result is unchanged.
	const myfloat_t norm = (myfloat_t) (nPixels * nPixels);

	// Non-negative x displacements: indices [0, maxDisp].
	for (int cent_x = 0; cent_x <= maxDisp; cent_x = cent_x + gridStep)
	{
		// Non-negative y displacements.
		for (int cent_y = 0; cent_y <= maxDisp; cent_y = cent_y + gridStep)
		{
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * nPixels + cent_y] / norm, cent_x, cent_y, pProb, param, RefMap);
		}
		// Negative y displacements, stored at the upper end of the row.
		for (int cent_y = nPixels - maxDisp; cent_y < nPixels; cent_y = cent_y + gridStep)
		{
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * nPixels + cent_y] / norm, cent_x, cent_y - nPixels, pProb, param, RefMap);
		}
	}

	// Negative x displacements: indices [nPixels - maxDisp, nPixels).
	for (int cent_x = nPixels - maxDisp; cent_x < nPixels; cent_x = cent_x + gridStep)
	{
		// Non-negative y displacements (inclusive upper bound, matching the block above).
		for (int cent_y = 0; cent_y <= maxDisp; cent_y = cent_y + gridStep)
		{
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * nPixels + cent_y] / norm, cent_x - nPixels, cent_y, pProb, param, RefMap);
		}
		// Negative y displacements; strict upper bound keeps the index inside the row.
		for (int cent_y = nPixels - maxDisp; cent_y < nPixels; cent_y = cent_y + gridStep)
		{
			calProb(iRefMap, iOrient, iConv, sumC, sumsquareC, (myfloat_t) lCC[cent_x * nPixels + cent_y] / norm, cent_x - nPixels, cent_y - nPixels, pProb, param, RefMap);
		}
	}
}
template <int GPUAlgo, class RefT> template <int GPUAlgo, class RefT>
__device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const bioem_map& Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap, __device__ static inline void compareRefMap(const int iRefMap, const int iOrient, const int iConv, const bioem_map& Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap,
const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true) const int cent_x, const int cent_y, const int myShift = 0, const int nShifts2 = 0, const int myRef = 0, const bool threadActive = true)
{ {
/**************************************************************************************/ /**************************************************************************************/
/********************** Calculating BioEM Probability ********************************/ /********************** Calculating BioEM Probability ********************************/
...@@ -145,9 +145,9 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient ...@@ -145,9 +145,9 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
myfloat_t sum, sumsquare, crossproMapConv; myfloat_t sum, sumsquare, crossproMapConv;
__m128 sum_v = _mm_setzero_ps(), sumsquare_v = _mm_setzero_ps(), cross_v = _mm_setzero_ps(), d1, d2; __m128 sum_v = _mm_setzero_ps(), sumsquare_v = _mm_setzero_ps(), cross_v = _mm_setzero_ps(), d1, d2;
#else #else
myfloat_t sum=0.0; myfloat_t sum = 0.0;
myfloat_t sumsquare=0.0; myfloat_t sumsquare = 0.0;
myfloat_t crossproMapConv=0.0; myfloat_t crossproMapConv = 0.0;
#endif #endif
/****** Loop over Pixels to calculate dot product and cross-correlations of displaced Ref Conv. Map***/ /****** Loop over Pixels to calculate dot product and cross-correlations of displaced Ref Conv. Map***/
myfloat_t logpro; myfloat_t logpro;
...@@ -182,7 +182,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient ...@@ -182,7 +182,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
const float* ptr2 = RefMap.getp(iRefMap, i, jStart); const float* ptr2 = RefMap.getp(iRefMap, i, jStart);
int j; int j;
const int count = jEnd - jStart; const int count = jEnd - jStart;
for (j = 0;j <= count - 4;j += 4) for (j = 0; j <= count - 4; j += 4)
{ {
d1 = _mm_loadu_ps(ptr1); d1 = _mm_loadu_ps(ptr1);
d2 = _mm_loadu_ps(ptr2); d2 = _mm_loadu_ps(ptr2);
...@@ -201,7 +201,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient ...@@ -201,7 +201,7 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
// Crosscorrelation of calculated displaced map // Crosscorrelation of calculated displaced map
sum += pointMap; sum += pointMap;
// Calculate Sum of pixels squared // Calculate Sum of pixels squared
sumsquare += pointMap*pointMap; sumsquare += pointMap * pointMap;
} }
#endif #endif
} }
...@@ -380,18 +380,18 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient ...@@ -380,18 +380,18 @@ __device__ static inline void compareRefMap(const int iRefMap, const int iOrient
else else
#endif #endif
/***** Summing & Storing total/Orientation Probabilites for each map ************/ /***** Summing & Storing total/Orientation Probabilites for each map ************/
{ {
update_prob<-1>(logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb); update_prob < -1 > (logpro, iRefMap, iOrient, iConv, cent_x, cent_y, pProb);
} }
} }
template <int GPUAlgo, class RefT> template <int GPUAlgo, class RefT>
__device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const bioem_map& Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap) __device__ static inline void compareRefMapShifted(const int iRefMap, const int iOrient, const int iConv, const bioem_map& Mapconv, bioem_Probability* pProb, const bioem_param_device& param, const RefT& RefMap)
{ {
for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x=cent_x+param.GridSpaceCenter) for (int cent_x = -param.maxDisplaceCenter; cent_x <= param.maxDisplaceCenter; cent_x = cent_x + param.GridSpaceCenter)
{ {
for (int cent_y = -param.maxDisplaceCenter; cent_y <= param.maxDisplaceCenter; cent_y=cent_y+param.GridSpaceCenter) for (int cent_y = -param.maxDisplaceCenter; cent_y <= param.maxDisplaceCenter; cent_y = cent_y + param.GridSpaceCenter)
{ {
compareRefMap<GPUAlgo>(iRefMap, iOrient, iConv, Mapconv, pProb, param, RefMap, cent_x, cent_y); compareRefMap<GPUAlgo>(iRefMap, iOrient, iConv, Mapconv, pProb, param, RefMap, cent_x, cent_y);
} }
......
...@@ -39,7 +39,7 @@ __global__ void compareRefMap_kernel(const int iOrient, const int iConv, const b ...@@ -39,7 +39,7 @@ __global__ void compareRefMap_kernel(const int iOrient, const int iConv, const b
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < maxRef) if (iRefMap < maxRef)
{ {
compareRefMap<0>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap, cent_x, cent_y); compareRefMap<0>(iRefMap, iOrient, iConv, *pMap, pProb, param, *RefMap, cent_x, cent_y);
} }
} }
...@@ -48,7 +48,7 @@ __global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, ...@@ -48,7 +48,7 @@ __global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv,
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < maxRef) if (iRefMap < maxRef)
{ {
compareRefMapShifted<1>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap); compareRefMapShifted<1>(iRefMap, iOrient, iConv, *pMap, pProb, param, *RefMap);
} }
} }
...@@ -58,7 +58,7 @@ __global__ void cudaZeroMem(void* ptr, size_t size) ...@@ -58,7 +58,7 @@ __global__ void cudaZeroMem(void* ptr, size_t size)
int mysize = size / sizeof(int); int mysize = size / sizeof(int);
int myid = myBlockDimX * myBlockIdxX + myThreadIdxX; int myid = myBlockDimX * myBlockIdxX + myThreadIdxX;
int mygrid = myBlockDimX * myGridDimX; int mygrid = myBlockDimX * myGridDimX;
for (int i = myid;i < mysize;i += mygrid) myptr[i] = 0; for (int i = myid; i < mysize; i += mygrid) myptr[i] = 0;
} }
__global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const bioem_map* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap* RefMap, const int blockoffset, const int nShifts, const int nShiftBits, const int maxRef) __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const bioem_map* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap* RefMap, const int blockoffset, const int nShifts, const int nShiftBits, const int maxRef)
...@@ -74,7 +74,7 @@ __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iCon ...@@ -74,7 +74,7 @@ __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iCon
const bool threadActive = myShiftIdx < nShifts && myShiftIdy < nShifts && iRefMap < maxRef; const bool threadActive = myShiftIdx < nShifts && myShiftIdy < nShifts && iRefMap < maxRef;
compareRefMap<2>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap, cent_x, cent_y, myShift, nShifts * nShifts, myRef, threadActive); compareRefMap<2>(iRefMap, iOrient, iConv, *pMap, pProb, param, *RefMap, cent_x, cent_y, myShift, nShifts * nShifts, myRef, threadActive);
} }
__global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* refmap, mycuComplex_t* out, const int NumberPixelsTotal, const int MapSize, const int NumberMaps, const int Offset) __global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* refmap, mycuComplex_t* out, const int NumberPixelsTotal, const int MapSize, const int NumberMaps, const int Offset)
...@@ -82,7 +82,7 @@ __global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* re ...@@ -82,7 +82,7 @@ __global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* re
if (myBlockIdxX >= NumberMaps) return; if (myBlockIdxX >= NumberMaps) return;
const mycomplex_t* myin = &refmap[myBlockIdxX * MapSize + Offset]; const mycomplex_t* myin = &refmap[myBlockIdxX * MapSize + Offset];
mycuComplex_t* myout = &out[(myBlockIdxX * MapSize)]; mycuComplex_t* myout = &out[(myBlockIdxX * MapSize)];
for(int i = myThreadIdxX;i < NumberPixelsTotal;i += myBlockDimX) for(int i = myThreadIdxX; i < NumberPixelsTotal; i += myBlockDimX)
{ {
myout[i].x = convmap[i][0] * myin[i][0] + convmap[i][1] * myin[i][1]; myout[i].x = convmap[i][0] * myin[i][0] + convmap[i][1] * myin[i][1];
myout[i].y = convmap[i][1] * myin[i][0] - convmap[i][0] * myin[i][1]; myout[i].y = convmap[i][1] * myin[i][0] - convmap[i][0] * myin[i][1];
...@@ -126,7 +126,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c ...@@ -126,7 +126,7 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c
if (FFTAlgo) if (FFTAlgo)
{ {
checkCudaErrors(cudaMemcpyAsync(&pConvMapFFT[(iConv & 1) * param.RefMapSize], localmultFFT, param.RefMapSize * sizeof(mycomplex_t), cudaMemcpyHostToDevice, cudaStream)); checkCudaErrors(cudaMemcpyAsync(&pConvMapFFT[(iConv & 1) * param.RefMapSize], localmultFFT, param.RefMapSize * sizeof(mycomplex_t), cudaMemcpyHostToDevice, cudaStream));
for (int i = 0;i < maxRef;i += CUDA_FFTS_AT_ONCE) for (int i = 0; i < maxRef; i += CUDA_FFTS_AT_ONCE)
{ {
const int num = min(CUDA_FFTS_AT_ONCE, maxRef - i); const int num = min(CUDA_FFTS_AT_ONCE, maxRef - i);
multComplexMap<<<num, CUDA_THREAD_COUNT, 0, cudaStream>>>(&pConvMapFFT[(iConv & 1) * param.RefMapSize], pRefMapsFFT, pFFTtmp2, param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D, param.RefMapSize, num, i); multComplexMap<<<num, CUDA_THREAD_COUNT, 0, cudaStream>>>(&pConvMapFFT[(iConv & 1) * param.RefMapSize], pRefMapsFFT, pFFTtmp2, param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D, param.RefMapSize, num, i);
...@@ -164,24 +164,24 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c ...@@ -164,24 +164,24 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& c
const int nShiftBits = ilog2(nShifts); const int nShiftBits = ilog2(nShifts);
size_t totalBlocks = divup((size_t) maxRef * (size_t) nShifts * (size_t) nShifts, (size_t) CUDA_THREAD_COUNT); size_t totalBlocks = divup((size_t) maxRef * (size_t) nShifts * (size_t) nShifts, (size_t) CUDA_THREAD_COUNT);
size_t nBlocks = CUDA_BLOCK_COUNT; size_t nBlocks = CUDA_BLOCK_COUNT;
for (size_t i = 0;i < totalBlocks;i += nBlocks) for (size_t i = 0; i < totalBlocks; i += nBlocks)
{ {
compareRefMapLoopShifts_kernel <<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device, i, nShifts, nShiftBits, maxRef); compareRefMapLoopShifts_kernel<<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream >>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device, i, nShifts, nShiftBits, maxRef);
} }
} }
else if (GPUAlgo == 1) //Split shifts in multiple kernels else if (GPUAlgo == 1) //Split shifts in multiple kernels
{ {
for (int cent_x = -param.param_device.maxDisplaceCenter; cent_x <= param.param_device.maxDisplaceCenter; cent_x=cent_x+param.param_device.GridSpaceCenter) for (int cent_x = -param.param_device.maxDisplaceCenter; cent_x <= param.param_device.maxDisplaceCenter; cent_x = cent_x + param.param_device.GridSpaceCenter)
{ {
for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y=cent_y+param.param_device.GridSpaceCenter) for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y = cent_y + param.param_device.GridSpaceCenter)
{ {
compareRefMap_kernel <<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, cent_x, cent_y, maxRef); compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, cent_x, cent_y, maxRef);
} }
} }
} }
else if (GPUAlgo == 0) //All shifts in one kernel else if (GPUAlgo == 0) //All shifts in one kernel
{ {
compareRefMapShifted_kernel <<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, maxRef); compareRefMapShifted_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, maxRef);
} }
else else
{ {
...@@ -215,7 +215,7 @@ int bioem_cuda::deviceInit() ...@@ -215,7 +215,7 @@ int bioem_cuda::deviceInit()
checkCudaErrors(cudaMalloc(&pRefMap_device, sizeof(bioem_RefMap))); checkCudaErrors(cudaMalloc(&pRefMap_device, sizeof(bioem_RefMap)));
cout << "\tSize Probability\t" << sizeof(bioem_Probability) * RefMap.ntotRefMap << "\n"; cout << "\tSize Probability\t" << sizeof(bioem_Probability) * RefMap.ntotRefMap << "\n";
checkCudaErrors(cudaMalloc(&pProb_device, sizeof(bioem_Probability) * RefMap.ntotRefMap)); checkCudaErrors(cudaMalloc(&pProb_device, sizeof(bioem_Probability) * RefMap.ntotRefMap));
for (int i = 0;i < 2;i++) for (int i = 0; i < 2; i++)
{ {
checkCudaErrors(cudaEventCreate(&cudaEvent[i])); checkCudaErrors(cudaEventCreate(&cudaEvent[i]));
checkCudaErrors(cudaMalloc(&pConvMap_device[i], sizeof(bioem_map))); checkCudaErrors(cudaMalloc(&pConvMap_device[i], sizeof(bioem_map)));
...@@ -256,7 +256,7 @@ int bioem_cuda::deviceExit() ...@@ -256,7 +256,7 @@ int bioem_cuda::deviceExit()
cudaStreamDestroy(cudaStream); cudaStreamDestroy(cudaStream);
cudaFree(pRefMap_device); cudaFree(pRefMap_device);
cudaFree(pProb_device); cudaFree(pProb_device);
for (int i = 0;i < 2;i++) for (int i = 0; i < 2; i++)
{ {
cudaEventDestroy(cudaEvent[i]); cudaEventDestroy(cudaEvent[i]);
cudaFree(pConvMap_device); cudaFree(pConvMap_device);
...@@ -282,7 +282,7 @@ int bioem_cuda::deviceStartRun() ...@@ -282,7 +282,7 @@ int bioem_cuda::deviceStartRun()
if (FFTAlgo) if (FFTAlgo)
{ {
for (int i = 0;i < 2;i++) for (int i = 0; i < 2; i++)
{ {
int n[2] = {param.param_device.NumberPixels, param.param_device.NumberPixels}; int n[2] = {param.param_device.NumberPixels, param.param_device.NumberPixels};
if (cufftPlanMany(&plan[i], 2, n, NULL, 1, 0, NULL, 1, 0, CUFFT_C2R, i ? (maxRef % CUDA_FFTS_AT_ONCE) : CUDA_FFTS_AT_ONCE) != CUFFT_SUCCESS) if (cufftPlanMany(&plan[i], 2, n, NULL, 1, 0, NULL, 1, 0, CUFFT_C2R, i ? (maxRef % CUDA_FFTS_AT_ONCE) : CUDA_FFTS_AT_ONCE) != CUFFT_SUCCESS)
...@@ -312,7 +312,7 @@ int bioem_cuda::deviceFinishRun() ...@@ -312,7 +312,7 @@ int bioem_cuda::deviceFinishRun()
if (FFTAlgo) if (FFTAlgo)
{ {
for (int i = 0;i < 2;i++) cufftDestroy(plan[i]); for (int i = 0; i < 2; i++) cufftDestroy(plan[i]);
} }
return(0); return(0);
......
...@@ -21,13 +21,13 @@ public: ...@@ -21,13 +21,13 @@ public:
int dopreCalCrossCorrelation(int iRefMap, int iRefMapLocal); int dopreCalCrossCorrelation(int iRefMap, int iRefMapLocal);
int run(); int run();
int doProjections(int iMap); int doProjections(int iMap);
int createConvolutedProjectionMap(int iOreint,int iMap, mycomplex_t* lproj,bioem_map& Mapconv,mycomplex_t* localmultFFT,myfloat_t& sumC,myfloat_t& sumsquareC); int createConvolutedProjectionMap(int iOreint, int iMap, mycomplex_t* lproj, bioem_map& Mapconv, mycomplex_t* localmultFFT, myfloat_t& sumC, myfloat_t& sumsquareC);
virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0); virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0);
int createProjection(int iMap, mycomplex_t* map); int createProjection(int iMap, mycomplex_t* map);
int calcross_cor(bioem_map& localmap,myfloat_t& sum,myfloat_t& sumsquare); int calcross_cor(bioem_map& localmap, myfloat_t& sum, myfloat_t& sumsquare);
void calculateCCFFT(int iMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT,mycomplex_t* localCCT,myfloat_t* lCC); void calculateCCFFT(int iMap, int iOrient, int iConv, myfloat_t sumC, myfloat_t sumsquareC, mycomplex_t* localConvFFT, mycomplex_t* localCCT, myfloat_t* lCC);
bioem_Probability* pProb; bioem_Probability* pProb;
......
...@@ -22,13 +22,13 @@ public: ...@@ -22,13 +22,13 @@ public:
} }
int readRefMaps(bioem_param& param); int readRefMaps(bioem_param& param);
int PreCalculateMapsFFT(bioem_param& param); int PreCalculateMapsFFT(bioem_param& param);
int read_int(int *currlong, FILE *fin, int swap); int read_int(int *currlong, FILE *fin, int swap);
int read_float(float *currfloat, FILE *fin, int swap); int read_float(float *currfloat, FILE *fin, int swap);
int read_float_empty (FILE *fin); int read_float_empty (FILE *fin);
int read_char_float (float *currfloat, FILE *fin) ; int read_char_float (float *currfloat, FILE *fin) ;
int test_mrc (const char *vol_file, int swap); int test_mrc (const char *vol_file, int swap);
int read_MRC(const char* filename,bioem_param& param); int read_MRC(const char* filename, bioem_param& param);
mycomplex_t* RefMapsFFT; mycomplex_t* RefMapsFFT;
const char* filemap; const char* filemap;
...@@ -38,7 +38,7 @@ public: ...@@ -38,7 +38,7 @@ public:
myfloat_t sumsquare_RefMap[BIOEM_MAX_MAPS]; myfloat_t sumsquare_RefMap[BIOEM_MAX_MAPS];
myfloat_t ForLogProbfromRef[BIOEM_MAX_MAPS]; myfloat_t ForLogProbfromRef[BIOEM_MAX_MAPS];
bool dumpMap, loadMap, readMRC,readMultMRC; bool dumpMap, loadMap, readMRC, readMultMRC;
__host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(Ref[map].points[x][y]);} __host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(Ref[map].points[x][y]);}
__host__ __device__ inline const myfloat_t* getp(int map, int x, int y) const {return(&Ref[map].points[x][y]);} __host__ __device__ inline const myfloat_t* getp(int map, int x, int y) const {return(&Ref[map].points[x][y]);}
...@@ -65,12 +65,12 @@ public: ...@@ -65,12 +65,12 @@ public:
memcpy(sum_RefMap, map.sum_RefMap, sizeof(sum_RefMap)); memcpy(sum_RefMap, map.sum_RefMap, sizeof(sum_RefMap));
memcpy(sumsquare_RefMap, map.sumsquare_RefMap, sizeof(sumsquare_RefMap)); memcpy(sumsquare_RefMap, map.sumsquare_RefMap, sizeof(sumsquare_RefMap));
memcpy(ForLogProbfromRef, map.ForLogProbfromRef, sizeof(ForLogProbfromRef)); memcpy(ForLogProbfromRef, map.ForLogProbfromRef, sizeof(ForLogProbfromRef));
#pragma omp parallel for #pragma omp parallel for
for (int i = 0;i < ntotRefMap;i++) for (int i = 0; i < ntotRefMap; i++)
{ {
for (int j = 0;j < BIOEM_MAP_SIZE_X;j++) for (int j = 0; j < BIOEM_MAP_SIZE_X; j++)
{ {
for (int k = 0;k < BIOEM_MAP_SIZE_Y;k++) for (int k = 0; k < BIOEM_MAP_SIZE_Y; k++)
{ {
Ref[j][k][i] = map.get(i, j, k); Ref[j][k][i] = map.get(i, j, k);
} }
......
...@@ -15,7 +15,7 @@ public: ...@@ -15,7 +15,7 @@ public:
myfloat_t getAminoAcidRad(char *name); myfloat_t getAminoAcidRad(char *name);
myfloat_t getAminoAcidDensity(char *name); myfloat_t getAminoAcidDensity(char *name);
myfloat_t NormDen; myfloat_t NormDen;
const char* filemodel; const char* filemodel;
......
...@@ -26,7 +26,7 @@ int main(int argc, char* argv[]) ...@@ -26,7 +26,7 @@ int main(int argc, char* argv[])
//************************************************************************************* //*************************************************************************************
#ifdef _MM_SET_DENORMALS_ZERO_MODE #ifdef _MM_SET_DENORMALS_ZERO_MODE
#pragma omp parallel #pragma omp parallel
{ {
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); //Flush denormals to zero in all OpenMP threads _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); //Flush denormals to zero in all OpenMP threads
} }
...@@ -47,7 +47,7 @@ int main(int argc, char* argv[]) ...@@ -47,7 +47,7 @@ int main(int argc, char* argv[])
//************ Configuration and Pre-calculating necessary objects ********************* //************ Configuration and Pre-calculating necessary objects *********************
printf("Configuring\n"); printf("Configuring\n");
bio->configure(argc,argv); bio->configure(argc, argv);
//******************************* Run BioEM routine ************************************ //******************************* Run BioEM routine ************************************
printf("Running\n"); printf("Running\n");
......
This diff is collapsed.
...@@ -30,19 +30,19 @@ int bioem_model::readModel() ...@@ -30,19 +30,19 @@ int bioem_model::readModel()
char line[512] = {' '}; char line[512] = {' '};
char tmpLine[512] = {' '}; char tmpLine[512] = {' '};
int numres=0; int numres = 0;
NormDen=0.0; NormDen = 0.0;
// cout << " HERE " << filemodel ; // cout << " HERE " << filemodel ;
// for eachline in the file // for eachline in the file
while (!input.eof()) while (!input.eof())
{ {
input.getline(line,512); input.getline(line, 512);
strncpy(tmpLine,line,strlen(line)); strncpy(tmpLine, line, strlen(line));
char *token = strtok(tmpLine," "); char *token = strtok(tmpLine, " ");
if (strcmp(token,"ATOM")==0) // Optional,Mandatory if standard residues exist if (strcmp(token, "ATOM") == 0) // Optional,Mandatory if standard residues exist
{ {
/* /*
1-6 "ATOM " 1-6 "ATOM "
...@@ -66,26 +66,26 @@ int bioem_model::readModel() ...@@ -66,26 +66,26 @@ int bioem_model::readModel()
char tmp[6] = {' '}; char tmp[6] = {' '};
// parse name // parse name
strncpy(tmp,line+12,4); strncpy(tmp, line + 12, 4);
sscanf (tmp,"%s",name); sscanf (tmp, "%s", name);
// parse resName // parse resName
strncpy(tmp,line+17,3); strncpy(tmp, line + 17, 3);
sscanf (tmp,"%s",resName); sscanf (tmp, "%s", resName);
// parse x, y, z // parse x, y, z
char tmpVals[36] = {' '}; char tmpVals[36] = {' '};
strncpy (tmpVals,line+30,8); strncpy (tmpVals, line + 30, 8);
sscanf (tmpVals,"%f",&x); sscanf (tmpVals, "%f", &x);
strncpy (tmpVals,line+38,8); strncpy (tmpVals, line + 38, 8);
sscanf (tmpVals,"%f",&y); sscanf (tmpVals, "%f", &y);
strncpy (tmpVals,line+46,8); strncpy (tmpVals, line + 46, 8);
sscanf (tmpVals,"%f",&z); sscanf (tmpVals, "%f", &z);
if (strcmp(name,"CA") == 0) if (strcmp(name, "CA") == 0)
{ {
if (numres >= BIOEM_MODEL_SIZE) if (numres >= BIOEM_MODEL_SIZE)
{ {
...@@ -93,30 +93,30 @@ int bioem_model::readModel() ...@@ -93,30 +93,30 @@ int bioem_model::readModel()
exit(1); exit(1);
} }
//Getting residue Radius and electron density //Getting residue Radius and electron density
radiusPointsModel[numres]=getAminoAcidRad(resName);