Commit 89c694cc authored by Pilar Cossio

Priors with NoFFT and GPU

parent fd288ab5
......@@ -92,7 +92,7 @@ __global__ void compareRefMap_kernel(const int iOrient, const int iConv, const
}
}
__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t sumC, const myfloat_t sumsquareC, nst myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int maxRef)
__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t sumC, const myfloat_t sumsquareC, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int maxRef)
{
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < maxRef)
......@@ -110,7 +110,7 @@ __global__ void cudaZeroMem(void* ptr, size_t size)
for (int i = myid; i < mysize; i += mygrid) myptr[i] = 0;
}
__global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int blockoffset, const int nShifts, const int nShiftBits, const int maxRef)
__global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const myfloat_t amp, const myfloat_t pha, const myfloat_t env, const myfloat_t sumC, const myfloat_t sumsquareC, const myfloat_t* pMap, bioem_Probability pProb, const bioem_param_device param, const bioem_RefMap RefMap, const int blockoffset, const int nShifts, const int nShiftBits, const int maxRef)
{
const size_t myid = (myBlockIdxX + blockoffset) * myBlockDimX + myThreadIdxX;
const int iRefMap = myid >> (nShiftBits << 1);
......@@ -123,7 +123,7 @@ __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iCon
const bool threadActive = myShiftIdx < nShifts && myShiftIdy < nShifts && iRefMap < maxRef;
compareRefMap<2>(iRefMap, iOrient, iConv, pMap, pProb, param, RefMap, cent_x, cent_y, myShift, nShifts * nShifts, myRef, threadActive);
compareRefMap<2>(iRefMap, iOrient, iConv, amp, pha, env, sumC, sumsquareC, pMap, pProb, param, RefMap, cent_x, cent_y, myShift, nShifts * nShifts, myRef, threadActive);
}
__global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* refmap, mycuComplex_t* out, const int NumberPixelsTotal, const int MapSize, const int NumberMaps, const int Offset)
......@@ -238,7 +238,7 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
size_t nBlocks = CUDA_BLOCK_COUNT;
for (size_t i = 0; i < totalBlocks; i += nBlocks)
{
compareRefMapLoopShifts_kernel<<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream[0] >>> (iOrient, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *gpumap, i, nShifts, nShiftBits, maxRef);
compareRefMapLoopShifts_kernel<<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream[0] >>> (iOrient, iConv, amp, pha, env, sumC, sumsquareC, pConvMap_device[iConv & 1], pProb_device, param.param_device, *gpumap, i, nShifts, nShiftBits, maxRef);
}
}
else if (GPUAlgo == 1) //Split shifts in multiple kernels
......@@ -247,7 +247,7 @@ int bioem_cuda::compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t
{
for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y = cent_y + param.param_device.GridSpaceCenter)
{
compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream[0]>>> (iOrient, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, cent_x, cent_y, maxRef);
compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream[0]>>> (iOrient, iConv, amp, pha, env, sumC, sumsquareC, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, cent_x, cent_y, maxRef);
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment