Commit fbf4c844 authored by David Rohr's avatar David Rohr

finalze version with dynamic allocation of map memory, MAP_SIZEX/Y and MAX_MAPS no longer needed

parent 68198fcf
...@@ -199,7 +199,12 @@ int bioem::precalculate() ...@@ -199,7 +199,12 @@ int bioem::precalculate()
param.CalculateRefCTF(); param.CalculateRefCTF();
// Precalculating Maps in Fourier space // Precalculating Maps in Fourier space
RefMap.PreCalculateMapsFFT(param); if (FFTAlgo)
{
RefMap.PreCalculateMapsFFT(param);
free(RefMap.maps);
RefMap.maps = NULL;
}
return(0); return(0);
} }
......
...@@ -32,21 +32,21 @@ bioem_cuda::~bioem_cuda() ...@@ -32,21 +32,21 @@ bioem_cuda::~bioem_cuda()
deviceExit(); deviceExit();
} }
__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod* RefMap, const int cent_x, const int cent_y, const int maxRef) __global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int cent_x, const int cent_y, const int maxRef)
{ {
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < maxRef) if (iRefMap < maxRef)
{ {
compareRefMap<0>(iRefMap, iOrient, iConv, pMap, pProb, param, *RefMap, cent_x, cent_y); compareRefMap<0>(iRefMap, iOrient, iConv, pMap, pProb, param, RefMap, cent_x, cent_y);
} }
} }
__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod* RefMap, const int maxRef) __global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int maxRef)
{ {
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < maxRef) if (iRefMap < maxRef)
{ {
compareRefMapShifted<1>(iRefMap, iOrient, iConv, pMap, pProb, param, *RefMap); compareRefMapShifted<1>(iRefMap, iOrient, iConv, pMap, pProb, param, RefMap);
} }
} }
...@@ -172,13 +172,13 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c ...@@ -172,13 +172,13 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c
{ {
for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y = cent_y + param.param_device.GridSpaceCenter) for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y = cent_y + param.param_device.GridSpaceCenter)
{ {
compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, cent_x, cent_y, maxRef); compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, cent_x, cent_y, maxRef);
} }
} }
} }
else if (GPUAlgo == 0) //All shifts in one kernel else if (GPUAlgo == 0) //All shifts in one kernel
{ {
compareRefMapShifted_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, maxRef); compareRefMapShifted_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, maxRef);
} }
else else
{ {
...@@ -212,7 +212,19 @@ int bioem_cuda::deviceInit() ...@@ -212,7 +212,19 @@ int bioem_cuda::deviceInit()
if (FFTAlgo == 0) if (FFTAlgo == 0)
{ {
checkCudaErrors(cudaMalloc(&maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize)); checkCudaErrors(cudaMalloc(&maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize));
checkCudaErrors(cudaMemcpy(maps, RefMap.maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, cudaMemcpyHostToDevice));
if (GPUAlgo == 0 || GPUAlgo == 1)
{
pRefMap_device_Mod = (bioem_RefMap_Mod*) gpumap;
bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod;
RefMapGPU->init(RefMap);
checkCudaErrors(cudaMemcpy(maps, RefMapGPU->maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, cudaMemcpyHostToDevice));
delete RefMapGPU;
}
else
{
checkCudaErrors(cudaMemcpy(maps, RefMap.maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, cudaMemcpyHostToDevice));
}
} }
checkCudaErrors(cudaMalloc(&sum, sizeof(myfloat_t) * RefMap.ntotRefMap)); checkCudaErrors(cudaMalloc(&sum, sizeof(myfloat_t) * RefMap.ntotRefMap));
checkCudaErrors(cudaMemcpy(sum, RefMap.sum_RefMap, sizeof(myfloat_t) * RefMap.ntotRefMap, cudaMemcpyHostToDevice)); checkCudaErrors(cudaMemcpy(sum, RefMap.sum_RefMap, sizeof(myfloat_t) * RefMap.ntotRefMap, cudaMemcpyHostToDevice));
...@@ -239,14 +251,6 @@ int bioem_cuda::deviceInit() ...@@ -239,14 +251,6 @@ int bioem_cuda::deviceInit()
checkCudaErrors(cudaMemcpy(pRefMapsFFT, RefMap.RefMapsFFT, RefMap.ntotRefMap * param.FFTMapSize * sizeof(mycomplex_t), cudaMemcpyHostToDevice)); checkCudaErrors(cudaMemcpy(pRefMapsFFT, RefMap.RefMapsFFT, RefMap.ntotRefMap * param.FFTMapSize * sizeof(mycomplex_t), cudaMemcpyHostToDevice));
} }
if (GPUAlgo == 0 || GPUAlgo == 1)
{
checkCudaErrors(cudaMalloc(&pRefMap_device_Mod, sizeof(bioem_RefMap_Mod)));
bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod(RefMap);
cudaMemcpy(pRefMap_device_Mod, RefMapGPU, sizeof(bioem_RefMap), cudaMemcpyHostToDevice);
delete RefMapGPU;
}
deviceInitialized = 1; deviceInitialized = 1;
return(0); return(0);
} }
......
...@@ -37,10 +37,7 @@ typedef double myfloat_t; ...@@ -37,10 +37,7 @@ typedef double myfloat_t;
typedef myfloat_t mycomplex_t[2]; typedef myfloat_t mycomplex_t[2];
#define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU #define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU
#define BIOEM_MAP_SIZE_X 224
#define BIOEM_MAP_SIZE_Y 224
#define BIOEM_MODEL_SIZE 120000 #define BIOEM_MODEL_SIZE 120000
#define BIOEM_MAX_MAPS 8000
#define MAX_ORIENT 20000 #define MAX_ORIENT 20000
struct myfloat3_t struct myfloat3_t
......
...@@ -46,31 +46,22 @@ public: ...@@ -46,31 +46,22 @@ public:
__host__ __device__ inline myfloat_t* getmap(int map) {return(&maps[map * refMapSize]);} __host__ __device__ inline myfloat_t* getmap(int map) {return(&maps[map * refMapSize]);}
}; };
class bioem_RefMap_Mod class bioem_RefMap_Mod : public bioem_RefMap
{ {
public: public:
int ntotRefMap; __host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(maps[(x * numPixels + y) * ntotRefMap + map]);}
myfloat_t Ref[BIOEM_MAP_SIZE_X][BIOEM_MAP_SIZE_Y][BIOEM_MAX_MAPS];
myfloat_t sum_RefMap[BIOEM_MAX_MAPS];
myfloat_t sumsquare_RefMap[BIOEM_MAX_MAPS];
__host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(Ref[x][y][map]);}
bioem_RefMap_Mod() {ntotRefMap = 0;}
bioem_RefMap_Mod(const bioem_RefMap& map) void init(const bioem_RefMap& map)
{ {
ntotRefMap = map.ntotRefMap; maps = (myfloat_t*) malloc(map.refMapSize * map.ntotRefMap * sizeof(myfloat_t));
memcpy(sum_RefMap, map.sum_RefMap, sizeof(sum_RefMap));
memcpy(sumsquare_RefMap, map.sumsquare_RefMap, sizeof(sumsquare_RefMap));
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < ntotRefMap; i++) for (int i = 0; i < map.ntotRefMap; i++)
{ {
for (int j = 0; j < BIOEM_MAP_SIZE_X; j++) for (int j = 0; j < map.numPixels; j++)
{ {
for (int k = 0; k < BIOEM_MAP_SIZE_Y; k++) for (int k = 0; k < map.numPixels; k++)
{ {
Ref[j][k][i] = map.get(i, j, k); maps[(j * map.numPixels + k) * map.ntotRefMap + i] = map.get(i, j, k);
} }
} }
} }
......
...@@ -41,7 +41,6 @@ public: ...@@ -41,7 +41,6 @@ public:
char logFile; char logFile;
// If to write Probabilities of Angles from Model // If to write Probabilities of Angles from Model
bool writeAngles; bool writeAngles;
// Pixel size && BIOEM_MAP_SIZE_X should be defined here too
//int NumberPixels; //in device class //int NumberPixels; //in device class
myfloat_t pixelSize; myfloat_t pixelSize;
// Grid Points in Euler angles, assuming uniform sampling d_alpha=d_gamma (in 2pi) & cos(beta)=-1,1 // Grid Points in Euler angles, assuming uniform sampling d_alpha=d_gamma (in 2pi) & cos(beta)=-1,1
......
...@@ -64,7 +64,6 @@ int bioem_RefMap::readRefMaps(bioem_param& param) ...@@ -64,7 +64,6 @@ int bioem_RefMap::readRefMaps(bioem_param& param)
{ {
allocsize = 64; allocsize = 64;
maps = (myfloat_t*) mallocchk(refMapSize * sizeof(myfloat_t) * allocsize); maps = (myfloat_t*) mallocchk(refMapSize * sizeof(myfloat_t) * allocsize);
} }
else if (nummap + 1 >= allocsize) else if (nummap + 1 >= allocsize)
{ {
...@@ -75,11 +74,6 @@ int bioem_RefMap::readRefMaps(bioem_param& param) ...@@ -75,11 +74,6 @@ int bioem_RefMap::readRefMaps(bioem_param& param)
{ {
cout << "..." << nummap << "\n"; cout << "..." << nummap << "\n";
} }
if (nummap == BIOEM_MAX_MAPS)
{
cout << "BIOEM_MAX_MAPS too small\n";
exit(1);
}
if(lasti != param.param_device.NumberPixels && lastj != param.param_device.NumberPixels && nummap > 0) if(lasti != param.param_device.NumberPixels && lastj != param.param_device.NumberPixels && nummap > 0)
{ {
cout << "PROBLEM INCONSISTENT NUMBER OF PIXELS IN MAPS AND INPUTFILE ( " << param.param_device.NumberPixels << ", i " << lasti << ", j " << lastj << ")" << "\n"; cout << "PROBLEM INCONSISTENT NUMBER OF PIXELS IN MAPS AND INPUTFILE ( " << param.param_device.NumberPixels << ", i " << lasti << ", j " << lastj << ")" << "\n";
...@@ -102,7 +96,7 @@ int bioem_RefMap::readRefMaps(bioem_param& param) ...@@ -102,7 +96,7 @@ int bioem_RefMap::readRefMaps(bioem_param& param)
strncpy (tmpVals, line + 16, 12); strncpy (tmpVals, line + 16, 12);
sscanf (tmpVals, "%f", &z); sscanf (tmpVals, "%f", &z);
//checking for Map limits //checking for Map limits
if(i > 0 && i - 1 < BIOEM_MAP_SIZE_X && j > 0 && j - 1 < BIOEM_MAP_SIZE_Y && nummap < BIOEM_MAX_MAPS) if(i > 0 && i - 1 < param.param_device.NumberPixels && j > 0 && j - 1 < param.param_device.NumberPixels)
{ {
maps[nummap * refMapSize + (i - 1) * numPixels + (j - 1)] = (myfloat_t) z; maps[nummap * refMapSize + (i - 1) * numPixels + (j - 1)] = (myfloat_t) z;
lasti = i; lasti = i;
......
...@@ -214,6 +214,8 @@ void bioem_param::releaseFFTPlans() ...@@ -214,6 +214,8 @@ void bioem_param::releaseFFTPlans()
{ {
myfftw_destroy_plan(fft_plan_c2c_forward); myfftw_destroy_plan(fft_plan_c2c_forward);
myfftw_destroy_plan(fft_plan_c2c_backward); myfftw_destroy_plan(fft_plan_c2c_backward);
myfftw_destroy_plan(fft_plan_r2c_forward);
myfftw_destroy_plan(fft_plan_c2r_backward);
} }
fft_plans_created = 0; fft_plans_created = 0;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment