Planned maintenance on Wednesday, 2021-01-20, 17:00-18:00. Expect some interruptions during that time

Commit fbf4c844 authored by David Rohr's avatar David Rohr

finalze version with dynamic allocation of map memory, MAP_SIZEX/Y and MAX_MAPS no longer needed

parent 68198fcf
......@@ -199,7 +199,12 @@ int bioem::precalculate()
param.CalculateRefCTF();
// Precalculating Maps in Fourier space
RefMap.PreCalculateMapsFFT(param);
if (FFTAlgo)
{
RefMap.PreCalculateMapsFFT(param);
free(RefMap.maps);
RefMap.maps = NULL;
}
return(0);
}
......
......@@ -32,21 +32,21 @@ bioem_cuda::~bioem_cuda()
deviceExit();
}
__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod* RefMap, const int cent_x, const int cent_y, const int maxRef)
__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int cent_x, const int cent_y, const int maxRef)
{
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < maxRef)
{
compareRefMap<0>(iRefMap, iOrient, iConv, pMap, pProb, param, *RefMap, cent_x, cent_y);
compareRefMap<0>(iRefMap, iOrient, iConv, pMap, pProb, param, RefMap, cent_x, cent_y);
}
}
__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod* RefMap, const int maxRef)
__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int maxRef)
{
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < maxRef)
{
compareRefMapShifted<1>(iRefMap, iOrient, iConv, pMap, pProb, param, *RefMap);
compareRefMapShifted<1>(iRefMap, iOrient, iConv, pMap, pProb, param, RefMap);
}
}
......@@ -172,13 +172,13 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c
{
for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y = cent_y + param.param_device.GridSpaceCenter)
{
compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, cent_x, cent_y, maxRef);
compareRefMap_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, cent_x, cent_y, maxRef);
}
}
}
else if (GPUAlgo == 0) //All shifts in one kernel
{
compareRefMapShifted_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, maxRef);
compareRefMapShifted_kernel<<<divup(maxRef, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, maxRef);
}
else
{
......@@ -212,7 +212,19 @@ int bioem_cuda::deviceInit()
if (FFTAlgo == 0)
{
checkCudaErrors(cudaMalloc(&maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize));
checkCudaErrors(cudaMemcpy(maps, RefMap.maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, cudaMemcpyHostToDevice));
if (GPUAlgo == 0 || GPUAlgo == 1)
{
pRefMap_device_Mod = (bioem_RefMap_Mod*) gpumap;
bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod;
RefMapGPU->init(RefMap);
checkCudaErrors(cudaMemcpy(maps, RefMapGPU->maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, cudaMemcpyHostToDevice));
delete RefMapGPU;
}
else
{
checkCudaErrors(cudaMemcpy(maps, RefMap.maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, cudaMemcpyHostToDevice));
}
}
checkCudaErrors(cudaMalloc(&sum, sizeof(myfloat_t) * RefMap.ntotRefMap));
checkCudaErrors(cudaMemcpy(sum, RefMap.sum_RefMap, sizeof(myfloat_t) * RefMap.ntotRefMap, cudaMemcpyHostToDevice));
......@@ -239,14 +251,6 @@ int bioem_cuda::deviceInit()
checkCudaErrors(cudaMemcpy(pRefMapsFFT, RefMap.RefMapsFFT, RefMap.ntotRefMap * param.FFTMapSize * sizeof(mycomplex_t), cudaMemcpyHostToDevice));
}
if (GPUAlgo == 0 || GPUAlgo == 1)
{
checkCudaErrors(cudaMalloc(&pRefMap_device_Mod, sizeof(bioem_RefMap_Mod)));
bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod(RefMap);
cudaMemcpy(pRefMap_device_Mod, RefMapGPU, sizeof(bioem_RefMap), cudaMemcpyHostToDevice);
delete RefMapGPU;
}
deviceInitialized = 1;
return(0);
}
......
......@@ -37,10 +37,7 @@ typedef double myfloat_t;
typedef myfloat_t mycomplex_t[2];
#define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU
#define BIOEM_MAP_SIZE_X 224
#define BIOEM_MAP_SIZE_Y 224
#define BIOEM_MODEL_SIZE 120000
#define BIOEM_MAX_MAPS 8000
#define MAX_ORIENT 20000
struct myfloat3_t
......
......@@ -46,31 +46,22 @@ public:
__host__ __device__ inline myfloat_t* getmap(int map) {return(&maps[map * refMapSize]);}
};
class bioem_RefMap_Mod
class bioem_RefMap_Mod : public bioem_RefMap
{
public:
int ntotRefMap;
myfloat_t Ref[BIOEM_MAP_SIZE_X][BIOEM_MAP_SIZE_Y][BIOEM_MAX_MAPS];
myfloat_t sum_RefMap[BIOEM_MAX_MAPS];
myfloat_t sumsquare_RefMap[BIOEM_MAX_MAPS];
__host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(Ref[x][y][map]);}
bioem_RefMap_Mod() {ntotRefMap = 0;}
__host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(maps[(x * numPixels + y) * ntotRefMap + map]);}
bioem_RefMap_Mod(const bioem_RefMap& map)
void init(const bioem_RefMap& map)
{
ntotRefMap = map.ntotRefMap;
memcpy(sum_RefMap, map.sum_RefMap, sizeof(sum_RefMap));
memcpy(sumsquare_RefMap, map.sumsquare_RefMap, sizeof(sumsquare_RefMap));
maps = (myfloat_t*) malloc(map.refMapSize * map.ntotRefMap * sizeof(myfloat_t));
#pragma omp parallel for
for (int i = 0; i < ntotRefMap; i++)
for (int i = 0; i < map.ntotRefMap; i++)
{
for (int j = 0; j < BIOEM_MAP_SIZE_X; j++)
for (int j = 0; j < map.numPixels; j++)
{
for (int k = 0; k < BIOEM_MAP_SIZE_Y; k++)
for (int k = 0; k < map.numPixels; k++)
{
Ref[j][k][i] = map.get(i, j, k);
maps[(j * map.numPixels + k) * map.ntotRefMap + i] = map.get(i, j, k);
}
}
}
......
......@@ -41,7 +41,6 @@ public:
char logFile;
// If to write Probabilities of Angles from Model
bool writeAngles;
// Pixel size && BIOEM_MAP_SIZE_X should be defined here too
//int NumberPixels; //in device class
myfloat_t pixelSize;
// Grid Points in Euler angles, assuming uniform sampling d_alpha=d_gamma (in 2pi) & cos(beta)=-1,1
......
......@@ -64,7 +64,6 @@ int bioem_RefMap::readRefMaps(bioem_param& param)
{
allocsize = 64;
maps = (myfloat_t*) mallocchk(refMapSize * sizeof(myfloat_t) * allocsize);
}
else if (nummap + 1 >= allocsize)
{
......@@ -75,11 +74,6 @@ int bioem_RefMap::readRefMaps(bioem_param& param)
{
cout << "..." << nummap << "\n";
}
if (nummap == BIOEM_MAX_MAPS)
{
cout << "BIOEM_MAX_MAPS too small\n";
exit(1);
}
if(lasti != param.param_device.NumberPixels && lastj != param.param_device.NumberPixels && nummap > 0)
{
cout << "PROBLEM INCONSISTENT NUMBER OF PIXELS IN MAPS AND INPUTFILE ( " << param.param_device.NumberPixels << ", i " << lasti << ", j " << lastj << ")" << "\n";
......@@ -102,7 +96,7 @@ int bioem_RefMap::readRefMaps(bioem_param& param)
strncpy (tmpVals, line + 16, 12);
sscanf (tmpVals, "%f", &z);
//checking for Map limits
if(i > 0 && i - 1 < BIOEM_MAP_SIZE_X && j > 0 && j - 1 < BIOEM_MAP_SIZE_Y && nummap < BIOEM_MAX_MAPS)
if(i > 0 && i - 1 < param.param_device.NumberPixels && j > 0 && j - 1 < param.param_device.NumberPixels)
{
maps[nummap * refMapSize + (i - 1) * numPixels + (j - 1)] = (myfloat_t) z;
lasti = i;
......
......@@ -214,6 +214,8 @@ void bioem_param::releaseFFTPlans()
{
myfftw_destroy_plan(fft_plan_c2c_forward);
myfftw_destroy_plan(fft_plan_c2c_backward);
myfftw_destroy_plan(fft_plan_r2c_forward);
myfftw_destroy_plan(fft_plan_c2r_backward);
}
fft_plans_created = 0;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment