From fbf4c844c857267810d96476b2b949a72d3111b6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 19 Apr 2014 17:58:27 +0200 Subject: [PATCH] finalize version with dynamic allocation of map memory, MAP_SIZEX/Y and MAX_MAPS no longer needed --- bioem.cpp | 7 ++++++- bioem_cuda.cu | 34 +++++++++++++++++++--------------- include/defs.h | 3 --- include/map.h | 25 ++++++++----------------- include/param.h | 1 - map.cpp | 8 +------- param.cpp | 2 ++ 7 files changed, 36 insertions(+), 44 deletions(-) diff --git a/bioem.cpp b/bioem.cpp index 1f847a2..0776969 100644 --- a/bioem.cpp +++ b/bioem.cpp @@ -199,7 +199,12 @@ int bioem::precalculate() param.CalculateRefCTF(); // Precalculating Maps in Fourier space - RefMap.PreCalculateMapsFFT(param); + if (FFTAlgo) + { + RefMap.PreCalculateMapsFFT(param); + free(RefMap.maps); + RefMap.maps = NULL; + } return(0); } diff --git a/bioem_cuda.cu b/bioem_cuda.cu index 26b6c6a..44cc666 100644 --- a/bioem_cuda.cu +++ b/bioem_cuda.cu @@ -32,21 +32,21 @@ bioem_cuda::~bioem_cuda() deviceExit(); } -__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod* RefMap, const int cent_x, const int cent_y, const int maxRef) +__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int cent_x, const int cent_y, const int maxRef) { const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; if (iRefMap < maxRef) { - compareRefMap<0>(iRefMap, iOrient, iConv, pMap, pProb, param, *RefMap, cent_x, cent_y); + compareRefMap<0>(iRefMap, iOrient, iConv, pMap, pProb, param, RefMap, cent_x, cent_y); } } -__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod* RefMap, const int maxRef) 
+__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const myfloat_t* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod RefMap, const int maxRef) { const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; if (iRefMap < maxRef) { - compareRefMapShifted<1>(iRefMap, iOrient, iConv, pMap, pProb, param, *RefMap); + compareRefMapShifted<1>(iRefMap, iOrient, iConv, pMap, pProb, param, RefMap); } } @@ -172,13 +172,13 @@ int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const myfloat_t* c { for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y = cent_y + param.param_device.GridSpaceCenter) { - compareRefMap_kernel<<>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, cent_x, cent_y, maxRef); + compareRefMap_kernel<<>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, cent_x, cent_y, maxRef); } } } else if (GPUAlgo == 0) //All shifts in one kernel { - compareRefMapShifted_kernel<<>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, maxRef); + compareRefMapShifted_kernel<<>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, *pRefMap_device_Mod, maxRef); } else { @@ -212,7 +212,19 @@ int bioem_cuda::deviceInit() if (FFTAlgo == 0) { checkCudaErrors(cudaMalloc(&maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize)); - checkCudaErrors(cudaMemcpy(maps, RefMap.maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, cudaMemcpyHostToDevice)); + + if (GPUAlgo == 0 || GPUAlgo == 1) + { + pRefMap_device_Mod = (bioem_RefMap_Mod*) gpumap; + bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod; + RefMapGPU->init(RefMap); + checkCudaErrors(cudaMemcpy(maps, RefMapGPU->maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, 
cudaMemcpyHostToDevice)); + delete RefMapGPU; + } + else + { + checkCudaErrors(cudaMemcpy(maps, RefMap.maps, sizeof(myfloat_t) * RefMap.ntotRefMap * RefMap.refMapSize, cudaMemcpyHostToDevice)); + } } checkCudaErrors(cudaMalloc(&sum, sizeof(myfloat_t) * RefMap.ntotRefMap)); checkCudaErrors(cudaMemcpy(sum, RefMap.sum_RefMap, sizeof(myfloat_t) * RefMap.ntotRefMap, cudaMemcpyHostToDevice)); @@ -239,14 +251,6 @@ int bioem_cuda::deviceInit() checkCudaErrors(cudaMemcpy(pRefMapsFFT, RefMap.RefMapsFFT, RefMap.ntotRefMap * param.FFTMapSize * sizeof(mycomplex_t), cudaMemcpyHostToDevice)); } - if (GPUAlgo == 0 || GPUAlgo == 1) - { - checkCudaErrors(cudaMalloc(&pRefMap_device_Mod, sizeof(bioem_RefMap_Mod))); - bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod(RefMap); - cudaMemcpy(pRefMap_device_Mod, RefMapGPU, sizeof(bioem_RefMap), cudaMemcpyHostToDevice); - delete RefMapGPU; - } - deviceInitialized = 1; return(0); } diff --git a/include/defs.h b/include/defs.h index 91eb353..9be83f6 100644 --- a/include/defs.h +++ b/include/defs.h @@ -37,10 +37,7 @@ typedef double myfloat_t; typedef myfloat_t mycomplex_t[2]; #define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU -#define BIOEM_MAP_SIZE_X 224 -#define BIOEM_MAP_SIZE_Y 224 #define BIOEM_MODEL_SIZE 120000 -#define BIOEM_MAX_MAPS 8000 #define MAX_ORIENT 20000 struct myfloat3_t diff --git a/include/map.h b/include/map.h index 7c30479..25b0da2 100644 --- a/include/map.h +++ b/include/map.h @@ -46,31 +46,22 @@ public: __host__ __device__ inline myfloat_t* getmap(int map) {return(&maps[map * refMapSize]);} }; -class bioem_RefMap_Mod +class bioem_RefMap_Mod : public bioem_RefMap { public: - int ntotRefMap; - myfloat_t Ref[BIOEM_MAP_SIZE_X][BIOEM_MAP_SIZE_Y][BIOEM_MAX_MAPS]; - myfloat_t sum_RefMap[BIOEM_MAX_MAPS]; - myfloat_t sumsquare_RefMap[BIOEM_MAX_MAPS]; - - __host__ __device__ inline myfloat_t get(int map, int x, int y) const {return(Ref[x][y][map]);} - - bioem_RefMap_Mod() {ntotRefMap = 0;} + __host__ __device__ 
inline myfloat_t get(int map, int x, int y) const {return(maps[(x * numPixels + y) * ntotRefMap + map]);} - bioem_RefMap_Mod(const bioem_RefMap& map) + void init(const bioem_RefMap& map) { - ntotRefMap = map.ntotRefMap; - memcpy(sum_RefMap, map.sum_RefMap, sizeof(sum_RefMap)); - memcpy(sumsquare_RefMap, map.sumsquare_RefMap, sizeof(sumsquare_RefMap)); + maps = (myfloat_t*) malloc(map.refMapSize * map.ntotRefMap * sizeof(myfloat_t)); #pragma omp parallel for - for (int i = 0; i < ntotRefMap; i++) + for (int i = 0; i < map.ntotRefMap; i++) { - for (int j = 0; j < BIOEM_MAP_SIZE_X; j++) + for (int j = 0; j < map.numPixels; j++) { - for (int k = 0; k < BIOEM_MAP_SIZE_Y; k++) + for (int k = 0; k < map.numPixels; k++) { - Ref[j][k][i] = map.get(i, j, k); + maps[(j * map.numPixels + k) * map.ntotRefMap + i] = map.get(i, j, k); } } } diff --git a/include/param.h b/include/param.h index 33564ec..733b9d8 100644 --- a/include/param.h +++ b/include/param.h @@ -41,7 +41,6 @@ public: char logFile; // If to write Probabilities of Angles from Model bool writeAngles; -// Pixel size && BIOEM_MAP_SIZE_X should be defined here too //int NumberPixels; //in device class myfloat_t pixelSize; // Grid Points in Euler angles, assuming uniform sampling d_alpha=d_gamma (in 2pi) & cos(beta)=-1,1 diff --git a/map.cpp b/map.cpp index e2e1c70..7848d21 100644 --- a/map.cpp +++ b/map.cpp @@ -64,7 +64,6 @@ int bioem_RefMap::readRefMaps(bioem_param& param) { allocsize = 64; maps = (myfloat_t*) mallocchk(refMapSize * sizeof(myfloat_t) * allocsize); - } else if (nummap + 1 >= allocsize) { @@ -75,11 +74,6 @@ int bioem_RefMap::readRefMaps(bioem_param& param) { cout << "..." 
<< nummap << "\n"; } - if (nummap == BIOEM_MAX_MAPS) - { - cout << "BIOEM_MAX_MAPS too small\n"; - exit(1); - } if(lasti != param.param_device.NumberPixels && lastj != param.param_device.NumberPixels && nummap > 0) { cout << "PROBLEM INCONSISTENT NUMBER OF PIXELS IN MAPS AND INPUTFILE ( " << param.param_device.NumberPixels << ", i " << lasti << ", j " << lastj << ")" << "\n"; @@ -102,7 +96,7 @@ int bioem_RefMap::readRefMaps(bioem_param& param) strncpy (tmpVals, line + 16, 12); sscanf (tmpVals, "%f", &z); //checking for Map limits - if(i > 0 && i - 1 < BIOEM_MAP_SIZE_X && j > 0 && j - 1 < BIOEM_MAP_SIZE_Y && nummap < BIOEM_MAX_MAPS) + if(i > 0 && i - 1 < param.param_device.NumberPixels && j > 0 && j - 1 < param.param_device.NumberPixels) { maps[nummap * refMapSize + (i - 1) * numPixels + (j - 1)] = (myfloat_t) z; lasti = i; diff --git a/param.cpp b/param.cpp index df5c813..f5323fe 100644 --- a/param.cpp +++ b/param.cpp @@ -214,6 +214,8 @@ void bioem_param::releaseFFTPlans() { myfftw_destroy_plan(fft_plan_c2c_forward); myfftw_destroy_plan(fft_plan_c2c_backward); + myfftw_destroy_plan(fft_plan_r2c_forward); + myfftw_destroy_plan(fft_plan_c2r_backward); } fft_plans_created = 0; } -- GitLab