diff --git a/bioem.cpp b/bioem.cpp index d504a80e80e5ac9458402971a6ad92a13e5c9d3c..c3a7c8f6410798e2356499822429d02093f6dc4b 100644 --- a/bioem.cpp +++ b/bioem.cpp @@ -374,7 +374,7 @@ int bioem::run() int bioem::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap) { - #pragma omp parallel for +#pragma omp parallel for for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) { compareRefMapShifted<-1>(iRefMap,iProjectionOut,iConv,conv_map, pProb, param.param_device, RefMap); @@ -384,7 +384,7 @@ int bioem::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_m int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myfloat_t sumC,myfloat_t sumsquareC) { - #pragma omp parallel for +#pragma omp parallel for for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++) { @@ -402,8 +402,6 @@ int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myf crossCor[iRefMap].disy[n]=-99999; } -// Before:: compareRefMapShifted<-1>(iRefMap,iProjectionOut,iConv,conv_map, pProb, param.param_device, RefMap); - calculateCCFFT(iRefMap,iConv, localConvFFT, localCCT,lCC); myfftw_free(localCCT); @@ -423,8 +421,6 @@ int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myf /////////////NEW ROUTINE //////////////// int bioem::calculateCCFFT(int iRefMap, int iConv, mycomplex_t* localConvFFT,mycomplex_t* localCCT,mycomplex_t* lCC) { - myfftw_plan plan; - for(int i=0; i < param.param_device.NumberPixels ; i++ ) { for(int j=0; j < param.param_device.NumberPixels ; j++ ) @@ -434,11 +430,7 @@ int bioem::calculateCCFFT(int iRefMap, int iConv, mycomplex_t* localConvFFT,myco } } - #pragma omp critical - { - plan = myfftw_plan_dft_2d(param.param_device.NumberPixels,param.param_device.NumberPixels,localCCT,lCC,FFTW_BACKWARD,FFTW_ESTIMATE); - myfftw_execute(plan); - } + myfftw_execute_dft(param.fft_plan_c2c_backward,localCCT,lCC); // Storing CORRELATIONS FOR CORRESPONDING DISPLACEMENTS & Normalizing after Backward FFT int n=0; @@ -476,10 +468,7 @@ int bioem::calculateCCFFT(int iRefMap, int iConv, mycomplex_t* localConvFFT,myco n++; } } -//#pragma omp critical - { - myfftw_destroy_plan(plan); - } + /* Controll but slows down the parallelisim if(n> MAX_DISPLACE && n> param.param_device.NtotDist) { @@ -554,7 +543,6 @@ int bioem::createProjection(int iMap,mycomplex_t* mapFFT) myfloat3_t RotatedPointsModel[Model.nPointsModel]; myfloat_t rotmat[3][3]; myfloat_t alpha, gam,beta; - myfftw_plan plan; mycomplex_t* localproj; localproj= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param.param_device.NumberPixels*param.param_device.NumberPixels); @@ -630,14 +618,7 @@ int bioem::createProjection(int iMap,mycomplex_t* mapFFT) /***** Converting projection to Fourier Space for Convolution later with kernel****/ /********** Omp Critical is necessary with FFTW*******/ - //#pragma omp critical - { - plan = myfftw_plan_dft_2d(param.param_device.NumberPixels,param.param_device.NumberPixels,localproj,mapFFT,FFTW_FORWARD,FFTW_ESTIMATE); - - myfftw_execute(plan); - myfftw_destroy_plan(plan); - myfftw_free(localproj); - } + myfftw_execute_dft(param.fft_plan_c2c_forward,localproj,mapFFT); return(0); } @@ -650,7 +631,6 @@ int bioem::createConvolutedProjectionMap(int iMap,int iConv,mycomplex_t* lproj,b *************** and Backtransforming it to real Space ******************************/ /**************************************************************************************/ - myfftw_plan plan; mycomplex_t* localconvFFT; localconvFFT= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t)*param.param_device.NumberPixels*param.param_device.NumberPixels); @@ -670,8 +650,7 @@ int bioem::createConvolutedProjectionMap(int iMap,int iConv,mycomplex_t* lproj,b /**** Bringing convoluted Map to real Space ****/ //#pragma omp critical { - plan = myfftw_plan_dft_2d(param.param_device.NumberPixels,param.param_device.NumberPixels,localmultFFT,localconvFFT,FFTW_BACKWARD,FFTW_ESTIMATE); - myfftw_execute(plan); + myfftw_execute_dft(param.fft_plan_c2c_backward,localmultFFT,localconvFFT); } @@ -701,11 +680,7 @@ int bioem::createConvolutedProjectionMap(int iMap,int iConv,mycomplex_t* lproj,b sumsquareC=sumsquareC/pow(float(param.param_device.NumberPixels),4); /**** Freeing fftw_complex created (dont know if omp critical is necessary) ****/ - //#pragma omp critical - { - myfftw_destroy_plan(plan); - myfftw_free(localconvFFT); - } + myfftw_free(localconvFFT); return(0); } diff --git a/include/bioem_cuda_internal.h b/include/bioem_cuda_internal.h index d184c75ef88fecf645501a1d4291c1f2c78b19da..38b8e6d08566363fe80f0c841b23195b8faf0766 100644 --- a/include/bioem_cuda_internal.h +++ b/include/bioem_cuda_internal.h @@ -3,6 +3,11 @@ #include <cuda.h> +//Hack to make nvcc compiler accept fftw.h, float128 is not used anyway +#define __float128 double +#include <fftw3.h> +#undef __float128 + #include "bioem_cuda.h" class bioem_cuda : public bioem diff --git a/include/defs.h b/include/defs.h index 5f0b1ac17ad2f09859b779306e3516ec6972a7f7..f0ae2eefc2580cd3a424fc76e94d26e87c819ce5 100644 --- a/include/defs.h +++ b/include/defs.h @@ -7,6 +7,7 @@ typedef float myfloat_t; #define myfftw_free fftwf_free #define myfftw_destroy_plan fftwf_destroy_plan #define myfftw_execute fftwf_execute +#define myfftw_execute_dft fftwf_execute_dft #define myfftw_plan_dft_2d fftwf_plan_dft_2d #define myfftw_plan fftwf_plan #else @@ -15,6 +16,7 @@ typedef double myfloat_t; #define myfftw_free fftw_free #define myfftw_destroy_plan fftw_destroy_plan #define myfftw_execute fftw_execute +#define myfftw_execute_dft fftw_execute_dft #define myfftw_plan_dft_2d fftw_plan_dft_2d #define myfftw_plan fftw_plan #endif diff --git a/include/param.h b/include/param.h index 4d1eea0c07fb5998e101657d8a765a3a61d082fd..836b0238c97372c0b452f68a67e3aaf25dd47f92 100644 --- a/include/param.h +++ b/include/param.h @@ -5,6 +5,7 @@ #include "map.h" #include <complex> #include <math.h> +#include <fftw3.h> class bioem_map_forFFT; @@ -68,6 +69,12 @@ public: int nTotGridAngles; int nTotCTFs; //myfloat_t Ntotpi;//in device class + + int fft_plans_created; + myfftw_plan fft_plan_c2c_forward, fft_plan_c2c_backward, fft_plan_r2c_forward, fft_plan_c2r_backward; + +private: + void releaseFFTPlans(); }; #endif diff --git a/map.cpp b/map.cpp index b282dda62d70c82a34627b0d5fefb9a176306ee7..959843be147dc97dc527f875f3ff3233170bdae0 100644 --- a/map.cpp +++ b/map.cpp @@ -134,7 +134,6 @@ int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param) /********** Routine that pre-calculates Kernels for Convolution **********************/ /************************************************************************************/ - myfftw_plan plan; mycomplex_t* localMap; localMap= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param.param_device.NumberPixels*param.param_device.NumberPixels); @@ -155,9 +154,7 @@ int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param) } //Calling FFT_Forward - plan = myfftw_plan_dft_2d(param.param_device.NumberPixels,param.param_device.NumberPixels,localMap,localout,FFTW_FORWARD,FFTW_ESTIMATE); - myfftw_execute(plan); - myfftw_destroy_plan(plan); + myfftw_execute_dft(param.fft_plan_c2c_forward,localMap,localout); // Normalizing and saving the Reference CTFs for(int i=0; i < param.param_device.NumberPixels ; i++ ) diff --git a/param.cpp b/param.cpp index 9b77152173d4c82f277c5ea51fb6abbfabcf27f4..eedd06fff265d8471026c54e5022ac192777a4a3 100644 --- a/param.cpp +++ b/param.cpp @@ -27,6 +27,8 @@ bioem_param::bioem_param() /****center displacement paramters Equal in both directions***/ param_device.maxDisplaceCenter = 0; numberGridPointsDisplaceCenter = 0; + + fft_plans_created = 0; } int bioem_param::readParameters() @@ -174,9 +176,40 @@ int bioem_param::readParameters() } input.close(); cout << " +++++++++++++++++++++++++++++++++++++++++ \n"; + + cout << "Preparing FFTs\n"; + releaseFFTPlans(); + mycomplex_t *tmp_map, *tmp_map2; + tmp_map = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberPixels); + tmp_map2 = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberPixels); + + fft_plan_c2c_forward = myfftw_plan_dft_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, FFTW_FORWARD, FFTW_MEASURE); + fft_plan_c2c_backward = myfftw_plan_dft_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, FFTW_BACKWARD, FFTW_MEASURE); + + if (fft_plan_c2c_forward == 0 || fft_plan_c2c_backward == 0) + { + cout << "Error planing FFTs\n"; + exit(1); + } + + myfftw_free(tmp_map); + myfftw_free(tmp_map2); + fft_plans_created = 1; + cout << " +++++++++++++++++++++++++++++++++++++++++ \n"; + return(0); } +void bioem_param::releaseFFTPlans() +{ + if (fft_plans_created) + { + myfftw_destroy_plan(fft_plan_c2c_forward); + myfftw_destroy_plan(fft_plan_c2c_backward); + } + fft_plans_created = 0; +} + int bioem_param::CalculateGridsParam() //TO DO FOR QUATERNIONS { /**************************************************************************************/ @@ -231,7 +264,6 @@ int bioem_param::CalculateRefCTF() /************************************************************************************/ myfloat_t amp,env,phase,ctf,radsq; - myfftw_plan plan; mycomplex_t* localCTF; int nctfmax=int(float(param_device.NumberPixels)/2.0); int n=0; @@ -272,9 +304,7 @@ int bioem_param::CalculateRefCTF() mycomplex_t* localout; localout= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param_device.NumberPixels*param_device.NumberPixels); //Calling FFT_Forward - plan = myfftw_plan_dft_2d(param_device.NumberPixels,param_device.NumberPixels,localCTF,localout,FFTW_FORWARD,FFTW_ESTIMATE); - myfftw_execute(plan); - myfftw_destroy_plan(plan); + myfftw_execute_dft(fft_plan_c2c_forward,localCTF,localout); // Normalizing and saving the Reference CTFs for(int i=0; i < param_device.NumberPixels ; i++ ) @@ -307,6 +337,7 @@ int bioem_param::CalculateRefCTF() bioem_param::~bioem_param() { + releaseFFTPlans(); param_device.NumberPixels=0; angleGridPointsAlpha = 0; angleGridPointsBeta = 0; @@ -315,5 +346,4 @@ bioem_param::~bioem_param() numberGridPointsCTF_phase = 0; param_device.maxDisplaceCenter = 0; numberGridPointsDisplaceCenter = 0; - }