Commit a33835d9 authored by David Rohr's avatar David Rohr
Browse files

Use a single plan for FFT, makes replaning obsolete, use FFTW_MEASURE instead...

Use a single plan for FFT, makes replaning obsolete, use FFTW_MEASURE instead of FFTW_ESTIMATE to obtain better plan
parent 378fa3f0
...@@ -374,7 +374,7 @@ int bioem::run() ...@@ -374,7 +374,7 @@ int bioem::run()
int bioem::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap) int bioem::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap)
{ {
#pragma omp parallel for #pragma omp parallel for
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{ {
compareRefMapShifted<-1>(iRefMap,iProjectionOut,iConv,conv_map, pProb, param.param_device, RefMap); compareRefMapShifted<-1>(iRefMap,iProjectionOut,iConv,conv_map, pProb, param.param_device, RefMap);
...@@ -384,7 +384,7 @@ int bioem::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_m ...@@ -384,7 +384,7 @@ int bioem::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_m
int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myfloat_t sumC,myfloat_t sumsquareC) int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myfloat_t sumC,myfloat_t sumsquareC)
{ {
#pragma omp parallel for #pragma omp parallel for
for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++) for (int iRefMap = 0; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{ {
...@@ -402,8 +402,6 @@ int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myf ...@@ -402,8 +402,6 @@ int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myf
crossCor[iRefMap].disy[n]=-99999; crossCor[iRefMap].disy[n]=-99999;
} }
// Before:: compareRefMapShifted<-1>(iRefMap,iProjectionOut,iConv,conv_map, pProb, param.param_device, RefMap);
calculateCCFFT(iRefMap,iConv, localConvFFT, localCCT,lCC); calculateCCFFT(iRefMap,iConv, localConvFFT, localCCT,lCC);
myfftw_free(localCCT); myfftw_free(localCCT);
...@@ -423,8 +421,6 @@ int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myf ...@@ -423,8 +421,6 @@ int bioem::compareRefMaps2(int iOrient, int iConv, mycomplex_t* localConvFFT,myf
/////////////NEW ROUTINE //////////////// /////////////NEW ROUTINE ////////////////
int bioem::calculateCCFFT(int iRefMap, int iConv, mycomplex_t* localConvFFT,mycomplex_t* localCCT,mycomplex_t* lCC) int bioem::calculateCCFFT(int iRefMap, int iConv, mycomplex_t* localConvFFT,mycomplex_t* localCCT,mycomplex_t* lCC)
{ {
myfftw_plan plan;
for(int i=0; i < param.param_device.NumberPixels ; i++ ) for(int i=0; i < param.param_device.NumberPixels ; i++ )
{ {
for(int j=0; j < param.param_device.NumberPixels ; j++ ) for(int j=0; j < param.param_device.NumberPixels ; j++ )
...@@ -434,11 +430,7 @@ int bioem::calculateCCFFT(int iRefMap, int iConv, mycomplex_t* localConvFFT,myco ...@@ -434,11 +430,7 @@ int bioem::calculateCCFFT(int iRefMap, int iConv, mycomplex_t* localConvFFT,myco
} }
} }
#pragma omp critical myfftw_execute_dft(param.fft_plan_c2c_backward,localCCT,lCC);
{
plan = myfftw_plan_dft_2d(param.param_device.NumberPixels,param.param_device.NumberPixels,localCCT,lCC,FFTW_BACKWARD,FFTW_ESTIMATE);
myfftw_execute(plan);
}
// Storing CORRELATIONS FOR CORRESPONDING DISPLACEMENTS & Normalizing after Backward FFT // Storing CORRELATIONS FOR CORRESPONDING DISPLACEMENTS & Normalizing after Backward FFT
int n=0; int n=0;
...@@ -476,10 +468,7 @@ int bioem::calculateCCFFT(int iRefMap, int iConv, mycomplex_t* localConvFFT,myco ...@@ -476,10 +468,7 @@ int bioem::calculateCCFFT(int iRefMap, int iConv, mycomplex_t* localConvFFT,myco
n++; n++;
} }
} }
//#pragma omp critical
{
myfftw_destroy_plan(plan);
}
/* Controll but slows down the parallelisim /* Controll but slows down the parallelisim
if(n> MAX_DISPLACE && n> param.param_device.NtotDist) if(n> MAX_DISPLACE && n> param.param_device.NtotDist)
{ {
...@@ -554,7 +543,6 @@ int bioem::createProjection(int iMap,mycomplex_t* mapFFT) ...@@ -554,7 +543,6 @@ int bioem::createProjection(int iMap,mycomplex_t* mapFFT)
myfloat3_t RotatedPointsModel[Model.nPointsModel]; myfloat3_t RotatedPointsModel[Model.nPointsModel];
myfloat_t rotmat[3][3]; myfloat_t rotmat[3][3];
myfloat_t alpha, gam,beta; myfloat_t alpha, gam,beta;
myfftw_plan plan;
mycomplex_t* localproj; mycomplex_t* localproj;
localproj= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param.param_device.NumberPixels*param.param_device.NumberPixels); localproj= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param.param_device.NumberPixels*param.param_device.NumberPixels);
...@@ -630,14 +618,7 @@ int bioem::createProjection(int iMap,mycomplex_t* mapFFT) ...@@ -630,14 +618,7 @@ int bioem::createProjection(int iMap,mycomplex_t* mapFFT)
/***** Converting projection to Fourier Space for Convolution later with kernel****/ /***** Converting projection to Fourier Space for Convolution later with kernel****/
/********** Omp Critical is necessary with FFTW*******/ /********** Omp Critical is necessary with FFTW*******/
//#pragma omp critical myfftw_execute_dft(param.fft_plan_c2c_forward,localproj,mapFFT);
{
plan = myfftw_plan_dft_2d(param.param_device.NumberPixels,param.param_device.NumberPixels,localproj,mapFFT,FFTW_FORWARD,FFTW_ESTIMATE);
myfftw_execute(plan);
myfftw_destroy_plan(plan);
myfftw_free(localproj);
}
return(0); return(0);
} }
...@@ -650,7 +631,6 @@ int bioem::createConvolutedProjectionMap(int iMap,int iConv,mycomplex_t* lproj,b ...@@ -650,7 +631,6 @@ int bioem::createConvolutedProjectionMap(int iMap,int iConv,mycomplex_t* lproj,b
*************** and Backtransforming it to real Space ******************************/ *************** and Backtransforming it to real Space ******************************/
/**************************************************************************************/ /**************************************************************************************/
myfftw_plan plan;
mycomplex_t* localconvFFT; mycomplex_t* localconvFFT;
localconvFFT= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t)*param.param_device.NumberPixels*param.param_device.NumberPixels); localconvFFT= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t)*param.param_device.NumberPixels*param.param_device.NumberPixels);
...@@ -670,8 +650,7 @@ int bioem::createConvolutedProjectionMap(int iMap,int iConv,mycomplex_t* lproj,b ...@@ -670,8 +650,7 @@ int bioem::createConvolutedProjectionMap(int iMap,int iConv,mycomplex_t* lproj,b
/**** Bringing convoluted Map to real Space ****/ /**** Bringing convoluted Map to real Space ****/
//#pragma omp critical //#pragma omp critical
{ {
plan = myfftw_plan_dft_2d(param.param_device.NumberPixels,param.param_device.NumberPixels,localmultFFT,localconvFFT,FFTW_BACKWARD,FFTW_ESTIMATE); myfftw_execute_dft(param.fft_plan_c2c_backward,localmultFFT,localconvFFT);
myfftw_execute(plan);
} }
...@@ -701,11 +680,7 @@ int bioem::createConvolutedProjectionMap(int iMap,int iConv,mycomplex_t* lproj,b ...@@ -701,11 +680,7 @@ int bioem::createConvolutedProjectionMap(int iMap,int iConv,mycomplex_t* lproj,b
sumsquareC=sumsquareC/pow(float(param.param_device.NumberPixels),4); sumsquareC=sumsquareC/pow(float(param.param_device.NumberPixels),4);
/**** Freeing fftw_complex created (dont know if omp critical is necessary) ****/ /**** Freeing fftw_complex created (dont know if omp critical is necessary) ****/
//#pragma omp critical
{
myfftw_destroy_plan(plan);
myfftw_free(localconvFFT); myfftw_free(localconvFFT);
}
return(0); return(0);
} }
......
...@@ -3,6 +3,11 @@ ...@@ -3,6 +3,11 @@
#include <cuda.h> #include <cuda.h>
//Hack to make nvcc compiler accept fftw.h, float128 is not used anyway
#define __float128 double
#include <fftw3.h>
#undef __float128
#include "bioem_cuda.h" #include "bioem_cuda.h"
class bioem_cuda : public bioem class bioem_cuda : public bioem
......
...@@ -7,6 +7,7 @@ typedef float myfloat_t; ...@@ -7,6 +7,7 @@ typedef float myfloat_t;
#define myfftw_free fftwf_free #define myfftw_free fftwf_free
#define myfftw_destroy_plan fftwf_destroy_plan #define myfftw_destroy_plan fftwf_destroy_plan
#define myfftw_execute fftwf_execute #define myfftw_execute fftwf_execute
#define myfftw_execute_dft fftwf_execute_dft
#define myfftw_plan_dft_2d fftwf_plan_dft_2d #define myfftw_plan_dft_2d fftwf_plan_dft_2d
#define myfftw_plan fftwf_plan #define myfftw_plan fftwf_plan
#else #else
...@@ -15,6 +16,7 @@ typedef double myfloat_t; ...@@ -15,6 +16,7 @@ typedef double myfloat_t;
#define myfftw_free fftw_free #define myfftw_free fftw_free
#define myfftw_destroy_plan fftw_destroy_plan #define myfftw_destroy_plan fftw_destroy_plan
#define myfftw_execute fftw_execute #define myfftw_execute fftw_execute
#define myfftw_execute_dft fftw_execute_dft
#define myfftw_plan_dft_2d fftw_plan_dft_2d #define myfftw_plan_dft_2d fftw_plan_dft_2d
#define myfftw_plan fftw_plan #define myfftw_plan fftw_plan
#endif #endif
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "map.h" #include "map.h"
#include <complex> #include <complex>
#include <math.h> #include <math.h>
#include <fftw3.h>
class bioem_map_forFFT; class bioem_map_forFFT;
...@@ -68,6 +69,12 @@ public: ...@@ -68,6 +69,12 @@ public:
int nTotGridAngles; int nTotGridAngles;
int nTotCTFs; int nTotCTFs;
//myfloat_t Ntotpi;//in device class //myfloat_t Ntotpi;//in device class
int fft_plans_created;
myfftw_plan fft_plan_c2c_forward, fft_plan_c2c_backward, fft_plan_r2c_forward, fft_plan_c2r_backward;
private:
void releaseFFTPlans();
}; };
#endif #endif
...@@ -134,7 +134,6 @@ int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param) ...@@ -134,7 +134,6 @@ int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param)
/********** Routine that pre-calculates Kernels for Convolution **********************/ /********** Routine that pre-calculates Kernels for Convolution **********************/
/************************************************************************************/ /************************************************************************************/
myfftw_plan plan;
mycomplex_t* localMap; mycomplex_t* localMap;
localMap= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param.param_device.NumberPixels*param.param_device.NumberPixels); localMap= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param.param_device.NumberPixels*param.param_device.NumberPixels);
...@@ -155,9 +154,7 @@ int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param) ...@@ -155,9 +154,7 @@ int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param)
} }
//Calling FFT_Forward //Calling FFT_Forward
plan = myfftw_plan_dft_2d(param.param_device.NumberPixels,param.param_device.NumberPixels,localMap,localout,FFTW_FORWARD,FFTW_ESTIMATE); myfftw_execute_dft(param.fft_plan_c2c_forward,localMap,localout);
myfftw_execute(plan);
myfftw_destroy_plan(plan);
// Normalizing and saving the Reference CTFs // Normalizing and saving the Reference CTFs
for(int i=0; i < param.param_device.NumberPixels ; i++ ) for(int i=0; i < param.param_device.NumberPixels ; i++ )
......
...@@ -27,6 +27,8 @@ bioem_param::bioem_param() ...@@ -27,6 +27,8 @@ bioem_param::bioem_param()
/****center displacement paramters Equal in both directions***/ /****center displacement paramters Equal in both directions***/
param_device.maxDisplaceCenter = 0; param_device.maxDisplaceCenter = 0;
numberGridPointsDisplaceCenter = 0; numberGridPointsDisplaceCenter = 0;
fft_plans_created = 0;
} }
int bioem_param::readParameters() int bioem_param::readParameters()
...@@ -174,9 +176,40 @@ int bioem_param::readParameters() ...@@ -174,9 +176,40 @@ int bioem_param::readParameters()
} }
input.close(); input.close();
cout << " +++++++++++++++++++++++++++++++++++++++++ \n"; cout << " +++++++++++++++++++++++++++++++++++++++++ \n";
cout << "Preparing FFTs\n";
releaseFFTPlans();
mycomplex_t *tmp_map, *tmp_map2;
tmp_map = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberPixels);
tmp_map2 = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberPixels);
fft_plan_c2c_forward = myfftw_plan_dft_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, FFTW_FORWARD, FFTW_MEASURE);
fft_plan_c2c_backward = myfftw_plan_dft_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, FFTW_BACKWARD, FFTW_MEASURE);
if (fft_plan_c2c_forward == 0 || fft_plan_c2c_backward == 0)
{
cout << "Error planing FFTs\n";
exit(1);
}
myfftw_free(tmp_map);
myfftw_free(tmp_map2);
fft_plans_created = 1;
cout << " +++++++++++++++++++++++++++++++++++++++++ \n";
return(0); return(0);
} }
void bioem_param::releaseFFTPlans()
{
if (fft_plans_created)
{
myfftw_destroy_plan(fft_plan_c2c_forward);
myfftw_destroy_plan(fft_plan_c2c_backward);
}
fft_plans_created = 0;
}
int bioem_param::CalculateGridsParam() //TO DO FOR QUATERNIONS int bioem_param::CalculateGridsParam() //TO DO FOR QUATERNIONS
{ {
/**************************************************************************************/ /**************************************************************************************/
...@@ -231,7 +264,6 @@ int bioem_param::CalculateRefCTF() ...@@ -231,7 +264,6 @@ int bioem_param::CalculateRefCTF()
/************************************************************************************/ /************************************************************************************/
myfloat_t amp,env,phase,ctf,radsq; myfloat_t amp,env,phase,ctf,radsq;
myfftw_plan plan;
mycomplex_t* localCTF; mycomplex_t* localCTF;
int nctfmax=int(float(param_device.NumberPixels)/2.0); int nctfmax=int(float(param_device.NumberPixels)/2.0);
int n=0; int n=0;
...@@ -272,9 +304,7 @@ int bioem_param::CalculateRefCTF() ...@@ -272,9 +304,7 @@ int bioem_param::CalculateRefCTF()
mycomplex_t* localout; mycomplex_t* localout;
localout= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param_device.NumberPixels*param_device.NumberPixels); localout= (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) *param_device.NumberPixels*param_device.NumberPixels);
//Calling FFT_Forward //Calling FFT_Forward
plan = myfftw_plan_dft_2d(param_device.NumberPixels,param_device.NumberPixels,localCTF,localout,FFTW_FORWARD,FFTW_ESTIMATE); myfftw_execute_dft(fft_plan_c2c_forward,localCTF,localout);
myfftw_execute(plan);
myfftw_destroy_plan(plan);
// Normalizing and saving the Reference CTFs // Normalizing and saving the Reference CTFs
for(int i=0; i < param_device.NumberPixels ; i++ ) for(int i=0; i < param_device.NumberPixels ; i++ )
...@@ -307,6 +337,7 @@ int bioem_param::CalculateRefCTF() ...@@ -307,6 +337,7 @@ int bioem_param::CalculateRefCTF()
bioem_param::~bioem_param() bioem_param::~bioem_param()
{ {
releaseFFTPlans();
param_device.NumberPixels=0; param_device.NumberPixels=0;
angleGridPointsAlpha = 0; angleGridPointsAlpha = 0;
angleGridPointsBeta = 0; angleGridPointsBeta = 0;
...@@ -315,5 +346,4 @@ bioem_param::~bioem_param() ...@@ -315,5 +346,4 @@ bioem_param::~bioem_param()
numberGridPointsCTF_phase = 0; numberGridPointsCTF_phase = 0;
param_device.maxDisplaceCenter = 0; param_device.maxDisplaceCenter = 0;
numberGridPointsDisplaceCenter = 0; numberGridPointsDisplaceCenter = 0;
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment