diff --git a/bfps/cpp/fftw_interface.hpp b/bfps/cpp/fftw_interface.hpp index 495ec9fa3712153df4d31faf7dfb3046637b5483..682850c86aa4eb1c6326146459c3671338572542 100644 --- a/bfps/cpp/fftw_interface.hpp +++ b/bfps/cpp/fftw_interface.hpp @@ -34,6 +34,15 @@ #define DEFAULT_FFTW_FLAG FFTW_PATIENT #endif +// To have multiple calls to c2r/r2c +#define SPLIT_FFTW_MANY +#ifdef SPLIT_FFTW_MANY +#include <vector> +#include <memory> +#include <algorithm> +#include <cassert> +#endif + template <class realtype> class fftw_interface; @@ -45,6 +54,31 @@ public: using complex = fftwf_complex; using plan = fftwf_plan; using iodim = fftwf_iodim; +#ifdef SPLIT_FFTW_MANY + struct many_plan_container{ + int rnk; + std::vector<ptrdiff_t> n; + int howmany; + ptrdiff_t iblock; + ptrdiff_t oblock; + std::shared_ptr<real> buffer; + plan plan_to_use; + + ptrdiff_t local_n0, local_0_start; + ptrdiff_t local_n1, local_1_start; + + bool is_r2c; + void* in; + void* out; + + ptrdiff_t nb_real_to_copy; + ptrdiff_t nb_complex_to_copy; + }; + + using many_plan = many_plan_container; +#else + using many_plan = fftwf_plan; +#endif static complex* alloc_complex(const size_t in_size){ return fftwf_alloc_complex(in_size); @@ -66,6 +100,16 @@ public: fftwf_destroy_plan(in_plan); } + template <class ... Params> + static ptrdiff_t mpi_local_size_many_transposed(Params ... params){ + return fftwf_mpi_local_size_many_transposed(params...); + } + + template <class ... Params> + static ptrdiff_t mpi_local_size_many(Params ... params){ + return fftwf_mpi_local_size_many(params...); + } + template <class ... Params> static plan mpi_plan_transpose(Params ... params){ return fftwf_mpi_plan_transpose(params...); @@ -86,6 +130,175 @@ public: return fftwf_plan_guru_dft(params...); } +#ifdef SPLIT_FFTW_MANY + static many_plan mpi_plan_many_dft_c2r(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + complex *in, real *out, + MPI_Comm comm, unsigned flags){ + assert(iblock == FFTW_MPI_DEFAULT_BLOCK); + assert(oblock == FFTW_MPI_DEFAULT_BLOCK); + + many_plan c2r_plan; + c2r_plan.rnk = rnk; + c2r_plan.n.insert(c2r_plan.n.end(), n, n+rnk); + c2r_plan.howmany = howmany; + c2r_plan.iblock = iblock; + c2r_plan.oblock = oblock; + c2r_plan.is_r2c = false; + c2r_plan.in = in; + c2r_plan.out = out; + + // If 1 then use default without copy + if(howmany == 1){ + c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n, + (complex*)in, + out, + comm, flags); + return c2r_plan; + } + + // We need to find out the size of the buffer to allocate + mpi_local_size_many_transposed( + rnk, n, howmany, + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, comm, + &c2r_plan.local_n0, &c2r_plan.local_0_start, + &c2r_plan.local_n1, &c2r_plan.local_1_start); + + ptrdiff_t sizeBuffer = c2r_plan.local_n0/howmany; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + sizeBuffer *= n[idxrnk]; + } + sizeBuffer *= n[rnk-1]+2; + + c2r_plan.buffer.reset(alloc_real(sizeBuffer)); + // Init the plan + c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n, + (complex*)c2r_plan.buffer.get(), + c2r_plan.buffer.get(), + comm, flags); + + c2r_plan.nb_real_to_copy = c2r_plan.local_n0/howmany; + c2r_plan.nb_complex_to_copy = c2r_plan.local_n0/howmany; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + c2r_plan.nb_real_to_copy *= n[idxrnk]; + c2r_plan.nb_complex_to_copy *= n[idxrnk]; + } + c2r_plan.nb_real_to_copy *= n[rnk-1]; + c2r_plan.nb_complex_to_copy *= n[rnk-1]/2 + 1; + + return c2r_plan; + } + + static many_plan mpi_plan_many_dft_r2c(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + real *in, complex *out, + MPI_Comm comm, unsigned flags){ + assert(iblock == FFTW_MPI_DEFAULT_BLOCK); + assert(oblock == FFTW_MPI_DEFAULT_BLOCK); + + many_plan r2c_plan; + r2c_plan.rnk = rnk; + r2c_plan.n.insert(r2c_plan.n.end(), n, n+rnk); + r2c_plan.howmany = howmany; + r2c_plan.iblock = iblock; + r2c_plan.oblock = oblock; + r2c_plan.is_r2c = true; + r2c_plan.in = in; + r2c_plan.out = out; + + // If 1 then use default without copy + if(howmany == 1){ + r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n, + in, + (complex*)out, + comm, flags); + return r2c_plan; + } + + // We need to find out the size of the buffer to allocate + mpi_local_size_many_transposed( + rnk, n, howmany, + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, comm, + &r2c_plan.local_n0, &r2c_plan.local_0_start, + &r2c_plan.local_n1, &r2c_plan.local_1_start); + + ptrdiff_t sizeBuffer = r2c_plan.local_n0/howmany; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + sizeBuffer *= n[idxrnk]; + } + sizeBuffer *= n[rnk-1]+2; + + r2c_plan.buffer.reset(alloc_real(sizeBuffer)); + // Init the plan + r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n, + r2c_plan.buffer.get(), + (complex*)r2c_plan.buffer.get(), + comm, flags); + + + r2c_plan.nb_real_to_copy = r2c_plan.local_n0/howmany; + r2c_plan.nb_complex_to_copy = r2c_plan.local_n0/howmany; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + r2c_plan.nb_real_to_copy *= n[idxrnk]; + r2c_plan.nb_complex_to_copy *= n[idxrnk]; + } + r2c_plan.nb_real_to_copy *= n[rnk-1]; + r2c_plan.nb_complex_to_copy *= n[rnk-1]/2 + 1; + + return r2c_plan; + } + + static void execute(many_plan& in_plan){ + if(in_plan.howmany == 1){ + execute(in_plan.plan_to_use); + return; + } + + const ptrdiff_t nb_to_copy_to_buffer = (in_plan.is_r2c?in_plan.nb_real_to_copy:in_plan.nb_complex_to_copy); + const ptrdiff_t nb_to_copy_from_buffer = (!in_plan.is_r2c?in_plan.nb_real_to_copy:in_plan.nb_complex_to_copy); + + for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){ + // Copy to buffer + if(in_plan.is_r2c){ + real* dest = in_plan.buffer.get(); + const real* src = ((const real*)in_plan.in)+idx_howmany; + for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_to_buffer ; ++idx_copy){ + dest[idx_copy] = src[idx_copy*in_plan.howmany]; + } + } + else{ + complex* dest = (complex*)in_plan.buffer.get(); + const complex* src = ((const complex*)in_plan.in)+idx_howmany; + for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_to_buffer ; ++idx_copy){ + dest[idx_copy][0] = src[idx_copy*in_plan.howmany][0]; + dest[idx_copy][1] = src[idx_copy*in_plan.howmany][1]; + } + } + + execute(in_plan.plan_to_use); + // Copy result from buffer + if(in_plan.is_r2c){ + complex* dest = ((complex*)in_plan.in)+idx_howmany; + const complex* src = (const complex*)in_plan.buffer.get(); + for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_from_buffer ; ++idx_copy){ + dest[idx_copy*in_plan.howmany][0] = src[idx_copy][0]; + dest[idx_copy*in_plan.howmany][1] = src[idx_copy][1]; + } + } + else{ + real* dest = ((real*)in_plan.in)+idx_howmany; + const real* src = in_plan.buffer.get(); + for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_from_buffer ; ++idx_copy){ + dest[idx_copy*in_plan.howmany] = src[idx_copy]; + } + } + } + } + + static void destroy_plan(many_plan& in_plan){ + destroy_plan(in_plan.plan_to_use); + } +#else template <class ... Params> static plan mpi_plan_many_dft_c2r(Params ... params){ return fftwf_mpi_plan_many_dft_c2r(params...); @@ -95,6 +308,17 @@ public: static plan mpi_plan_many_dft_r2c(Params ... params){ return fftwf_mpi_plan_many_dft_r2c(params...); } +#endif + + template <class ... Params> + static plan mpi_plan_dft_c2r(Params ... params){ + return fftwf_mpi_plan_dft_c2r(params...); + } + + template <class ... Params> + static plan mpi_plan_dft_r2c(Params ... params){ + return fftwf_mpi_plan_dft_r2c(params...); + } template <class ... Params> static plan mpi_plan_dft_c2r_3d(Params ... params){ @@ -110,6 +334,31 @@ public: using complex = fftw_complex; using plan = fftw_plan; using iodim = fftw_iodim; +#ifdef SPLIT_FFTW_MANY + struct many_plan_container{ + int rnk; + std::vector<ptrdiff_t> n; + int howmany; + ptrdiff_t iblock; + ptrdiff_t oblock; + std::shared_ptr<real> buffer; + plan plan_to_use; + + ptrdiff_t local_n0, local_0_start; + ptrdiff_t local_n1, local_1_start; + + bool is_r2c; + void* in; + void* out; + + ptrdiff_t nb_real_to_copy; + ptrdiff_t nb_complex_to_copy; + }; + + using many_plan = many_plan_container; +#else + using many_plan = fftw_plan; +#endif static complex* alloc_complex(const size_t in_size){ return fftw_alloc_complex(in_size); @@ -131,6 +380,16 @@ public: fftw_destroy_plan(in_plan); } + template <class ... Params> + static ptrdiff_t mpi_local_size_many_transposed(Params ... params){ + return fftw_mpi_local_size_many_transposed(params...); + } + + template <class ... Params> + static ptrdiff_t mpi_local_size_many(Params ... params){ + return fftw_mpi_local_size_many(params...); + } + template <class ... Params> static plan mpi_plan_transpose(Params ... params){ return fftw_mpi_plan_transpose(params...); @@ -151,6 +410,176 @@ public: return fftw_plan_guru_dft(params...); } + +#ifdef SPLIT_FFTW_MANY + static many_plan mpi_plan_many_dft_c2r(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + complex *in, real *out, + MPI_Comm comm, unsigned flags){ + assert(iblock == FFTW_MPI_DEFAULT_BLOCK); + assert(oblock == FFTW_MPI_DEFAULT_BLOCK); + + many_plan c2r_plan; + c2r_plan.rnk = rnk; + c2r_plan.n.insert(c2r_plan.n.end(), n, n+rnk); + c2r_plan.howmany = howmany; + c2r_plan.iblock = iblock; + c2r_plan.oblock = oblock; + c2r_plan.is_r2c = false; + c2r_plan.in = in; + c2r_plan.out = out; + + // If 1 then use default without copy + if(howmany == 1){ + c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n, + (complex*)in, + out, + comm, flags); + return c2r_plan; + } + + // We need to find out the size of the buffer to allocate + mpi_local_size_many_transposed( + rnk, n, howmany, + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, comm, + &c2r_plan.local_n0, &c2r_plan.local_0_start, + &c2r_plan.local_n1, &c2r_plan.local_1_start); + + ptrdiff_t sizeBuffer = c2r_plan.local_n0/howmany; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + sizeBuffer *= n[idxrnk]; + } + sizeBuffer *= n[rnk-1]+2; + + c2r_plan.buffer.reset(alloc_real(sizeBuffer)); + // Init the plan + c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n, + (complex*)c2r_plan.buffer.get(), + c2r_plan.buffer.get(), + comm, flags); + + c2r_plan.nb_real_to_copy = c2r_plan.local_n0/howmany; + c2r_plan.nb_complex_to_copy = c2r_plan.local_n0/howmany; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + c2r_plan.nb_real_to_copy *= n[idxrnk]; + c2r_plan.nb_complex_to_copy *= n[idxrnk]; + } + c2r_plan.nb_real_to_copy *= n[rnk-1]; + c2r_plan.nb_complex_to_copy *= n[rnk-1]/2 + 1; + + return c2r_plan; + } + + static many_plan mpi_plan_many_dft_r2c(int rnk, const ptrdiff_t *n, ptrdiff_t howmany, + ptrdiff_t iblock, ptrdiff_t oblock, + real *in, complex *out, + MPI_Comm comm, unsigned flags){ + assert(iblock == FFTW_MPI_DEFAULT_BLOCK); + assert(oblock == FFTW_MPI_DEFAULT_BLOCK); + + many_plan r2c_plan; + r2c_plan.rnk = rnk; + r2c_plan.n.insert(r2c_plan.n.end(), n, n+rnk); + r2c_plan.howmany = howmany; + r2c_plan.iblock = iblock; + r2c_plan.oblock = oblock; + r2c_plan.is_r2c = true; + r2c_plan.in = in; + r2c_plan.out = out; + + // If 1 then use default without copy + if(howmany == 1){ + r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n, + in, + (complex*)out, + comm, flags); + return r2c_plan; + } + + // We need to find out the size of the buffer to allocate + mpi_local_size_many_transposed( + rnk, n, howmany, + FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, comm, + &r2c_plan.local_n0, &r2c_plan.local_0_start, + &r2c_plan.local_n1, &r2c_plan.local_1_start); + + ptrdiff_t sizeBuffer = r2c_plan.local_n0/howmany; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + sizeBuffer *= n[idxrnk]; + } + sizeBuffer *= n[rnk-1]+2; + + r2c_plan.buffer.reset(alloc_real(sizeBuffer)); + // Init the plan + r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n, + r2c_plan.buffer.get(), + (complex*)r2c_plan.buffer.get(), + comm, flags); + + + r2c_plan.nb_real_to_copy = r2c_plan.local_n0/howmany; + r2c_plan.nb_complex_to_copy = r2c_plan.local_n0/howmany; + for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){ + r2c_plan.nb_real_to_copy *= n[idxrnk]; + r2c_plan.nb_complex_to_copy *= n[idxrnk]; + } + r2c_plan.nb_real_to_copy *= n[rnk-1]; + r2c_plan.nb_complex_to_copy *= n[rnk-1]/2 + 1; + + return r2c_plan; + } + + static void execute(many_plan& in_plan){ + if(in_plan.howmany == 1){ + execute(in_plan.plan_to_use); + return; + } + + const ptrdiff_t nb_to_copy_to_buffer = (in_plan.is_r2c?in_plan.nb_real_to_copy:in_plan.nb_complex_to_copy); + const ptrdiff_t nb_to_copy_from_buffer = (!in_plan.is_r2c?in_plan.nb_real_to_copy:in_plan.nb_complex_to_copy); + + for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){ + // Copy to buffer + if(in_plan.is_r2c){ + real* dest = in_plan.buffer.get(); + const real* src = ((const real*)in_plan.in)+idx_howmany; + for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_to_buffer ; ++idx_copy){ + dest[idx_copy] = src[idx_copy*in_plan.howmany]; + } + } + else{ + complex* dest = (complex*)in_plan.buffer.get(); + const complex* src = ((const complex*)in_plan.in)+idx_howmany; + for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_to_buffer ; ++idx_copy){ + dest[idx_copy][0] = src[idx_copy*in_plan.howmany][0]; + dest[idx_copy][1] = src[idx_copy*in_plan.howmany][1]; + } + } + + execute(in_plan.plan_to_use); + // Copy result from buffer + if(in_plan.is_r2c){ + complex* dest = ((complex*)in_plan.in)+idx_howmany; + const complex* src = (const complex*)in_plan.buffer.get(); + for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_from_buffer ; ++idx_copy){ + dest[idx_copy*in_plan.howmany][0] = src[idx_copy][0]; + dest[idx_copy*in_plan.howmany][1] = src[idx_copy][1]; + } + } + else{ + real* dest = ((real*)in_plan.in)+idx_howmany; + const real* src = in_plan.buffer.get(); + for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_from_buffer ; ++idx_copy){ + dest[idx_copy*in_plan.howmany] = src[idx_copy]; + } + } + } + } + + static void destroy_plan(many_plan& in_plan){ + destroy_plan(in_plan.plan_to_use); + } +#else template <class ... Params> static plan mpi_plan_many_dft_c2r(Params ... params){ return fftw_mpi_plan_many_dft_c2r(params...); @@ -160,6 +589,17 @@ public: static plan mpi_plan_many_dft_r2c(Params ... params){ return fftw_mpi_plan_many_dft_r2c(params...); } +#endif + + template <class ... Params> + static plan mpi_plan_dft_c2r(Params ... params){ + return fftw_mpi_plan_dft_c2r(params...); + } + + template <class ... Params> + static plan mpi_plan_dft_r2c(Params ... params){ + return fftw_mpi_plan_dft_r2c(params...); + } template <class ... Params> static plan mpi_plan_dft_c2r_3d(Params ... params){ diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp index 6f2ff938fdafdb7c96ab9f696ff7b6a46a8c1196..e20207c15e9762b157da9e43263c5d73b2701775 100644 --- a/bfps/cpp/field.cpp +++ b/bfps/cpp/field.cpp @@ -77,7 +77,7 @@ field<rnumber, be, fc>::field( ptrdiff_t local_n0, local_0_start; ptrdiff_t local_n1, local_1_start; //tmp_local_size = fftw_mpi_local_size_many_transposed( - fftw_mpi_local_size_many_transposed( + fftw_interface<rnumber>::mpi_local_size_many_transposed( 3, nfftw, ncomp(fc), FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, this->comm, &local_n0, &local_0_start, diff --git a/bfps/cpp/field.hpp b/bfps/cpp/field.hpp index 9a5ab1be25764f00f48ba86b4e420db7ead62394..d8038517afa8ca7faca19d664d96d027be105445 100644 --- a/bfps/cpp/field.hpp +++ b/bfps/cpp/field.hpp @@ -72,8 +72,8 @@ class field field_layout<fc> *clayout, *rlayout, *rmemlayout; /* FFT plans */ - typename fftw_interface<rnumber>::plan c2r_plan; - typename fftw_interface<rnumber>::plan r2c_plan; + typename fftw_interface<rnumber>::many_plan c2r_plan; + typename fftw_interface<rnumber>::many_plan r2c_plan; unsigned fftw_plan_rigor; /* HDF5 data types for arrays */ diff --git a/bfps/cpp/field_descriptor.cpp b/bfps/cpp/field_descriptor.cpp index 20c634262dbb45ad4c2bb5a1b5640b6df23d4d2c..cb7da99514417937c54d54b8583d1e6f1b7007fc 100644 --- a/bfps/cpp/field_descriptor.cpp +++ b/bfps/cpp/field_descriptor.cpp @@ -62,7 +62,7 @@ field_descriptor<rnumber>::field_descriptor( ptrdiff_t local_n0, local_0_start; for (int i = 0; i < this->ndims; i++) nfftw[i] = n[i]; - this->local_size = fftw_mpi_local_size_many( + this->local_size = fftw_interface<rnumber>::mpi_local_size_many( this->ndims, &nfftw.front(), 1, diff --git a/bfps/cpp/fluid_solver.cpp b/bfps/cpp/fluid_solver.cpp index 319186103797f8135d4d3e2244ed5e3a8f271b00..7ec0c978102f2d0cad00d57d837fad6c141f91fb 100644 --- a/bfps/cpp/fluid_solver.cpp +++ b/bfps/cpp/fluid_solver.cpp @@ -86,10 +86,10 @@ fluid_solver<rnumber>::fluid_solver( this->rv[1] = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); this->rv[2] = this->rv[1]; - this->c2r_vorticity = new typename fftw_interface<rnumber>::plan; - this->r2c_vorticity = new typename fftw_interface<rnumber>::plan; - this->c2r_velocity = new typename fftw_interface<rnumber>::plan; - this->r2c_velocity = new typename fftw_interface<rnumber>::plan; + this->c2r_vorticity = new typename fftw_interface<rnumber>::many_plan; + this->r2c_vorticity = new typename fftw_interface<rnumber>::many_plan; + this->c2r_velocity = new typename fftw_interface<rnumber>::many_plan; + this->r2c_velocity = new typename fftw_interface<rnumber>::many_plan; ptrdiff_t sizes[] = {nz, ny, @@ -120,10 +120,10 @@ fluid_solver<rnumber>::fluid_solver( this->vc2r[0] = this->c2r_vorticity; this->vr2c[0] = this->r2c_vorticity; - this->vc2r[1] = new typename fftw_interface<rnumber>::plan; - this->vr2c[1] = new typename fftw_interface<rnumber>::plan; - this->vc2r[2] = new typename fftw_interface<rnumber>::plan; - this->vr2c[2] = new typename fftw_interface<rnumber>::plan; + this->vc2r[1] = new typename fftw_interface<rnumber>::many_plan; + this->vr2c[1] = new typename fftw_interface<rnumber>::many_plan; + this->vc2r[2] = new typename fftw_interface<rnumber>::many_plan; + this->vr2c[2] = new typename fftw_interface<rnumber>::many_plan; *(this->vc2r[1]) = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, diff --git a/bfps/cpp/fluid_solver.hpp b/bfps/cpp/fluid_solver.hpp index 4cc75cee4385353f64dc9bc9e7d34c6efba9ad48..aaddbb59b4a29530779e3dba81f90a06c790bdcb 100644 --- a/bfps/cpp/fluid_solver.hpp +++ b/bfps/cpp/fluid_solver.hpp @@ -55,12 +55,12 @@ class fluid_solver:public fluid_solver_base<rnumber> typename fluid_solver_base<rnumber>::cnumber *cu, *cv[4]; /* plans */ - typename fftw_interface<rnumber>::plan *c2r_vorticity; - typename fftw_interface<rnumber>::plan *r2c_vorticity; - typename fftw_interface<rnumber>::plan *c2r_velocity; - typename fftw_interface<rnumber>::plan *r2c_velocity; - typename fftw_interface<rnumber>::plan *uc2r, *ur2c; - typename fftw_interface<rnumber>::plan *vr2c[3], *vc2r[3]; + typename fftw_interface<rnumber>::many_plan *c2r_vorticity; + typename fftw_interface<rnumber>::many_plan *r2c_vorticity; + typename fftw_interface<rnumber>::many_plan *c2r_velocity; + typename fftw_interface<rnumber>::many_plan *r2c_velocity; + typename fftw_interface<rnumber>::many_plan *uc2r, *ur2c; + typename fftw_interface<rnumber>::many_plan *vr2c[3], *vc2r[3]; /* physical parameters */ double nu; diff --git a/bfps/cpp/fluid_solver_base.cpp b/bfps/cpp/fluid_solver_base.cpp index 6e4fd3335238218bad0b78462d3506ca9b48c721..b1d64ef5ce8294efa53cac23b391700b6b8574d3 100644 --- a/bfps/cpp/fluid_solver_base.cpp +++ b/bfps/cpp/fluid_solver_base.cpp @@ -52,7 +52,7 @@ void fluid_solver_base<rnumber>::clean_up_real_space(rnumber *a, int howmany) template <class rnumber> double fluid_solver_base<rnumber>::autocorrel(cnumber *a) { - double *spec = fftw_alloc_real(this->nshells*9); + double *spec = fftw_interface<double>::alloc_real(this->nshells*9); double sum_local; this->cospectrum(a, a, spec); sum_local = 0.0; @@ -60,7 +60,7 @@ double fluid_solver_base<rnumber>::autocorrel(cnumber *a) { sum_local += spec[n*9] + spec[n*9 + 4] + spec[n*9 + 8]; } - fftw_free(spec); + fftw_interface<double>::free(spec); return sum_local; } @@ -427,7 +427,7 @@ template <class rnumber> void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, const double k2exponent) { TIMEZONE("fluid_solver_base::write_spectrum"); - double *spec = fftw_alloc_real(this->nshells); + double *spec = fftw_interface<double>::alloc_real(this->nshells); this->cospectrum(a, a, spec, k2exponent); if (this->cd->myrank == 0) { @@ -439,7 +439,7 @@ void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, c fwrite((void*)spec, sizeof(double), this->nshells, spec_file); fclose(spec_file); } - fftw_free(spec); + fftw_interface<double>::free(spec); } /*****************************************************************************/ diff --git a/bfps/cpp/slab_field_particles.cpp b/bfps/cpp/slab_field_particles.cpp index 15fa363f6d277d34c6081fd545c4578e1f735929..e3c84574062a4eabd5bf52d14a2b0d727c67b68e 100644 --- a/bfps/cpp/slab_field_particles.cpp +++ b/bfps/cpp/slab_field_particles.cpp @@ -69,11 +69,11 @@ slab_field_particles<rnumber>::slab_field_particles( this->buffer_width = this->interp_neighbours+1; this->buffer_size = this->buffer_width*this->fs->rd->slice_size; this->array_size = this->nparticles * this->ncomponents; - this->state = fftw_alloc_real(this->array_size); + this->state = fftw_interface<rnumber>::alloc_real(this->array_size); std::fill_n(this->state, this->array_size, 0.0); for (int i=0; i < this->integration_steps; i++) { - this->rhs[i] = fftw_alloc_real(this->array_size); + this->rhs[i] = fftw_interface<rnumber>::alloc_real(this->array_size); std::fill_n(this->rhs[i], this->array_size, 0.0); } this->watching = new bool[this->fs->rd->nprocs*nparticles]; @@ -131,10 +131,10 @@ slab_field_particles<rnumber>::~slab_field_particles() { delete[] this->computing; delete[] this->watching; - fftw_free(this->state); + fftw_interface<rnumber>::free(this->state); for (int i=0; i < this->integration_steps; i++) { - fftw_free(this->rhs[i]); + fftw_interface<rnumber>::free(this->rhs[i]); } delete[] this->lbound; delete[] this->ubound; @@ -193,7 +193,7 @@ void slab_field_particles<rnumber>::synchronize_single_particle_state(int p, dou template <class rnumber> void slab_field_particles<rnumber>::synchronize() { - double *tstate = fftw_alloc_real(this->array_size); + double *tstate = fftw_interface<double>::alloc_real(this->array_size); // first, synchronize state and jump across CPUs std::fill_n(tstate, this->array_size, 0.0); for (int p=0; p<this->nparticles; p++) @@ -236,14 +236,14 @@ void slab_field_particles<rnumber>::synchronize() this->fs->rd->comm); } } - fftw_free(tstate); + fftw_interface<double>::free(tstate); // assignment of particles for (int p=0; p<this->nparticles; p++) { this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]); //DEBUG_MSG("synchronizing particles, particle %d computing is %d\n", p, this->computing[p]); } - double *jump = fftw_alloc_real(this->nparticles); + double *jump = fftw_interface<double>::alloc_real(this->nparticles); this->jump_estimate(jump); // now, see who needs to watch bool *local_watching = new bool[this->fs->rd->nprocs*this->nparticles]; @@ -255,7 +255,7 @@ void slab_field_particles<rnumber>::synchronize() local_watching[this->get_rank(this->state[this->ncomponents*p+2]-jump[p])*this->nparticles+p] = true; local_watching[this->get_rank(this->state[this->ncomponents*p+2]+jump[p])*this->nparticles+p] = true; } - fftw_free(jump); + fftw_interface<double>::free(jump); MPI_Allreduce( local_watching, this->watching, @@ -389,7 +389,7 @@ void slab_field_particles<rnumber>::step() template <class rnumber> void slab_field_particles<rnumber>::Euler() { - double *y = fftw_alloc_real(this->array_size); + double *y = fftw_interface<double>::alloc_real(this->array_size); this->get_rhs(this->state, y); for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) { @@ -399,7 +399,7 @@ void slab_field_particles<rnumber>::Euler() // "particle %d state is %lg %lg %lg\n", // p, this->state[p*this->ncomponents], this->state[p*this->ncomponents+1], this->state[p*this->ncomponents+2]); } - fftw_free(y); + fftw_interface<double>::free(y); }