diff --git a/bfps/cpp/fftw_interface.hpp b/bfps/cpp/fftw_interface.hpp
index 495ec9fa3712153df4d31faf7dfb3046637b5483..682850c86aa4eb1c6326146459c3671338572542 100644
--- a/bfps/cpp/fftw_interface.hpp
+++ b/bfps/cpp/fftw_interface.hpp
@@ -34,6 +34,15 @@
 #define DEFAULT_FFTW_FLAG FFTW_PATIENT
 #endif
 
+// To have multiple calls to c2r/r2c
+#define SPLIT_FFTW_MANY
+#ifdef SPLIT_FFTW_MANY
+#include <vector>
+#include <memory>
+#include <algorithm>
+#include <cassert>
+#endif
+
 template <class realtype>
 class fftw_interface;
 
@@ -45,6 +54,31 @@ public:
     using complex = fftwf_complex;
     using plan = fftwf_plan;
     using iodim = fftwf_iodim;
+#ifdef SPLIT_FFTW_MANY
+    struct many_plan_container{
+        int rnk;
+        std::vector<ptrdiff_t> n;
+        int howmany;
+        ptrdiff_t iblock;
+        ptrdiff_t oblock;
+        std::shared_ptr<real> buffer;
+        plan plan_to_use;
+
+        ptrdiff_t local_n0, local_0_start;
+        ptrdiff_t local_n1, local_1_start;
+
+        bool is_r2c;
+        void* in;
+        void* out;
+
+        ptrdiff_t nb_real_to_copy;
+        ptrdiff_t nb_complex_to_copy;
+    };
+
+    using many_plan = many_plan_container;
+#else
+    using many_plan = fftwf_plan;
+#endif
 
     static complex* alloc_complex(const size_t in_size){
         return fftwf_alloc_complex(in_size);
@@ -66,6 +100,16 @@ public:
         fftwf_destroy_plan(in_plan);
     }
 
+    template <class ... Params>
+    static ptrdiff_t mpi_local_size_many_transposed(Params ... params){
+        return fftwf_mpi_local_size_many_transposed(params...);
+    }
+
+    template <class ... Params>
+    static ptrdiff_t mpi_local_size_many(Params ... params){
+        return fftwf_mpi_local_size_many(params...);
+    }
+
     template <class ... Params>
     static plan mpi_plan_transpose(Params ... params){
         return fftwf_mpi_plan_transpose(params...);
@@ -86,6 +130,175 @@ public:
         return fftwf_plan_guru_dft(params...);
     }
 
+#ifdef SPLIT_FFTW_MANY
+    static many_plan mpi_plan_many_dft_c2r(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
+                                                         ptrdiff_t iblock, ptrdiff_t oblock,
+                                                         complex *in, real *out,
+                                                         MPI_Comm comm, unsigned flags){
+        assert(iblock == FFTW_MPI_DEFAULT_BLOCK);
+        assert(oblock == FFTW_MPI_DEFAULT_BLOCK);
+
+        many_plan c2r_plan;
+        c2r_plan.rnk = rnk;
+        c2r_plan.n.insert(c2r_plan.n.end(), n, n+rnk);
+        c2r_plan.howmany = howmany;
+        c2r_plan.iblock = iblock;
+        c2r_plan.oblock = oblock;
+        c2r_plan.is_r2c = false;
+        c2r_plan.in = in;
+        c2r_plan.out = out;
+
+        // If 1 then use default without copy
+        if(howmany == 1){
+            c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n,
+                                           (complex*)in,
+                                           out,
+                                           comm, flags);
+            return c2r_plan;
+        }
+
+        // We need to find out the size of the buffer to allocate
+        mpi_local_size_many_transposed(
+                rnk, n, howmany,
+                FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, comm,
+                &c2r_plan.local_n0, &c2r_plan.local_0_start,
+                &c2r_plan.local_n1, &c2r_plan.local_1_start);
+
+        ptrdiff_t sizeBuffer = c2r_plan.local_n0/howmany;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            sizeBuffer *= n[idxrnk];
+        }
+        sizeBuffer *= n[rnk-1]+2;
+
+        c2r_plan.buffer.reset(alloc_real(sizeBuffer));
+        // Init the plan
+        c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n,
+                                         (complex*)c2r_plan.buffer.get(),
+                                         c2r_plan.buffer.get(),
+                                         comm, flags);
+
+        c2r_plan.nb_real_to_copy = c2r_plan.local_n0/howmany;
+        c2r_plan.nb_complex_to_copy = c2r_plan.local_n0/howmany;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            c2r_plan.nb_real_to_copy *= n[idxrnk];
+            c2r_plan.nb_complex_to_copy *= n[idxrnk];
+        }
+        c2r_plan.nb_real_to_copy *= n[rnk-1];
+        c2r_plan.nb_complex_to_copy *= n[rnk-1]/2 + 1;
+
+        return c2r_plan;
+    }
+
+    static many_plan mpi_plan_many_dft_r2c(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
+                                                         ptrdiff_t iblock, ptrdiff_t oblock,
+                                                         real *in, complex *out,
+                                                         MPI_Comm comm, unsigned flags){
+        assert(iblock == FFTW_MPI_DEFAULT_BLOCK);
+        assert(oblock == FFTW_MPI_DEFAULT_BLOCK);
+
+        many_plan r2c_plan;
+        r2c_plan.rnk = rnk;
+        r2c_plan.n.insert(r2c_plan.n.end(), n, n+rnk);
+        r2c_plan.howmany = howmany;
+        r2c_plan.iblock = iblock;
+        r2c_plan.oblock = oblock;
+        r2c_plan.is_r2c = true;
+        r2c_plan.in = in;
+        r2c_plan.out = out;
+
+        // If 1 then use default without copy
+        if(howmany == 1){
+            r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n,
+                                           in,
+                                           (complex*)out,
+                                           comm, flags);
+            return r2c_plan;
+        }
+
+        // We need to find out the size of the buffer to allocate
+        mpi_local_size_many_transposed(
+                rnk, n, howmany,
+                FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, comm,
+                &r2c_plan.local_n0, &r2c_plan.local_0_start,
+                &r2c_plan.local_n1, &r2c_plan.local_1_start);
+
+        ptrdiff_t sizeBuffer = r2c_plan.local_n0/howmany;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            sizeBuffer *= n[idxrnk];
+        }
+        sizeBuffer *= n[rnk-1]+2;
+
+        r2c_plan.buffer.reset(alloc_real(sizeBuffer));
+        // Init the plan
+        r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n,
+                                         r2c_plan.buffer.get(),
+                                         (complex*)r2c_plan.buffer.get(),
+                                         comm, flags);
+
+
+        r2c_plan.nb_real_to_copy = r2c_plan.local_n0/howmany;
+        r2c_plan.nb_complex_to_copy = r2c_plan.local_n0/howmany;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            r2c_plan.nb_real_to_copy *= n[idxrnk];
+            r2c_plan.nb_complex_to_copy *= n[idxrnk];
+        }
+        r2c_plan.nb_real_to_copy *= n[rnk-1];
+        r2c_plan.nb_complex_to_copy *= n[rnk-1]/2 + 1;
+
+        return r2c_plan;
+    }
+
+    static void execute(many_plan& in_plan){
+        if(in_plan.howmany == 1){
+            execute(in_plan.plan_to_use);
+            return;
+        }
+
+        const ptrdiff_t nb_to_copy_to_buffer = (in_plan.is_r2c?in_plan.nb_real_to_copy:in_plan.nb_complex_to_copy);
+        const ptrdiff_t nb_to_copy_from_buffer = (!in_plan.is_r2c?in_plan.nb_real_to_copy:in_plan.nb_complex_to_copy);
+
+        for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){
+            // Copy to buffer
+            if(in_plan.is_r2c){
+                real* dest = in_plan.buffer.get();
+                const real* src = ((const real*)in_plan.in)+idx_howmany;
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_to_buffer ; ++idx_copy){
+                    dest[idx_copy] = src[idx_copy*in_plan.howmany];
+                }
+            }
+            else{
+                complex* dest = (complex*)in_plan.buffer.get();
+                const complex* src = ((const complex*)in_plan.in)+idx_howmany;
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_to_buffer ; ++idx_copy){
+                    dest[idx_copy][0] = src[idx_copy*in_plan.howmany][0];
+                    dest[idx_copy][1] = src[idx_copy*in_plan.howmany][1];
+                }
+            }
+
+            execute(in_plan.plan_to_use);
+            // Copy result from buffer
+            if(in_plan.is_r2c){
+                complex* dest = ((complex*)in_plan.in)+idx_howmany;
+                const complex* src = (const complex*)in_plan.buffer.get();
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_from_buffer ; ++idx_copy){
+                    dest[idx_copy*in_plan.howmany][0] = src[idx_copy][0];
+                    dest[idx_copy*in_plan.howmany][1] = src[idx_copy][1];
+                }
+            }
+            else{
+                real* dest = ((real*)in_plan.in)+idx_howmany;
+                const real* src = in_plan.buffer.get();
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_from_buffer ; ++idx_copy){
+                    dest[idx_copy*in_plan.howmany] = src[idx_copy];
+                }
+            }
+        }
+    }
+
+    static void destroy_plan(many_plan& in_plan){
+        destroy_plan(in_plan.plan_to_use);
+    }
+#else
     template <class ... Params>
     static plan mpi_plan_many_dft_c2r(Params ... params){
         return fftwf_mpi_plan_many_dft_c2r(params...);
@@ -95,6 +308,17 @@ public:
     static plan mpi_plan_many_dft_r2c(Params ... params){
         return fftwf_mpi_plan_many_dft_r2c(params...);
     }
+#endif
+
+    template <class ... Params>
+    static plan mpi_plan_dft_c2r(Params ... params){
+        return fftwf_mpi_plan_dft_c2r(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_dft_r2c(Params ... params){
+        return fftwf_mpi_plan_dft_r2c(params...);
+    }
 
     template <class ... Params>
     static plan mpi_plan_dft_c2r_3d(Params ... params){
@@ -110,6 +334,31 @@ public:
     using complex = fftw_complex;
     using plan = fftw_plan;
     using iodim = fftw_iodim;
+#ifdef SPLIT_FFTW_MANY
+    struct many_plan_container{
+        int rnk;
+        std::vector<ptrdiff_t> n;
+        int howmany;
+        ptrdiff_t iblock;
+        ptrdiff_t oblock;
+        std::shared_ptr<real> buffer;
+        plan plan_to_use;
+
+        ptrdiff_t local_n0, local_0_start;
+        ptrdiff_t local_n1, local_1_start;
+
+        bool is_r2c;
+        void* in;
+        void* out;
+
+        ptrdiff_t nb_real_to_copy;
+        ptrdiff_t nb_complex_to_copy;
+    };
+
+    using many_plan = many_plan_container;
+#else
+    using many_plan = fftw_plan;
+#endif
 
     static complex* alloc_complex(const size_t in_size){
         return fftw_alloc_complex(in_size);
@@ -131,6 +380,16 @@ public:
         fftw_destroy_plan(in_plan);
     }
 
+    template <class ... Params>
+    static ptrdiff_t mpi_local_size_many_transposed(Params ... params){
+        return fftw_mpi_local_size_many_transposed(params...);
+    }
+
+    template <class ... Params>
+    static ptrdiff_t mpi_local_size_many(Params ... params){
+        return fftw_mpi_local_size_many(params...);
+    }
+
     template <class ... Params>
     static plan mpi_plan_transpose(Params ... params){
         return fftw_mpi_plan_transpose(params...);
@@ -151,6 +410,176 @@ public:
         return fftw_plan_guru_dft(params...);
     }
 
+
+#ifdef SPLIT_FFTW_MANY
+    static many_plan mpi_plan_many_dft_c2r(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
+                                                         ptrdiff_t iblock, ptrdiff_t oblock,
+                                                         complex *in, real *out,
+                                                         MPI_Comm comm, unsigned flags){
+        assert(iblock == FFTW_MPI_DEFAULT_BLOCK);
+        assert(oblock == FFTW_MPI_DEFAULT_BLOCK);
+
+        many_plan c2r_plan;
+        c2r_plan.rnk = rnk;
+        c2r_plan.n.insert(c2r_plan.n.end(), n, n+rnk);
+        c2r_plan.howmany = howmany;
+        c2r_plan.iblock = iblock;
+        c2r_plan.oblock = oblock;
+        c2r_plan.is_r2c = false;
+        c2r_plan.in = in;
+        c2r_plan.out = out;
+
+        // If 1 then use default without copy
+        if(howmany == 1){
+            c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n,
+                                           (complex*)in,
+                                           out,
+                                           comm, flags);
+            return c2r_plan;
+        }
+
+        // We need to find out the size of the buffer to allocate
+        mpi_local_size_many_transposed(
+                rnk, n, howmany,
+                FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, comm,
+                &c2r_plan.local_n0, &c2r_plan.local_0_start,
+                &c2r_plan.local_n1, &c2r_plan.local_1_start);
+
+        ptrdiff_t sizeBuffer = c2r_plan.local_n0/howmany;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            sizeBuffer *= n[idxrnk];
+        }
+        sizeBuffer *= n[rnk-1]+2;
+
+        c2r_plan.buffer.reset(alloc_real(sizeBuffer));
+        // Init the plan
+        c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n,
+                                         (complex*)c2r_plan.buffer.get(),
+                                         c2r_plan.buffer.get(),
+                                         comm, flags);
+
+        c2r_plan.nb_real_to_copy = c2r_plan.local_n0/howmany;
+        c2r_plan.nb_complex_to_copy = c2r_plan.local_n0/howmany;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            c2r_plan.nb_real_to_copy *= n[idxrnk];
+            c2r_plan.nb_complex_to_copy *= n[idxrnk];
+        }
+        c2r_plan.nb_real_to_copy *= n[rnk-1];
+        c2r_plan.nb_complex_to_copy *= n[rnk-1]/2 + 1;
+
+        return c2r_plan;
+    }
+
+    static many_plan mpi_plan_many_dft_r2c(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
+                                                         ptrdiff_t iblock, ptrdiff_t oblock,
+                                                         real *in, complex *out,
+                                                         MPI_Comm comm, unsigned flags){
+        assert(iblock == FFTW_MPI_DEFAULT_BLOCK);
+        assert(oblock == FFTW_MPI_DEFAULT_BLOCK);
+
+        many_plan r2c_plan;
+        r2c_plan.rnk = rnk;
+        r2c_plan.n.insert(r2c_plan.n.end(), n, n+rnk);
+        r2c_plan.howmany = howmany;
+        r2c_plan.iblock = iblock;
+        r2c_plan.oblock = oblock;
+        r2c_plan.is_r2c = true;
+        r2c_plan.in = in;
+        r2c_plan.out = out;
+
+        // If 1 then use default without copy
+        if(howmany == 1){
+            r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n,
+                                           in,
+                                           (complex*)out,
+                                           comm, flags);
+            return r2c_plan;
+        }
+
+        // We need to find out the size of the buffer to allocate
+        mpi_local_size_many_transposed(
+                rnk, n, howmany,
+                FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, comm,
+                &r2c_plan.local_n0, &r2c_plan.local_0_start,
+                &r2c_plan.local_n1, &r2c_plan.local_1_start);
+
+        ptrdiff_t sizeBuffer = r2c_plan.local_n0/howmany;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            sizeBuffer *= n[idxrnk];
+        }
+        sizeBuffer *= n[rnk-1]+2;
+
+        r2c_plan.buffer.reset(alloc_real(sizeBuffer));
+        // Init the plan
+        r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n,
+                                         r2c_plan.buffer.get(),
+                                         (complex*)r2c_plan.buffer.get(),
+                                         comm, flags);
+
+
+        r2c_plan.nb_real_to_copy = r2c_plan.local_n0/howmany;
+        r2c_plan.nb_complex_to_copy = r2c_plan.local_n0/howmany;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            r2c_plan.nb_real_to_copy *= n[idxrnk];
+            r2c_plan.nb_complex_to_copy *= n[idxrnk];
+        }
+        r2c_plan.nb_real_to_copy *= n[rnk-1];
+        r2c_plan.nb_complex_to_copy *= n[rnk-1]/2 + 1;
+
+        return r2c_plan;
+    }
+
+    static void execute(many_plan& in_plan){
+        if(in_plan.howmany == 1){
+            execute(in_plan.plan_to_use);
+            return;
+        }
+
+        const ptrdiff_t nb_to_copy_to_buffer = (in_plan.is_r2c?in_plan.nb_real_to_copy:in_plan.nb_complex_to_copy);
+        const ptrdiff_t nb_to_copy_from_buffer = (!in_plan.is_r2c?in_plan.nb_real_to_copy:in_plan.nb_complex_to_copy);
+
+        for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){
+            // Copy to buffer
+            if(in_plan.is_r2c){
+                real* dest = in_plan.buffer.get();
+                const real* src = ((const real*)in_plan.in)+idx_howmany;
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_to_buffer ; ++idx_copy){
+                    dest[idx_copy] = src[idx_copy*in_plan.howmany];
+                }
+            }
+            else{
+                complex* dest = (complex*)in_plan.buffer.get();
+                const complex* src = ((const complex*)in_plan.in)+idx_howmany;
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_to_buffer ; ++idx_copy){
+                    dest[idx_copy][0] = src[idx_copy*in_plan.howmany][0];
+                    dest[idx_copy][1] = src[idx_copy*in_plan.howmany][1];
+                }
+            }
+
+            execute(in_plan.plan_to_use);
+            // Copy result from buffer
+            if(in_plan.is_r2c){
+                complex* dest = ((complex*)in_plan.in)+idx_howmany;
+                const complex* src = (const complex*)in_plan.buffer.get();
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_from_buffer ; ++idx_copy){
+                    dest[idx_copy*in_plan.howmany][0] = src[idx_copy][0];
+                    dest[idx_copy*in_plan.howmany][1] = src[idx_copy][1];
+                }
+            }
+            else{
+                real* dest = ((real*)in_plan.in)+idx_howmany;
+                const real* src = in_plan.buffer.get();
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < nb_to_copy_from_buffer ; ++idx_copy){
+                    dest[idx_copy*in_plan.howmany] = src[idx_copy];
+                }
+            }
+        }
+    }
+
+    static void destroy_plan(many_plan& in_plan){
+        destroy_plan(in_plan.plan_to_use);
+    }
+#else
     template <class ... Params>
     static plan mpi_plan_many_dft_c2r(Params ... params){
         return fftw_mpi_plan_many_dft_c2r(params...);
@@ -160,6 +589,17 @@ public:
     static plan mpi_plan_many_dft_r2c(Params ... params){
         return fftw_mpi_plan_many_dft_r2c(params...);
     }
+#endif
+
+    template <class ... Params>
+    static plan mpi_plan_dft_c2r(Params ... params){
+        return fftw_mpi_plan_dft_c2r(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_dft_r2c(Params ... params){
+        return fftw_mpi_plan_dft_r2c(params...);
+    }
 
     template <class ... Params>
     static plan mpi_plan_dft_c2r_3d(Params ... params){
diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp
index 6f2ff938fdafdb7c96ab9f696ff7b6a46a8c1196..e20207c15e9762b157da9e43263c5d73b2701775 100644
--- a/bfps/cpp/field.cpp
+++ b/bfps/cpp/field.cpp
@@ -77,7 +77,7 @@ field<rnumber, be, fc>::field(
             ptrdiff_t local_n0, local_0_start;
             ptrdiff_t local_n1, local_1_start;
             //tmp_local_size = fftw_mpi_local_size_many_transposed(
-            fftw_mpi_local_size_many_transposed(
+            fftw_interface<rnumber>::mpi_local_size_many_transposed(
                     3, nfftw, ncomp(fc),
                     FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, this->comm,
                     &local_n0, &local_0_start,
diff --git a/bfps/cpp/field.hpp b/bfps/cpp/field.hpp
index 9a5ab1be25764f00f48ba86b4e420db7ead62394..d8038517afa8ca7faca19d664d96d027be105445 100644
--- a/bfps/cpp/field.hpp
+++ b/bfps/cpp/field.hpp
@@ -72,8 +72,8 @@ class field
         field_layout<fc> *clayout, *rlayout, *rmemlayout;
 
         /* FFT plans */
-        typename fftw_interface<rnumber>::plan c2r_plan;
-        typename fftw_interface<rnumber>::plan r2c_plan;
+        typename fftw_interface<rnumber>::many_plan c2r_plan;
+        typename fftw_interface<rnumber>::many_plan r2c_plan;
         unsigned fftw_plan_rigor;
 
         /* HDF5 data types for arrays */
diff --git a/bfps/cpp/field_descriptor.cpp b/bfps/cpp/field_descriptor.cpp
index 20c634262dbb45ad4c2bb5a1b5640b6df23d4d2c..cb7da99514417937c54d54b8583d1e6f1b7007fc 100644
--- a/bfps/cpp/field_descriptor.cpp
+++ b/bfps/cpp/field_descriptor.cpp
@@ -62,7 +62,7 @@ field_descriptor<rnumber>::field_descriptor(
     ptrdiff_t local_n0, local_0_start;
     for (int i = 0; i < this->ndims; i++)
         nfftw[i] = n[i];
-    this->local_size = fftw_mpi_local_size_many(
+    this->local_size = fftw_interface<rnumber>::mpi_local_size_many(
                 this->ndims,
                 &nfftw.front(),
                 1,
diff --git a/bfps/cpp/fluid_solver.cpp b/bfps/cpp/fluid_solver.cpp
index 319186103797f8135d4d3e2244ed5e3a8f271b00..7ec0c978102f2d0cad00d57d837fad6c141f91fb 100644
--- a/bfps/cpp/fluid_solver.cpp
+++ b/bfps/cpp/fluid_solver.cpp
@@ -86,10 +86,10 @@ fluid_solver<rnumber>::fluid_solver(
     this->rv[1] = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2);
     this->rv[2] = this->rv[1];
 
-    this->c2r_vorticity = new typename fftw_interface<rnumber>::plan;
-    this->r2c_vorticity = new typename fftw_interface<rnumber>::plan;
-    this->c2r_velocity  = new typename fftw_interface<rnumber>::plan;
-    this->r2c_velocity  = new typename fftw_interface<rnumber>::plan;
+    this->c2r_vorticity = new typename fftw_interface<rnumber>::many_plan;
+    this->r2c_vorticity = new typename fftw_interface<rnumber>::many_plan;
+    this->c2r_velocity  = new typename fftw_interface<rnumber>::many_plan;
+    this->r2c_velocity  = new typename fftw_interface<rnumber>::many_plan;
 
     ptrdiff_t sizes[] = {nz,
                          ny,
@@ -120,10 +120,10 @@ fluid_solver<rnumber>::fluid_solver(
     this->vc2r[0] = this->c2r_vorticity;
     this->vr2c[0] = this->r2c_vorticity;
 
-    this->vc2r[1] = new typename fftw_interface<rnumber>::plan;
-    this->vr2c[1] = new typename fftw_interface<rnumber>::plan;
-    this->vc2r[2] = new typename fftw_interface<rnumber>::plan;
-    this->vr2c[2] = new typename fftw_interface<rnumber>::plan;
+    this->vc2r[1] = new typename fftw_interface<rnumber>::many_plan;
+    this->vr2c[1] = new typename fftw_interface<rnumber>::many_plan;
+    this->vc2r[2] = new typename fftw_interface<rnumber>::many_plan;
+    this->vr2c[2] = new typename fftw_interface<rnumber>::many_plan;
 
     *(this->vc2r[1]) = fftw_interface<rnumber>::mpi_plan_many_dft_c2r(
                 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
diff --git a/bfps/cpp/fluid_solver.hpp b/bfps/cpp/fluid_solver.hpp
index 4cc75cee4385353f64dc9bc9e7d34c6efba9ad48..aaddbb59b4a29530779e3dba81f90a06c790bdcb 100644
--- a/bfps/cpp/fluid_solver.hpp
+++ b/bfps/cpp/fluid_solver.hpp
@@ -55,12 +55,12 @@ class fluid_solver:public fluid_solver_base<rnumber>
         typename fluid_solver_base<rnumber>::cnumber *cu, *cv[4];
 
         /* plans */
-        typename fftw_interface<rnumber>::plan *c2r_vorticity;
-        typename fftw_interface<rnumber>::plan *r2c_vorticity;
-        typename fftw_interface<rnumber>::plan *c2r_velocity;
-        typename fftw_interface<rnumber>::plan *r2c_velocity;
-        typename fftw_interface<rnumber>::plan *uc2r, *ur2c;
-        typename fftw_interface<rnumber>::plan *vr2c[3], *vc2r[3];
+        typename fftw_interface<rnumber>::many_plan *c2r_vorticity;
+        typename fftw_interface<rnumber>::many_plan *r2c_vorticity;
+        typename fftw_interface<rnumber>::many_plan *c2r_velocity;
+        typename fftw_interface<rnumber>::many_plan *r2c_velocity;
+        typename fftw_interface<rnumber>::many_plan *uc2r, *ur2c;
+        typename fftw_interface<rnumber>::many_plan *vr2c[3], *vc2r[3];
 
         /* physical parameters */
         double nu;
diff --git a/bfps/cpp/fluid_solver_base.cpp b/bfps/cpp/fluid_solver_base.cpp
index 6e4fd3335238218bad0b78462d3506ca9b48c721..b1d64ef5ce8294efa53cac23b391700b6b8574d3 100644
--- a/bfps/cpp/fluid_solver_base.cpp
+++ b/bfps/cpp/fluid_solver_base.cpp
@@ -52,7 +52,7 @@ void fluid_solver_base<rnumber>::clean_up_real_space(rnumber *a, int howmany)
 template <class rnumber>
 double fluid_solver_base<rnumber>::autocorrel(cnumber *a)
 {
-    double *spec = fftw_alloc_real(this->nshells*9);
+    double *spec = fftw_interface<double>::alloc_real(this->nshells*9);
     double sum_local;
     this->cospectrum(a, a, spec);
     sum_local = 0.0;
@@ -60,7 +60,7 @@ double fluid_solver_base<rnumber>::autocorrel(cnumber *a)
     {
         sum_local += spec[n*9] + spec[n*9 + 4] + spec[n*9 + 8];
     }
-    fftw_free(spec);
+    fftw_interface<double>::free(spec);
     return sum_local;
 }
 
@@ -427,7 +427,7 @@ template <class rnumber>
 void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, const double k2exponent)
 {
     TIMEZONE("fluid_solver_base::write_spectrum");
-    double *spec = fftw_alloc_real(this->nshells);
+    double *spec = fftw_interface<double>::alloc_real(this->nshells);
     this->cospectrum(a, a, spec, k2exponent);
     if (this->cd->myrank == 0)
     {
@@ -439,7 +439,7 @@ void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, c
         fwrite((void*)spec, sizeof(double), this->nshells, spec_file);
         fclose(spec_file);
     }
-    fftw_free(spec);
+    fftw_interface<double>::free(spec);
 }
 
 /*****************************************************************************/
diff --git a/bfps/cpp/slab_field_particles.cpp b/bfps/cpp/slab_field_particles.cpp
index 15fa363f6d277d34c6081fd545c4578e1f735929..e3c84574062a4eabd5bf52d14a2b0d727c67b68e 100644
--- a/bfps/cpp/slab_field_particles.cpp
+++ b/bfps/cpp/slab_field_particles.cpp
@@ -69,11 +69,11 @@ slab_field_particles<rnumber>::slab_field_particles(
     this->buffer_width = this->interp_neighbours+1;
     this->buffer_size = this->buffer_width*this->fs->rd->slice_size;
     this->array_size = this->nparticles * this->ncomponents;
-    this->state = fftw_alloc_real(this->array_size);
+    this->state = fftw_interface<rnumber>::alloc_real(this->array_size);
     std::fill_n(this->state, this->array_size, 0.0);
     for (int i=0; i < this->integration_steps; i++)
     {
-        this->rhs[i] = fftw_alloc_real(this->array_size);
+        this->rhs[i] = fftw_interface<rnumber>::alloc_real(this->array_size);
         std::fill_n(this->rhs[i], this->array_size, 0.0);
     }
     this->watching = new bool[this->fs->rd->nprocs*nparticles];
@@ -131,10 +131,10 @@ slab_field_particles<rnumber>::~slab_field_particles()
 {
     delete[] this->computing;
     delete[] this->watching;
-    fftw_free(this->state);
+    fftw_interface<rnumber>::free(this->state);
     for (int i=0; i < this->integration_steps; i++)
     {
-        fftw_free(this->rhs[i]);
+        fftw_interface<rnumber>::free(this->rhs[i]);
     }
     delete[] this->lbound;
     delete[] this->ubound;
@@ -193,7 +193,7 @@ void slab_field_particles<rnumber>::synchronize_single_particle_state(int p, dou
 template <class rnumber>
 void slab_field_particles<rnumber>::synchronize()
 {
-    double *tstate = fftw_alloc_real(this->array_size);
+    double *tstate = fftw_interface<double>::alloc_real(this->array_size);
     // first, synchronize state and jump across CPUs
     std::fill_n(tstate, this->array_size, 0.0);
     for (int p=0; p<this->nparticles; p++)
@@ -236,14 +236,14 @@ void slab_field_particles<rnumber>::synchronize()
                     this->fs->rd->comm);
         }
     }
-    fftw_free(tstate);
+    fftw_interface<double>::free(tstate);
     // assignment of particles
     for (int p=0; p<this->nparticles; p++)
     {
         this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]);
         //DEBUG_MSG("synchronizing particles, particle %d computing is %d\n", p, this->computing[p]);
     }
-    double *jump = fftw_alloc_real(this->nparticles);
+    double *jump = fftw_interface<double>::alloc_real(this->nparticles);
     this->jump_estimate(jump);
     // now, see who needs to watch
     bool *local_watching = new bool[this->fs->rd->nprocs*this->nparticles];
@@ -255,7 +255,7 @@ void slab_field_particles<rnumber>::synchronize()
             local_watching[this->get_rank(this->state[this->ncomponents*p+2]-jump[p])*this->nparticles+p] = true;
             local_watching[this->get_rank(this->state[this->ncomponents*p+2]+jump[p])*this->nparticles+p] = true;
         }
-    fftw_free(jump);
+    fftw_interface<double>::free(jump);
     MPI_Allreduce(
             local_watching,
             this->watching,
@@ -389,7 +389,7 @@ void slab_field_particles<rnumber>::step()
 template <class rnumber>
 void slab_field_particles<rnumber>::Euler()
 {
-    double *y = fftw_alloc_real(this->array_size);
+    double *y = fftw_interface<double>::alloc_real(this->array_size);
     this->get_rhs(this->state, y);
     for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
     {
@@ -399,7 +399,7 @@ void slab_field_particles<rnumber>::Euler()
         //        "particle %d state is %lg %lg %lg\n",
         //        p, this->state[p*this->ncomponents], this->state[p*this->ncomponents+1], this->state[p*this->ncomponents+2]);
     }
-    fftw_free(y);
+    fftw_interface<double>::free(y);
 }