diff --git a/bfps/DNS.py b/bfps/DNS.py
index e6ace758a1f6efb240cfa0655cc661a83a1ab6e2..eb51862e414437b50151df870c739ebdcf23dae0 100644
--- a/bfps/DNS.py
+++ b/bfps/DNS.py
@@ -120,6 +120,7 @@ class DNS(_code):
         return None
     def generate_default_parameters(self):
         # these parameters are relevant for all DNS classes
+        self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE'
         self.parameters['dealias_type'] = int(1)
         self.parameters['dkx'] = float(1.0)
         self.parameters['dky'] = float(1.0)
@@ -929,18 +930,9 @@ class DNS(_code):
                      3)
         src_file = h5py.File(src_file_name, 'r')
         if (src_file[src_dset_name].shape == dst_shape):
-            if make_link and (src_file[src_dset_name].dtype == self.ctype):
-                dst_file[dst_dset_name] = h5py.ExternalLink(
-                        src_file_name,
-                        src_dset_name)
-            else:
-                dst_file.create_dataset(
-                        dst_dset_name,
-                        shape = dst_shape,
-                        dtype = self.ctype,
-                        fillvalue = 0.0)
-                for kz in range(src_file[src_dset_name].shape[0]):
-                    dst_file[dst_dset_name][kz] = src_file[src_dset_name][kz]
+            dst_file[dst_dset_name] = h5py.ExternalLink(
+                    src_file_name,
+                    src_dset_name)
         else:
             min_shape = (min(dst_shape[0], src_file[src_dset_name].shape[0]),
                          min(dst_shape[1], src_file[src_dset_name].shape[1]),
diff --git a/bfps/FluidConvert.py b/bfps/FluidConvert.py
deleted file mode 100644
index 58d19116bfb8ab386ef9783babb2ad8da79760e4..0000000000000000000000000000000000000000
--- a/bfps/FluidConvert.py
+++ /dev/null
@@ -1,140 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-import numpy as np
-import pickle
-import os
-from ._fluid_base import _fluid_particle_base
-from ._base import _base
-import bfps
-
-class FluidConvert(_fluid_particle_base):
-    """This class is meant to be used for conversion of native DNS field
-    representations to real-space representations of velocity/vorticity
-    fields.
-    It may be superseeded by streamlined functionality in the future...
-    """
-    def __init__(
-            self,
-            name = 'FluidConvert-v' + bfps.__version__,
-            work_dir = './',
-            simname = 'test',
-            fluid_precision = 'single',
-            use_fftw_wisdom = False):
-        _fluid_particle_base.__init__(
-                self,
-                name = name + '-' + fluid_precision,
-                work_dir = work_dir,
-                simname = simname,
-                dtype = fluid_precision,
-                use_fftw_wisdom = use_fftw_wisdom)
-        self.spec_parameters = {}
-        self.spec_parameters['write_rvelocity']  = 1
-        self.spec_parameters['write_rvorticity'] = 1
-        self.spec_parameters['write_rTrS2'] = 1
-        self.spec_parameters['write_renstrophy'] = 1
-        self.spec_parameters['write_rpressure'] = 1
-        self.spec_parameters['iter0'] = 0
-        self.spec_parameters['iter1'] = -1
-        self.fill_up_fluid_code()
-        self.finalize_code(postprocess_mode = True)
-        return None
-    def fill_up_fluid_code(self):
-        self.definitions += self.cread_pars(
-                parameters = self.spec_parameters,
-                function_suffix = '_specific',
-                file_group = 'conversion_parameters')
-        self.variables += self.cdef_pars(
-                parameters = self.spec_parameters)
-        self.main_start += 'read_parameters_specific();\n'
-        self.fluid_includes += '#include <cstring>\n'
-        self.fluid_variables += ('double t;\n' +
-                                 'fluid_solver<{0}> *fs;\n').format(self.C_dtype)
-        self.fluid_definitions += """
-                //begincpp
-                void do_conversion(fluid_solver<{0}> *bla)
-                {{
-                    bla->read('v', 'c');
-                    if (write_rvelocity)
-                        bla->write('u', 'r');
-                    if (write_rvorticity)
-                        bla->write('v', 'r');
-                    if (write_rTrS2)
-                        bla->write_rTrS2();
-                    if (write_renstrophy)
-                        bla->write_renstrophy();
-                    if (write_rpressure)
-                        bla->write_rpressure();
-                }}
-                //endcpp
-                """.format(self.C_dtype)
-        self.fluid_start += """
-                //begincpp
-                fs = new fluid_solver<{0}>(
-                        simname,
-                        nx, ny, nz,
-                        dkx, dky, dkz,
-                        dealias_type,
-                        DEFAULT_FFTW_FLAG);
-                //endcpp
-                """.format(self.C_dtype)
-        self.fluid_loop += """
-                //begincpp
-                fs->iteration = frame_index;
-                do_conversion(fs);
-                //endcpp
-                """
-        self.fluid_end += 'delete fs;\n'
-        return None
-    def specific_parser_arguments(
-            self,
-            parser):
-        _fluid_particle_base.specific_parser_arguments(self, parser)
-        self.parameters_to_parser_arguments(
-                parser,
-                parameters = self.spec_parameters)
-        return None
-    def launch(
-            self,
-            args = [],
-            **kwargs):
-        opt = self.prepare_launch(args)
-        if opt.iter1 == -1:
-            opt.iter1 = self.get_data_file()['iteration'].value
-        self.pars_from_namespace(
-                opt,
-                parameters = self.spec_parameters)
-        self.rewrite_par(
-                group = 'conversion_parameters',
-                parameters = self.spec_parameters)
-        self.run(opt.nb_processes,
-		 1,
-                 hours = opt.minutes // 60,
-                 minutes = opt.minutes % 60,
-                 err_file = 'err_convert',
-                 out_file = 'out_convert')
-        return None
-
diff --git a/bfps/NSManyParticles.py b/bfps/NSManyParticles.py
deleted file mode 100644
index 03f7345f61b27299bd2da60ea0c4d44924112837..0000000000000000000000000000000000000000
--- a/bfps/NSManyParticles.py
+++ /dev/null
@@ -1,92 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-import bfps
-
-class NSManyParticles(bfps.NavierStokes):
-    def specific_parser_arguments(
-            self,
-            parser):
-        bfps.NavierStokes.specific_parser_arguments(self, parser)
-        parser.add_argument(
-                '--particle-class',
-                default = 'rFFTW_distributed_particles',
-                dest = 'particle_class',
-                type = str)
-        parser.add_argument(
-                '--interpolator-class',
-                default = 'rFFTW_interpolator',
-                dest = 'interpolator_class',
-                type = str)
-        parser.add_argument('--neighbours',
-                type = int,
-                dest = 'neighbours',
-                default = 3)
-        parser.add_argument('--smoothness',
-                type = int,
-                dest = 'smoothness',
-                default = 2)
-        return None
-    def launch(
-            self,
-            args = [],
-            **kwargs):
-        opt = self.prepare_launch(args = args)
-        self.fill_up_fluid_code()
-        if type(opt.nparticles) == int:
-            if opt.nparticles > 0:
-                self.add_3D_rFFTW_field(
-                        name = 'rFFTW_acc')
-                interp_list = []
-                for n in range(1, opt.neighbours):
-                    interp_list.append('Lagrange_n{0}'.format(n))
-                    self.add_interpolator(
-                            interp_type = 'Lagrange',
-                            name = interp_list[-1],
-                            neighbours = n,
-                            class_name =  opt.interpolator_class)
-                    for m in range(1, opt.smoothness):
-                        interp_list.append('spline_n{0}m{1}'.format(n, m))
-                        self.add_interpolator(
-                                interp_type = 'spline',
-                                name = interp_list[-1],
-                                neighbours = n,
-                                smoothness = m,
-                                class_name =  opt.interpolator_class)
-                self.add_particles(
-                        integration_steps = 2,
-                        interpolator = interp_list,
-                        acc_name = 'rFFTW_acc',
-                        class_name = opt.particle_class)
-                self.add_particles(
-                        integration_steps = 4,
-                        interpolator = interp_list,
-                        acc_name = 'rFFTW_acc',
-                        class_name = opt.particle_class)
-        self.finalize_code()
-        self.launch_jobs(opt = opt)
-        return None
-
diff --git a/bfps/NSVorticityEquation.py b/bfps/NSVorticityEquation.py
deleted file mode 100644
index 5f87097fefbb56f731a75597395d42423fc17ba6..0000000000000000000000000000000000000000
--- a/bfps/NSVorticityEquation.py
+++ /dev/null
@@ -1,864 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-import sys
-import os
-import numpy as np
-import h5py
-import argparse
-
-import bfps
-import bfps.tools
-from bfps._code import _code
-from bfps._fluid_base import _fluid_particle_base
-
-class NSVorticityEquation(_fluid_particle_base):
-    def __init__(
-            self,
-            name = 'NSVorticityEquation-v' + bfps.__version__,
-            work_dir = './',
-            simname = 'test',
-            fluid_precision = 'single',
-            fftw_plan_rigor = 'FFTW_MEASURE',
-            use_fftw_wisdom = True):
-        """
-            This code uses checkpoints for DNS restarts, and it can be stopped
-            by creating the file "stop_<simname>" in the working directory.
-            For postprocessing of field snapshots, consider creating a separate
-            HDF5 file (from the python wrapper) which contains links to all the
-            different snapshots.
-        """
-        self.fftw_plan_rigor = fftw_plan_rigor
-        _fluid_particle_base.__init__(
-                self,
-                name = name + '-' + fluid_precision,
-                work_dir = work_dir,
-                simname = simname,
-                dtype = fluid_precision,
-                use_fftw_wisdom = use_fftw_wisdom)
-        self.parameters['nu'] = float(0.1)
-        self.parameters['fmode'] = 1
-        self.parameters['famplitude'] = float(0.5)
-        self.parameters['fk0'] = float(2.0)
-        self.parameters['fk1'] = float(4.0)
-        self.parameters['forcing_type'] = 'linear'
-        self.parameters['histogram_bins'] = int(256)
-        self.parameters['max_velocity_estimate'] = float(1)
-        self.parameters['max_vorticity_estimate'] = float(1)
-        self.parameters['checkpoints_per_file'] = int(1)
-        self.file_datasets_grow = """
-                //begincpp
-                hid_t group;
-                group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT);
-                H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL);
-                H5Gclose(group);
-                //endcpp
-                """
-        self.style = {}
-        self.statistics = {}
-        self.fluid_output = """
-                fs->io_checkpoint(false);
-                """
-        # vorticity_equation specific things
-        self.includes += '#include "vorticity_equation.hpp"\n'
-        self.store_kspace = """
-                //begincpp
-                if (myrank == 0 && iteration == 0)
-                {
-                    TIMEZONE("fluid_base::store_kspace");
-                    hsize_t dims[4];
-                    hid_t space, dset;
-                    // store kspace information
-                    dset = H5Dopen(stat_file, "/kspace/kshell", H5P_DEFAULT);
-                    space = H5Dget_space(dset);
-                    H5Sget_simple_extent_dims(space, dims, NULL);
-                    H5Sclose(space);
-                    if (fs->kk->nshells != dims[0])
-                    {
-                        DEBUG_MSG(
-                            "ERROR: computed nshells %d not equal to data file nshells %d\\n",
-                            fs->kk->nshells, dims[0]);
-                    }
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kshell.front());
-                    H5Dclose(dset);
-                    dset = H5Dopen(stat_file, "/kspace/nshell", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->nshell.front());
-                    H5Dclose(dset);
-                    dset = H5Dopen(stat_file, "/kspace/kM", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kM);
-                    H5Dclose(dset);
-                    dset = H5Dopen(stat_file, "/kspace/dk", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->dk);
-                    H5Dclose(dset);
-                }
-                //endcpp
-                """
-        return None
-    def add_particles(
-            self,
-            integration_steps = 2,
-            neighbours = 1,
-            smoothness = 1):
-        assert(integration_steps > 0 and integration_steps < 6)
-        self.particle_species = 1
-        self.parameters['tracers0_integration_steps'] = int(integration_steps)
-        self.parameters['tracers0_neighbours'] = int(neighbours)
-        self.parameters['tracers0_smoothness'] = int(smoothness)
-        self.parameters['tracers0_interpolator'] = 'spline'
-        self.particle_includes += """
-                #include "particles/particles_system_builder.hpp"
-                #include "particles/particles_output_hdf5.hpp"
-                """
-        ## initialize
-        self.particle_start += """
-            DEBUG_MSG(
-                    "current fname is %s\\n and iteration is %d",
-                    fs->get_current_fname().c_str(),
-                    fs->iteration);
-            std::unique_ptr<abstract_particles_system<long long int, double>> ps = particles_system_builder(
-                    fs->cvelocity,              // (field object)
-                    fs->kk,                     // (kspace object, contains dkx, dky, dkz)
-                    tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs)
-                    (long long int)nparticles,                 // to check coherency between parameters and hdf input file
-                    fs->get_current_fname(),    // particles input filename
-                    std::string("/tracers0/state/") + std::to_string(fs->iteration), // dataset name for initial input
-                    std::string("/tracers0/rhs/")  + std::to_string(fs->iteration), // dataset name for initial input
-                    tracers0_neighbours,        // parameter (interpolation no neighbours)
-                    tracers0_smoothness,        // parameter
-                    MPI_COMM_WORLD,
-                    fs->iteration+1);
-            particles_output_hdf5<long long int, double,3,3> particles_output_writer_mpi(
-                        MPI_COMM_WORLD,
-                        "tracers0",
-                        nparticles,
-                        tracers0_integration_steps);
-                    """
-        self.particle_loop += """
-                fs->compute_velocity(fs->cvorticity);
-                fs->cvelocity->ift();
-                ps->completeLoop(dt);
-                """
-        self.particle_output = """
-                {
-                    particles_output_writer_mpi.open_file(fs->get_current_fname());
-                    particles_output_writer_mpi.save(ps->getParticlesPositions(),
-                                                     ps->getParticlesRhs(),
-                                                     ps->getParticlesIndexes(),
-                                                     ps->getLocalNbParticles(),
-                                                     fs->iteration);
-                    particles_output_writer_mpi.close_file();
-                }
-                           """
-        self.particle_end += 'ps.release();\n'
-        return None
-    def create_stat_output(
-            self,
-            dset_name,
-            data_buffer,
-            data_type = 'H5T_NATIVE_DOUBLE',
-            size_setup = None,
-            close_spaces = True):
-        new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name)
-        if not type(size_setup) == type(None):
-            new_stat_output_txt += (
-                    size_setup +
-                    'wspace = H5Dget_space(Cdset);\n' +
-                    'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' +
-                    'mspace = H5Screate_simple(ndims, count, NULL);\n' +
-                    'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n')
-        new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' +
-                                'H5Dclose(Cdset);\n').format(data_type, data_buffer)
-        if close_spaces:
-            new_stat_output_txt += ('H5Sclose(mspace);\n' +
-                                    'H5Sclose(wspace);\n')
-        return new_stat_output_txt
-    def write_fluid_stats(self):
-        self.fluid_includes += '#include <cmath>\n'
-        self.fluid_includes += '#include "fftw_tools.hpp"\n'
-        self.stat_src += """
-                //begincpp
-                hid_t stat_group;
-                if (myrank == 0)
-                    stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT);
-                fs->compute_velocity(fs->cvorticity);
-                *tmp_vec_field = fs->cvelocity->get_cdata();
-                tmp_vec_field->compute_stats(
-                    fs->kk,
-                    stat_group,
-                    "velocity",
-                    fs->iteration / niter_stat,
-                    max_velocity_estimate/sqrt(3));
-                //endcpp
-                """
-        self.stat_src += """
-                //begincpp
-                *tmp_vec_field = fs->cvorticity->get_cdata();
-                tmp_vec_field->compute_stats(
-                    fs->kk,
-                    stat_group,
-                    "vorticity",
-                    fs->iteration / niter_stat,
-                    max_vorticity_estimate/sqrt(3));
-                //endcpp
-                """
-        self.stat_src += """
-                //begincpp
-                if (myrank == 0)
-                    H5Gclose(stat_group);
-                if (myrank == 0)
-                {{
-                    hid_t Cdset, wspace, mspace;
-                    int ndims;
-                    hsize_t count[4], offset[4], dims[4];
-                    offset[0] = fs->iteration/niter_stat;
-                    offset[1] = 0;
-                    offset[2] = 0;
-                    offset[3] = 0;
-                //endcpp
-                """.format(self.C_dtype)
-        if self.dtype == np.float32:
-            field_H5T = 'H5T_NATIVE_FLOAT'
-        elif self.dtype == np.float64:
-            field_H5T = 'H5T_NATIVE_DOUBLE'
-        self.stat_src += self.create_stat_output(
-                '/statistics/xlines/velocity',
-                'fs->rvelocity->get_rdata()',
-                data_type = field_H5T,
-                size_setup = """
-                    count[0] = 1;
-                    count[1] = nx;
-                    count[2] = 3;
-                    """,
-                close_spaces = False)
-        self.stat_src += self.create_stat_output(
-                '/statistics/xlines/vorticity',
-                'fs->rvorticity->get_rdata()',
-                data_type = field_H5T)
-        self.stat_src += '}\n'
-        ## checkpoint
-        self.stat_src += """
-                //begincpp
-                if (myrank == 0)
-                {
-                    std::string fname = (
-                        std::string("stop_") +
-                        std::string(simname));
-                    {
-                        struct stat file_buffer;
-                        stop_code_now = (stat(fname.c_str(), &file_buffer) == 0);
-                    }
-                }
-                MPI_Bcast(&stop_code_now, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
-                //endcpp
-                """
-        return None
-    def fill_up_fluid_code(self):
-        self.fluid_includes += '#include <cstring>\n'
-        self.fluid_variables += (
-                'vorticity_equation<{0}, FFTW> *fs;\n'.format(self.C_dtype) +
-                'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) +
-                'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype))
-        self.fluid_definitions += """
-                    typedef struct {{
-                        {0} re;
-                        {0} im;
-                    }} tmp_complex_type;
-                    """.format(self.C_dtype)
-        self.write_fluid_stats()
-        if self.dtype == np.float32:
-            field_H5T = 'H5T_NATIVE_FLOAT'
-        elif self.dtype == np.float64:
-            field_H5T = 'H5T_NATIVE_DOUBLE'
-        self.variables += 'int checkpoint;\n'
-        self.variables += 'bool stop_code_now;\n'
-        self.read_checkpoint = """
-                //begincpp
-                if (myrank == 0)
-                {
-                    hid_t dset = H5Dopen(stat_file, "checkpoint", H5P_DEFAULT);
-                    H5Dread(
-                        dset,
-                        H5T_NATIVE_INT,
-                        H5S_ALL,
-                        H5S_ALL,
-                        H5P_DEFAULT,
-                        &checkpoint);
-                    H5Dclose(dset);
-                }
-                MPI_Bcast(&checkpoint, 1, MPI_INT, 0, MPI_COMM_WORLD);
-                fs->checkpoint = checkpoint;
-                //endcpp
-        """
-        self.store_checkpoint = """
-                //begincpp
-                checkpoint = fs->checkpoint;
-                if (myrank == 0)
-                {
-                    hid_t dset = H5Dopen(stat_file, "checkpoint", H5P_DEFAULT);
-                    H5Dwrite(
-                        dset,
-                        H5T_NATIVE_INT,
-                        H5S_ALL,
-                        H5S_ALL,
-                        H5P_DEFAULT,
-                        &checkpoint);
-                    H5Dclose(dset);
-                }
-                //endcpp
-        """
-        self.fluid_start += """
-                //begincpp
-                char fname[512];
-                fs = new vorticity_equation<{0}, FFTW>(
-                        simname,
-                        nx, ny, nz,
-                        dkx, dky, dkz,
-                        {1});
-                tmp_vec_field = new field<{0}, FFTW, THREE>(
-                        nx, ny, nz,
-                        MPI_COMM_WORLD,
-                        {1});
-                tmp_scal_field = new field<{0}, FFTW, ONE>(
-                        nx, ny, nz,
-                        MPI_COMM_WORLD,
-                        {1});
-                fs->checkpoints_per_file = checkpoints_per_file;
-                fs->nu = nu;
-                fs->fmode = fmode;
-                fs->famplitude = famplitude;
-                fs->fk0 = fk0;
-                fs->fk1 = fk1;
-                strncpy(fs->forcing_type, forcing_type, 128);
-                fs->iteration = iteration;
-                {2}
-                fs->cvorticity->real_space_representation = false;
-                fs->io_checkpoint();
-                //endcpp
-                """.format(
-                        self.C_dtype,
-                        self.fftw_plan_rigor,
-                        self.read_checkpoint)
-        self.fluid_start += self.store_kspace
-        self.fluid_start += 'stop_code_now = false;\n'
-        self.fluid_loop = 'fs->step(dt);\n'
-        self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' +
-                            self.fluid_output +
-                            self.particle_output +
-                            self.store_checkpoint +
-                            '\n}\n' +
-                            'if (stop_code_now){\n' +
-                            'iteration = fs->iteration;\n' +
-                            'break;\n}\n')
-        self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' +
-                          self.fluid_output +
-                          self.particle_output +
-                          self.store_checkpoint +
-                          'DEBUG_MSG("checkpoint value is %d\\n", checkpoint);\n' +
-                          '\n}\n' +
-                          'delete fs;\n' +
-                          'delete tmp_vec_field;\n' +
-                          'delete tmp_scal_field;\n')
-        return None
-    def get_postprocess_file_name(self):
-        return os.path.join(self.work_dir, self.simname + '_postprocess.h5')
-    def get_postprocess_file(self):
-        return h5py.File(self.get_postprocess_file_name(), 'r')
-    def compute_statistics(self, iter0 = 0, iter1 = None):
-        """Run basic postprocessing on raw data.
-        The energy spectrum :math:`E(t, k)` and the enstrophy spectrum
-        :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the
-
-        .. math::
-
-            \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm
-            \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^*
-
-        tensors, and the enstrophy spectrum is also used to
-        compute the dissipation :math:`\\varepsilon(t)`.
-        These basic quantities are stored in a newly created HDF5 file,
-        ``simname_postprocess.h5``.
-        """
-        if len(list(self.statistics.keys())) > 0:
-            return None
-        self.read_parameters()
-        with self.get_data_file() as data_file:
-            if 'moments' not in data_file['statistics'].keys():
-                return None
-            iter0 = min((data_file['statistics/moments/velocity'].shape[0] *
-                         self.parameters['niter_stat']-1),
-                        iter0)
-            if type(iter1) == type(None):
-                iter1 = data_file['iteration'].value
-            else:
-                iter1 = min(data_file['iteration'].value, iter1)
-            ii0 = iter0 // self.parameters['niter_stat']
-            ii1 = iter1 // self.parameters['niter_stat']
-            self.statistics['kshell'] = data_file['kspace/kshell'].value
-            self.statistics['kM'] = data_file['kspace/kM'].value
-            self.statistics['dk'] = data_file['kspace/dk'].value
-            computation_needed = True
-            pp_file = h5py.File(self.get_postprocess_file_name(), 'a')
-            if 'ii0' in pp_file.keys():
-                computation_needed =  not (ii0 == pp_file['ii0'].value and
-                                           ii1 == pp_file['ii1'].value)
-                if computation_needed:
-                    for k in pp_file.keys():
-                        del pp_file[k]
-            if computation_needed:
-                pp_file['iter0'] = iter0
-                pp_file['iter1'] = iter1
-                pp_file['ii0'] = ii0
-                pp_file['ii1'] = ii1
-                pp_file['t'] = (self.parameters['dt']*
-                                self.parameters['niter_stat']*
-                                (np.arange(ii0, ii1+1).astype(np.float)))
-                pp_file['energy(t, k)'] = (
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] +
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] +
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2
-                pp_file['enstrophy(t, k)'] = (
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] +
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] +
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2
-                pp_file['vel_max(t)'] = data_file['statistics/moments/velocity']  [ii0:ii1+1, 9, 3]
-                pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2
-            for k in ['t',
-                      'energy(t, k)',
-                      'enstrophy(t, k)',
-                      'vel_max(t)',
-                      'renergy(t)']:
-                if k in pp_file.keys():
-                    self.statistics[k] = pp_file[k].value
-            self.compute_time_averages()
-        return None
-    def compute_time_averages(self):
-        """Compute easy stats.
-
-        Further computation of statistics based on the contents of
-        ``simname_postprocess.h5``.
-        Standard quantities are as follows
-        (consistent with [Ishihara]_):
-
-        .. math::
-
-            U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm
-            L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm
-            T_{\\textrm{int}}(t) =
-            \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)}
-
-            \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm
-            \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm
-            \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}}
-
-            Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip
-            .5cm
-            R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu}
-
-        .. [Ishihara] T. Ishihara et al,
-                      *Small-scale statistics in high-resolution direct numerical
-                      simulation of turbulence: Reynolds number dependence of
-                      one-point velocity gradient statistics*.
-                      J. Fluid Mech.,
-                      **592**, 335-366, 2007
-        """
-        for key in ['energy', 'enstrophy']:
-            self.statistics[key + '(t)'] = (self.statistics['dk'] *
-                                            np.sum(self.statistics[key + '(t, k)'], axis = 1))
-        self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3)
-        self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi /
-                                       (2*self.statistics['Uint(t)']**2)) *
-                                      np.nansum(self.statistics['energy(t, k)'] /
-                                                self.statistics['kshell'][None, :], axis = 1))
-        for key in ['energy',
-                    'enstrophy',
-                    'vel_max',
-                    'Uint',
-                    'Lint']:
-            if key + '(t)' in self.statistics.keys():
-                self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0)
-        for suffix in ['', '(t)']:
-            self.statistics['diss'    + suffix] = (self.parameters['nu'] *
-                                                   self.statistics['enstrophy' + suffix]*2)
-            self.statistics['etaK'    + suffix] = (self.parameters['nu']**3 /
-                                                   self.statistics['diss' + suffix])**.25
-            self.statistics['tauK'    + suffix] =  (self.parameters['nu'] /
-                                                    self.statistics['diss' + suffix])**.5
-            self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] *
-                                              self.statistics['Lint' + suffix] /
-                                              self.parameters['nu'])
-            self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] *
-                                                  self.statistics['Uint' + suffix]**2 /
-                                                  self.statistics['diss' + suffix])**.5
-            self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] *
-                                                   self.statistics['lambda' + suffix] /
-                                                   self.parameters['nu'])
-            self.statistics['kMeta' + suffix] = (self.statistics['kM'] *
-                                                 self.statistics['etaK' + suffix])
-            if self.parameters['dealias_type'] == 1:
-                self.statistics['kMeta' + suffix] *= 0.8
-        self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint']
-        self.statistics['Taylor_microscale'] = self.statistics['lambda']
-        return None
-    def set_plt_style(
-            self,
-            style = {'dashes' : (None, None)}):
-        self.style.update(style)
-        return None
-    def convert_complex_from_binary(
-            self,
-            field_name = 'vorticity',
-            iteration = 0,
-            file_name = None):
-        """read the Fourier representation of a vector field.
-
-        Read the binary file containing iteration ``iteration`` of the
-        field ``field_name``, and write it in a ``.h5`` file.
-        """
-        data = np.memmap(
-                os.path.join(self.work_dir,
-                             self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)),
-                dtype = self.ctype,
-                mode = 'r',
-                shape = (self.parameters['ny'],
-                         self.parameters['nz'],
-                         self.parameters['nx']//2+1,
-                         3))
-        if type(file_name) == type(None):
-            file_name = self.simname + '_{0}_i{1:0>5x}.h5'.format('c' + field_name, iteration)
-            file_name = os.path.join(self.work_dir, file_name)
-        f = h5py.File(file_name, 'a')
-        f[field_name + '/complex/{0}'.format(iteration)] = data
-        f.close()
-        return None
-    def write_par(
-            self,
-            iter0 = 0,
-            particle_ic = None):
-        _fluid_particle_base.write_par(self, iter0 = iter0)
-        with h5py.File(self.get_data_file_name(), 'r+') as ofile:
-            kspace = self.get_kspace()
-            nshells = kspace['nshell'].shape[0]
-            vec_stat_datasets = ['velocity', 'vorticity']
-            scal_stat_datasets = []
-            for k in vec_stat_datasets:
-                time_chunk = 2**20//(8*3*self.parameters['nx']) # FIXME: use proper size of self.dtype
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/xlines/' + k,
-                                     (1, self.parameters['nx'], 3),
-                                     chunks = (time_chunk, self.parameters['nx'], 3),
-                                     maxshape = (None, self.parameters['nx'], 3),
-                                     dtype = self.dtype)
-            for k in vec_stat_datasets:
-                time_chunk = 2**20//(8*3*3*nshells)
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/spectra/' + k + '_' + k,
-                                     (1, nshells, 3, 3),
-                                     chunks = (time_chunk, nshells, 3, 3),
-                                     maxshape = (None, nshells, 3, 3),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*4*10)
-                time_chunk = max(time_chunk, 1)
-                a = ofile.create_dataset('statistics/moments/' + k,
-                                     (1, 10, 4),
-                                     chunks = (time_chunk, 10, 4),
-                                     maxshape = (None, 10, 4),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*4*self.parameters['histogram_bins'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/' + k,
-                                     (1,
-                                      self.parameters['histogram_bins'],
-                                      4),
-                                     chunks = (time_chunk,
-                                               self.parameters['histogram_bins'],
-                                               4),
-                                     maxshape = (None,
-                                                 self.parameters['histogram_bins'],
-                                                 4),
-                                     dtype = np.int64)
-            ofile['checkpoint'] = int(0)
-        if self.particle_species == 0:
-            return None
-
-        if type(particle_ic) == type(None):
-            pbase_shape = (self.parameters['nparticles'],)
-            number_of_particles = self.parameters['nparticles']
-        else:
-            pbase_shape = particle_ic.shape[:-1]
-            assert(particle_ic.shape[-1] == 3)
-            number_of_particles = 1
-            for val in pbase_shape[1:]:
-                number_of_particles *= val
-        with h5py.File(self.get_checkpoint_0_fname(), 'a') as ofile:
-            s = 0
-            ofile.create_group('tracers{0}'.format(s))
-            ofile.create_group('tracers{0}/rhs'.format(s))
-            ofile.create_group('tracers{0}/state'.format(s))
-            ofile['tracers{0}/rhs'.format(s)].create_dataset(
-                    '0',
-                    shape = (
-                        (self.parameters['tracers{0}_integration_steps'.format(s)],) +
-                        pbase_shape +
-                        (3,)),
-                    dtype = np.float)
-            ofile['tracers{0}/state'.format(s)].create_dataset(
-                    '0',
-                    shape = (
-                        pbase_shape +
-                        (3,)),
-                    dtype = np.float)
-        return None
-    def specific_parser_arguments(
-            self,
-            parser):
-        _fluid_particle_base.specific_parser_arguments(self, parser)
-        parser.add_argument(
-                '--src-wd',
-                type = str,
-                dest = 'src_work_dir',
-                default = '')
-        parser.add_argument(
-                '--src-simname',
-                type = str,
-                dest = 'src_simname',
-                default = '')
-        parser.add_argument(
-                '--src-iteration',
-                type = int,
-                dest = 'src_iteration',
-                default = 0)
-        parser.add_argument(
-               '--njobs',
-               type = int, dest = 'njobs',
-               default = 1)
-        parser.add_argument(
-               '--kMeta',
-               type = float,
-               dest = 'kMeta',
-               default = 2.0)
-        parser.add_argument(
-               '--dtfactor',
-               type = float,
-               dest = 'dtfactor',
-               default = 0.5,
-               help = 'dt is computed as DTFACTOR / N')
-        parser.add_argument(
-               '--particle-rand-seed',
-               type = int,
-               dest = 'particle_rand_seed',
-               default = None)
-        parser.add_argument(
-               '--pclouds',
-               type = int,
-               dest = 'pclouds',
-               default = 1,
-               help = ('number of particle clouds. Particle "clouds" '
-                       'consist of particles distributed according to '
-                       'pcloud-type.'))
-        parser.add_argument(
-                '--pcloud-type',
-                choices = ['random-cube',
-                           'regular-cube'],
-                dest = 'pcloud_type',
-                default = 'random-cube')
-        parser.add_argument(
-               '--particle-cloud-size',
-               type = float,
-               dest = 'particle_cloud_size',
-               default = 2*np.pi)
-        parser.add_argument(
-                '--neighbours',
-                type = int,
-                dest = 'neighbours',
-                default = 1)
-        parser.add_argument(
-                '--smoothness',
-                type = int,
-                dest = 'smoothness',
-                default = 1)
-        return None
-    def prepare_launch(
-            self,
-            args = []):
-        """Set up reasonable parameters.
-
-        With the default Lundgren forcing applied in the band [2, 4],
-        we can estimate the dissipation, therefore we can estimate
-        :math:`k_M \\eta_K` and constrain the viscosity.
-
-        In brief, the command line parameter :math:`k_M \\eta_K` is
-        used in the following formula for :math:`\\nu` (:math:`N` is the
-        number of real space grid points per coordinate):
-
-        .. math::
-
-            \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3}
-
-        With this choice, the average dissipation :math:`\\varepsilon`
-        will be close to 0.4, and the integral scale velocity will be
-        close to 0.77, yielding the approximate value for the Taylor
-        microscale and corresponding Reynolds number:
-
-        .. math::
-
-            \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in
-            R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6}
-
-        """
-        opt = _code.prepare_launch(self, args = args)
-        self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3)
-        self.parameters['dt'] = (opt.dtfactor / opt.n)
-        # custom famplitude for 288 and 576
-        if opt.n == 288:
-            self.parameters['famplitude'] = 0.45
-        elif opt.n == 576:
-            self.parameters['famplitude'] = 0.47
-        if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0):
-            self.parameters['niter_out'] = self.parameters['niter_todo']
-        if len(opt.src_work_dir) == 0:
-            opt.src_work_dir = os.path.realpath(opt.work_dir)
-        self.pars_from_namespace(opt)
-        return opt
-    def launch(
-            self,
-            args = [],
-            **kwargs):
-        opt = self.prepare_launch(args = args)
-        if type(opt.nparticles) != type(None):
-            if opt.nparticles > 0:
-                self.name += '-particles'
-                self.add_particles(
-                    integration_steps = 4,
-                    neighbours = opt.neighbours,
-                    smoothness = opt.smoothness)
-        self.fill_up_fluid_code()
-        self.finalize_code()
-        self.launch_jobs(opt = opt, **kwargs)
-        return None
-    def get_checkpoint_0_fname(self):
-        return os.path.join(
-                    self.work_dir,
-                    self.simname + '_checkpoint_0.h5')
-    def generate_tracer_state(
-            self,
-            rseed = None,
-            iteration = 0,
-            species = 0,
-            write_to_file = False,
-            ncomponents = 3,
-            testing = False,
-            data = None):
-        if (type(data) == type(None)):
-            if not type(rseed) == type(None):
-                np.random.seed(rseed)
-            #point with problems: 5.37632864e+00,   6.10414710e+00,   6.25256493e+00]
-            data = np.zeros(self.parameters['nparticles']*ncomponents).reshape(-1, ncomponents)
-            data[:, :3] = np.random.random((self.parameters['nparticles'], 3))*2*np.pi
-        if testing:
-            #data[0] = np.array([3.26434, 4.24418, 3.12157])
-            data[:] = np.array([ 0.72086101,  2.59043666,  6.27501953])
-        with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file:
-            data_file['tracers{0}/state/0'.format(species)][:] = data
-        if write_to_file:
-            data.tofile(
-                    os.path.join(
-                        self.work_dir,
-                        "tracers{0}_state_i{1:0>5x}".format(species, iteration)))
-        return data
-    def launch_jobs(
-            self,
-            opt = None,
-            particle_initial_condition = None):
-        if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')):
-            # take care of fields' initial condition
-            if not os.path.exists(self.get_checkpoint_0_fname()):
-                f = h5py.File(self.get_checkpoint_0_fname(), 'w')
-                if len(opt.src_simname) > 0:
-                    source_cp = 0
-                    src_file = 'not_a_file'
-                    while True:
-                        src_file = os.path.join(
-                            os.path.realpath(opt.src_work_dir),
-                            opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp))
-                        f0 = h5py.File(src_file, 'r')
-                        if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys():
-                            f0.close()
-                            break
-                        source_cp += 1
-                    f['vorticity/complex/{0}'.format(0)] = h5py.ExternalLink(
-                            src_file,
-                            'vorticity/complex/{0}'.format(opt.src_iteration))
-                else:
-                    data = self.generate_vector_field(
-                           write_to_file = False,
-                           spectra_slope = 2.0,
-                           amplitude = 0.05)
-                    f['vorticity/complex/{0}'.format(0)] = data
-                f.close()
-            # take care of particles' initial condition
-            if opt.pclouds > 1:
-                np.random.seed(opt.particle_rand_seed)
-                if opt.pcloud_type == 'random-cube':
-                    particle_initial_condition = (
-                        np.random.random((opt.pclouds, 1, 3))*2*np.pi +
-                        np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size)
-                elif opt.pcloud_type == 'regular-cube':
-                    onedarray = np.linspace(
-                            -opt.particle_cloud_size/2,
-                            opt.particle_cloud_size/2,
-                            self.parameters['nparticles'])
-                    particle_initial_condition = np.zeros(
-                            (opt.pclouds,
-                             self.parameters['nparticles'],
-                             self.parameters['nparticles'],
-                             self.parameters['nparticles'], 3),
-                            dtype = np.float64)
-                    particle_initial_condition[:] = \
-                        np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi
-                    particle_initial_condition[..., 0] += onedarray[None, None, None, :]
-                    particle_initial_condition[..., 1] += onedarray[None, None, :, None]
-                    particle_initial_condition[..., 2] += onedarray[None, :, None, None]
-            self.write_par(
-                    particle_ic = particle_initial_condition)
-            if self.parameters['nparticles'] > 0:
-                data = self.generate_tracer_state(
-                        species = 0,
-                        rseed = opt.particle_rand_seed,
-                        data = particle_initial_condition)
-                for s in range(1, self.particle_species):
-                    self.generate_tracer_state(species = s, data = data)
-        self.run(
-                nb_processes = opt.nb_processes,
-                nb_threads_per_process = opt.nb_threads_per_process,
-                njobs = opt.njobs,
-                hours = opt.minutes // 60,
-                minutes = opt.minutes % 60,
-                no_submit = opt.no_submit)
-        return None
-
-if __name__ == '__main__':
-    pass
-
diff --git a/bfps/NavierStokes.py b/bfps/NavierStokes.py
deleted file mode 100644
index c30adbe2ec41dac86993399a0235a18d20820269..0000000000000000000000000000000000000000
--- a/bfps/NavierStokes.py
+++ /dev/null
@@ -1,1263 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-import sys
-import os
-import numpy as np
-import h5py
-import argparse
-
-import bfps
-import bfps.tools
-from ._code import _code
-from ._fluid_base import _fluid_particle_base
-
-class NavierStokes(_fluid_particle_base):
-    """Objects of this class can be used to generate production DNS codes.
-    Any functionality that users require should be available through this class,
-    in the sense that they can implement whatever they need by simply inheriting
-    this class.
-    """
-    def __init__(
-            self,
-            name = 'NavierStokes-v' + bfps.__version__,
-            work_dir = './',
-            simname = 'test',
-            fluid_precision = 'single',
-            fftw_plan_rigor = 'FFTW_MEASURE',
-            frozen_fields = False,
-            use_fftw_wisdom = True,
-            QR_stats_on = False,
-            Lag_acc_stats_on = False):
-        self.QR_stats_on = QR_stats_on
-        self.Lag_acc_stats_on = Lag_acc_stats_on
-        self.frozen_fields = frozen_fields
-        self.fftw_plan_rigor = fftw_plan_rigor
-        _fluid_particle_base.__init__(
-                self,
-                name = name + '-' + fluid_precision,
-                work_dir = work_dir,
-                simname = simname,
-                dtype = fluid_precision,
-                use_fftw_wisdom = use_fftw_wisdom)
-        self.parameters['nu'] = 0.1
-        self.parameters['fmode'] = 1
-        self.parameters['famplitude'] = 0.5
-        self.parameters['fk0'] = 2.0
-        self.parameters['fk1'] = 4.0
-        self.parameters['forcing_type'] = 'linear'
-        self.parameters['histogram_bins'] = 256
-        self.parameters['max_velocity_estimate'] = 1.0
-        self.parameters['max_vorticity_estimate'] = 1.0
-        self.parameters['max_Lag_acc_estimate'] = 1.0
-        self.parameters['max_pressure_estimate'] = 1.0
-        self.parameters['QR2D_histogram_bins'] = 64
-        self.parameters['max_trS2_estimate'] = 1.0
-        self.parameters['max_Q_estimate'] = 1.0
-        self.parameters['max_R_estimate'] = 1.0
-        self.file_datasets_grow = """
-                //begincpp
-                hid_t group;
-                group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT);
-                H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL);
-                H5Gclose(group);
-                //endcpp
-                """
-        self.style = {}
-        self.statistics = {}
-        self.fluid_output = 'fs->write(\'v\', \'c\');\n'
-        return None
-    def create_stat_output(
-            self,
-            dset_name,
-            data_buffer,
-            data_type = 'H5T_NATIVE_DOUBLE',
-            size_setup = None,
-            close_spaces = True):
-        new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name)
-        if not type(size_setup) == type(None):
-            new_stat_output_txt += (
-                    size_setup +
-                    'wspace = H5Dget_space(Cdset);\n' +
-                    'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' +
-                    'mspace = H5Screate_simple(ndims, count, NULL);\n' +
-                    'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n')
-        new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' +
-                                'H5Dclose(Cdset);\n').format(data_type, data_buffer)
-        if close_spaces:
-            new_stat_output_txt += ('H5Sclose(mspace);\n' +
-                                    'H5Sclose(wspace);\n')
-        return new_stat_output_txt
-    def write_fluid_stats(self):
-        self.fluid_includes += '#include <cmath>\n'
-        self.fluid_includes += '#include "fftw_tools.hpp"\n'
-        self.stat_src += """
-                //begincpp
-                hid_t stat_group;
-                if (myrank == 0)
-                    stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT);
-                fs->compute_velocity(fs->cvorticity);
-                std::vector<double> max_estimate_vector;
-                max_estimate_vector.resize(4);
-                *tmp_vec_field = fs->cvelocity;
-                switch(fs->dealias_type)
-                {
-                    case 0:
-                        tmp_vec_field->compute_stats(
-                            kk_two_thirds,
-                            stat_group,
-                            "velocity",
-                            fs->iteration / niter_stat,
-                            max_velocity_estimate/sqrt(3));
-                        break;
-                    case 1:
-                        tmp_vec_field->compute_stats(
-                            kk_smooth,
-                            stat_group,
-                            "velocity",
-                            fs->iteration / niter_stat,
-                            max_velocity_estimate/sqrt(3));
-                        break;
-                }
-                //endcpp
-                """
-        if self.Lag_acc_stats_on:
-            self.stat_src += """
-                    //begincpp
-                    tmp_vec_field->real_space_representation = false;
-                    fs->compute_Lagrangian_acceleration(tmp_vec_field->get_cdata());
-                    switch(fs->dealias_type)
-                    {
-                        case 0:
-                            tmp_vec_field->compute_stats(
-                                kk_two_thirds,
-                                stat_group,
-                                "Lagrangian_acceleration",
-                                fs->iteration / niter_stat,
-                                max_Lag_acc_estimate);
-                            break;
-                        case 1:
-                            tmp_vec_field->compute_stats(
-                                kk_smooth,
-                                stat_group,
-                                "Lagrangian_acceleration",
-                                fs->iteration / niter_stat,
-                                max_Lag_acc_estimate);
-                            break;
-                    }
-                    tmp_scal_field->real_space_representation = false;
-                    fs->compute_velocity(fs->cvorticity);
-                    fs->ift_velocity();
-                    fs->compute_pressure(tmp_scal_field->get_cdata());
-                    switch(fs->dealias_type)
-                    {
-                        case 0:
-                            tmp_scal_field->compute_stats(
-                                kk_two_thirds,
-                                stat_group,
-                                "pressure",
-                                fs->iteration / niter_stat,
-                                max_pressure_estimate);
-                            break;
-                        case 1:
-                            tmp_scal_field->compute_stats(
-                                kk_smooth,
-                                stat_group,
-                                "pressure",
-                                fs->iteration / niter_stat,
-                                max_pressure_estimate);
-                            break;
-                    }
-                    //endcpp
-                    """
-        self.stat_src += """
-                //begincpp
-                *tmp_vec_field = fs->cvorticity;
-                switch(fs->dealias_type)
-                {
-                    case 0:
-                        tmp_vec_field->compute_stats(
-                            kk_two_thirds,
-                            stat_group,
-                            "vorticity",
-                            fs->iteration / niter_stat,
-                            max_vorticity_estimate/sqrt(3));
-                        break;
-                    case 1:
-                        tmp_vec_field->compute_stats(
-                            kk_smooth,
-                            stat_group,
-                            "vorticity",
-                            fs->iteration / niter_stat,
-                            max_vorticity_estimate/sqrt(3));
-                        break;
-                }
-                //endcpp
-                """
-        if self.QR_stats_on:
-            self.stat_src += """
-                //begincpp
-                double *trS2_Q_R_moments  = new double[10*3];
-                double *gradu_moments     = new double[10*9];
-                ptrdiff_t *hist_trS2_Q_R  = new ptrdiff_t[histogram_bins*3];
-                ptrdiff_t *hist_gradu     = new ptrdiff_t[histogram_bins*9];
-                ptrdiff_t *hist_QR2D      = new ptrdiff_t[QR2D_histogram_bins*QR2D_histogram_bins];
-                double trS2QR_max_estimates[3];
-                double gradu_max_estimates[9];
-                trS2QR_max_estimates[0] = max_trS2_estimate;
-                trS2QR_max_estimates[1] = max_Q_estimate;
-                trS2QR_max_estimates[2] = max_R_estimate;
-                std::fill_n(gradu_max_estimates, 9, sqrt(3*max_trS2_estimate));
-                fs->compute_gradient_statistics(
-                    fs->cvelocity,
-                    gradu_moments,
-                    trS2_Q_R_moments,
-                    hist_gradu,
-                    hist_trS2_Q_R,
-                    hist_QR2D,
-                    trS2QR_max_estimates,
-                    gradu_max_estimates,
-                    histogram_bins,
-                    QR2D_histogram_bins);
-                //endcpp
-                """
-        self.stat_src += """
-                //begincpp
-                if (myrank == 0)
-                    H5Gclose(stat_group);
-                if (fs->cd->myrank == 0)
-                {{
-                    hid_t Cdset, wspace, mspace;
-                    int ndims;
-                    hsize_t count[4], offset[4], dims[4];
-                    offset[0] = fs->iteration/niter_stat;
-                    offset[1] = 0;
-                    offset[2] = 0;
-                    offset[3] = 0;
-                //endcpp
-                """.format(self.C_dtype)
-        if self.dtype == np.float32:
-            field_H5T = 'H5T_NATIVE_FLOAT'
-        elif self.dtype == np.float64:
-            field_H5T = 'H5T_NATIVE_DOUBLE'
-        if self.QR_stats_on:
-            self.stat_src += self.create_stat_output(
-                    '/statistics/moments/trS2_Q_R',
-                    'trS2_Q_R_moments',
-                    size_setup ="""
-                        count[0] = 1;
-                        count[1] = 10;
-                        count[2] = 3;
-                        """)
-            self.stat_src += self.create_stat_output(
-                    '/statistics/moments/velocity_gradient',
-                    'gradu_moments',
-                    size_setup ="""
-                        count[0] = 1;
-                        count[1] = 10;
-                        count[2] = 3;
-                        count[3] = 3;
-                        """)
-            self.stat_src += self.create_stat_output(
-                    '/statistics/histograms/trS2_Q_R',
-                    'hist_trS2_Q_R',
-                    data_type = 'H5T_NATIVE_INT64',
-                    size_setup = """
-                        count[0] = 1;
-                        count[1] = histogram_bins;
-                        count[2] = 3;
-                        """)
-            self.stat_src += self.create_stat_output(
-                    '/statistics/histograms/velocity_gradient',
-                    'hist_gradu',
-                    data_type = 'H5T_NATIVE_INT64',
-                    size_setup = """
-                        count[0] = 1;
-                        count[1] = histogram_bins;
-                        count[2] = 3;
-                        count[3] = 3;
-                        """)
-            self.stat_src += self.create_stat_output(
-                    '/statistics/histograms/QR2D',
-                    'hist_QR2D',
-                    data_type = 'H5T_NATIVE_INT64',
-                    size_setup = """
-                        count[0] = 1;
-                        count[1] = QR2D_histogram_bins;
-                        count[2] = QR2D_histogram_bins;
-                        """)
-        self.stat_src += '}\n'
-        if self.QR_stats_on:
-            self.stat_src += """
-                //begincpp
-                delete[] trS2_Q_R_moments;
-                delete[] gradu_moments;
-                delete[] hist_trS2_Q_R;
-                delete[] hist_gradu;
-                delete[] hist_QR2D;
-                //endcpp
-                """
-        return None
-    def fill_up_fluid_code(self):
-        self.fluid_includes += '#include <cstring>\n'
-        self.fluid_variables += (
-                'fluid_solver<{0}> *fs;\n'.format(self.C_dtype) +
-                'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) +
-                'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype) +
-                'kspace<FFTW, SMOOTH> *kk_smooth;\n' +
-                'kspace<FFTW, TWO_THIRDS> *kk_two_thirds;\n')
-        self.fluid_definitions += """
-                    typedef struct {{
-                        {0} re;
-                        {0} im;
-                    }} tmp_complex_type;
-                    """.format(self.C_dtype)
-        self.write_fluid_stats()
-        if self.dtype == np.float32:
-            field_H5T = 'H5T_NATIVE_FLOAT'
-        elif self.dtype == np.float64:
-            field_H5T = 'H5T_NATIVE_DOUBLE'
-        self.fluid_start += """
-                //begincpp
-                char fname[512];
-                fs = new fluid_solver<{0}>(
-                        simname,
-                        nx, ny, nz,
-                        dkx, dky, dkz,
-                        dealias_type,
-                        {1});
-                tmp_vec_field = new field<{0}, FFTW, THREE>(
-                        nx, ny, nz,
-                        MPI_COMM_WORLD,
-                        {1});
-                tmp_scal_field = new field<{0}, FFTW, ONE>(
-                        nx, ny, nz,
-                        MPI_COMM_WORLD,
-                        {1});
-                kk_smooth = new kspace<FFTW, SMOOTH>(
-                        tmp_vec_field->clayout,
-                        fs->dkx, fs->dky, fs->dkz);
-                kk_two_thirds = new kspace<FFTW, TWO_THIRDS>(
-                        tmp_vec_field->clayout,
-                        fs->dkx, fs->dky, fs->dkz);
-                fs->nu = nu;
-                fs->fmode = fmode;
-                fs->famplitude = famplitude;
-                fs->fk0 = fk0;
-                fs->fk1 = fk1;
-                strncpy(fs->forcing_type, forcing_type, 128);
-                fs->iteration = iteration;
-                fs->read('v', 'c');
-                //endcpp
-                """.format(self.C_dtype, self.fftw_plan_rigor, field_H5T)
-        self.fluid_start += self.store_kspace
-        if not self.frozen_fields:
-            self.fluid_loop = 'fs->step(dt);\n'
-        else:
-            self.fluid_loop = ''
-        self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' +
-                            self.fluid_output + '\n}\n')
-        self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' +
-                          self.fluid_output + '\n}\n' +
-                          'delete fs;\n' +
-                          'delete tmp_vec_field;\n' +
-                          'delete tmp_scal_field;\n' +
-                          'delete kk_smooth;\n' +
-                          'delete kk_two_thirds;\n')
-        return None
-    def add_3D_rFFTW_field(
-            self,
-            name = 'rFFTW_acc'):
-        if self.dtype == np.float32:
-            FFTW = 'fftwf'
-        elif self.dtype == np.float64:
-            FFTW = 'fftw'
-        self.fluid_variables += '{0} *{1};\n'.format(self.C_dtype, name)
-        self.fluid_start += '{0} = {1}_alloc_real(2*fs->cd->local_size);\n'.format(name, FFTW)
-        self.fluid_end   += '{0}_free({1});\n'.format(FFTW, name)
-        return None
-    def add_interpolator(
-            self,
-            interp_type = 'spline',
-            neighbours = 1,
-            smoothness = 1,
-            name = 'field_interpolator',
-            field_name = 'fs->rvelocity',
-            class_name = 'rFFTW_interpolator'):
-        self.fluid_includes += '#include "{0}.hpp"\n'.format(class_name)
-        self.fluid_variables += '{0} <{1}, {2}> *{3};\n'.format(
-                class_name, self.C_dtype, neighbours, name)
-        self.parameters[name + '_type'] = interp_type
-        self.parameters[name + '_neighbours'] = neighbours
-        if interp_type == 'spline':
-            self.parameters[name + '_smoothness'] = smoothness
-            beta_name = 'beta_n{0}_m{1}'.format(neighbours, smoothness)
-        elif interp_type == 'Lagrange':
-            beta_name = 'beta_Lagrange_n{0}'.format(neighbours)
-        self.fluid_start += '{0} = new {1}<{2}, {3}>(fs, {4}, {5});\n'.format(
-                name,
-                class_name,
-                self.C_dtype,
-                neighbours,
-                beta_name,
-                field_name)
-        self.fluid_end += 'delete {0};\n'.format(name)
-        return None
-    def add_particles(
-            self,
-            integration_steps = 2,
-            kcut = None,
-            interpolator = 'field_interpolator',
-            frozen_particles = False,
-            acc_name = None,
-            class_name = 'particles'):
-        """Adds code for tracking a series of particle species, each
-        consisting of `nparticles` particles.
-
-        :type integration_steps: int, list of int
-        :type kcut: None (default), str, list of str
-        :type interpolator: str, list of str
-        :type frozen_particles: bool
-        :type acc_name: str
-
-        .. warning:: if not None, kcut must be a list of decreasing
-                     wavenumbers, since filtering is done sequentially
-                     on the same complex FFTW field.
-        """
-        if self.dtype == np.float32:
-            FFTW = 'fftwf'
-        elif self.dtype == np.float64:
-            FFTW = 'fftw'
-        s0 = self.particle_species
-        if type(integration_steps) == int:
-            integration_steps = [integration_steps]
-        if type(kcut) == str:
-            kcut = [kcut]
-        if type(interpolator) == str:
-            interpolator = [interpolator]
-        nspecies = max(len(integration_steps), len(interpolator))
-        if type(kcut) == list:
-            nspecies = max(nspecies, len(kcut))
-        if len(integration_steps) == 1:
-            integration_steps = [integration_steps[0] for s in range(nspecies)]
-        if len(interpolator) == 1:
-            interpolator = [interpolator[0] for s in range(nspecies)]
-        if type(kcut) == list:
-            if len(kcut) == 1:
-                kcut = [kcut[0] for s in range(nspecies)]
-        assert(len(integration_steps) == nspecies)
-        assert(len(interpolator) == nspecies)
-        if type(kcut) == list:
-            assert(len(kcut) == nspecies)
-        for s in range(nspecies):
-            neighbours = self.parameters[interpolator[s] + '_neighbours']
-            if type(kcut) == list:
-                self.parameters['tracers{0}_kcut'.format(s0 + s)] = kcut[s]
-            self.parameters['tracers{0}_interpolator'.format(s0 + s)] = interpolator[s]
-            self.parameters['tracers{0}_acc_on'.format(s0 + s)] = int(not type(acc_name) == type(None))
-            self.parameters['tracers{0}_integration_steps'.format(s0 + s)] = integration_steps[s]
-            self.file_datasets_grow += """
-                        //begincpp
-                        group = H5Gopen(particle_file, "/tracers{0}", H5P_DEFAULT);
-                        grow_particle_datasets(group, "", NULL, NULL);
-                        H5Gclose(group);
-                        //endcpp
-                        """.format(s0 + s)
-
-        #### code that outputs statistics
-        output_vel_acc = '{\n'
-        # array for putting sampled velocity in
-        # must compute velocity, just in case it was messed up by some
-        # other particle species before the stats
-        output_vel_acc += 'fs->compute_velocity(fs->cvorticity);\n'
-        if not type(kcut) == list:
-            output_vel_acc += 'fs->ift_velocity();\n'
-        if not type(acc_name) == type(None):
-            # array for putting sampled acceleration in
-            # must compute acceleration
-            output_vel_acc += 'fs->compute_Lagrangian_acceleration({0});\n'.format(acc_name)
-        for s in range(nspecies):
-            if type(kcut) == list:
-                output_vel_acc += 'fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut[s])
-                output_vel_acc += 'fs->ift_velocity();\n'
-            output_vel_acc += """
-                {0}->read_rFFTW(fs->rvelocity);
-                ps{1}->sample({0}, "velocity");
-                """.format(interpolator[s], s0 + s)
-            if not type(acc_name) == type(None):
-                output_vel_acc += """
-                    {0}->read_rFFTW({1});
-                    ps{2}->sample({0}, "acceleration");
-                    """.format(interpolator[s], acc_name, s0 + s)
-        output_vel_acc += '}\n'
-
-        #### initialize, stepping and finalize code
-        if not type(kcut) == list:
-            update_fields = ('fs->compute_velocity(fs->cvorticity);\n' +
-                             'fs->ift_velocity();\n')
-            self.particle_start += update_fields
-            self.particle_loop  += update_fields
-        else:
-            self.particle_loop += 'fs->compute_velocity(fs->cvorticity);\n'
-        self.particle_includes += '#include "{0}.hpp"\n'.format(class_name)
-        self.particle_stat_src += (
-                'if (ps0->iteration % niter_part == 0)\n' +
-                '{\n')
-        for s in range(nspecies):
-            neighbours = self.parameters[interpolator[s] + '_neighbours']
-            self.particle_start += 'sprintf(fname, "tracers{0}");\n'.format(s0 + s)
-            self.particle_end += ('ps{0}->write();\n' +
-                                  'delete ps{0};\n').format(s0 + s)
-            self.particle_variables += '{0}<VELOCITY_TRACER, {1}, {2}> *ps{3};\n'.format(
-                    class_name,
-                    self.C_dtype,
-                    neighbours,
-                    s0 + s)
-            self.particle_start += ('ps{0} = new {1}<VELOCITY_TRACER, {2}, {3}>(\n' +
-                                    'fname, particle_file, {4},\n' +
-                                    'niter_part, tracers{0}_integration_steps);\n').format(
-                                            s0 + s,
-                                            class_name,
-                                            self.C_dtype,
-                                            neighbours,
-                                            interpolator[s])
-            self.particle_start += ('ps{0}->dt = dt;\n' +
-                                    'ps{0}->iteration = iteration;\n' +
-                                    'ps{0}->read();\n').format(s0 + s)
-            if not frozen_particles:
-                if type(kcut) == list:
-                    update_field = ('fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut[s]) +
-                                    'fs->ift_velocity();\n')
-                    self.particle_loop += update_field
-                self.particle_loop += '{0}->read_rFFTW(fs->rvelocity);\n'.format(interpolator[s])
-                self.particle_loop += 'ps{0}->step();\n'.format(s0 + s)
-            self.particle_stat_src += 'ps{0}->write(false);\n'.format(s0 + s)
-        self.particle_stat_src += output_vel_acc
-        self.particle_stat_src += '}\n'
-        self.particle_species += nspecies
-        return None
-    def get_cache_file_name(self):
-        return os.path.join(self.work_dir, self.simname + '_cache.h5')
-    def get_cache_file(self):
-        return h5py.File(self.get_postprocess_file_name(), 'r')
-    def get_postprocess_file_name(self):
-        return self.get_cache_file_name()
-    def get_postprocess_file(self):
-        return h5py.File(self.get_postprocess_file_name(), 'r')
-    def compute_statistics(self, iter0 = 0, iter1 = None):
-        """Run basic postprocessing on raw data.
-        The energy spectrum :math:`E(t, k)` and the enstrophy spectrum
-        :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the
-
-        .. math::
-
-            \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm
-            \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^*
-
-        tensors, and the enstrophy spectrum is also used to
-        compute the dissipation :math:`\\varepsilon(t)`.
-        These basic quantities are stored in a newly created HDF5 file,
-        ``simname_cache.h5``.
-        """
-        if len(list(self.statistics.keys())) > 0:
-            return None
-        if not os.path.exists(self.get_data_file_name()):
-            if os.path.exists(self.get_cache_file_name()):
-                self.read_parameters(fname = self.get_cache_file_name())
-                with self.get_cache_file() as pp_file:
-                    for k in ['t',
-                              'energy(t)',
-                              'energy(k)',
-                              'enstrophy(t)',
-                              'enstrophy(k)',
-                              'R_ij(t)',
-                              'vel_max(t)',
-                              'renergy(t)']:
-                        if k in pp_file.keys():
-                            self.statistics[k] = pp_file[k].value
-                    self.statistics['kM'] = pp_file['kspace/kM'].value
-                    self.statistics['dk'] = pp_file['kspace/dk'].value
-                    self.statistics['kshell'] = pp_file['kspace/kshell'].value
-                    self.statistics['nshell'] = pp_file['kspace/nshell'].value
-        else:
-            self.read_parameters()
-            with self.get_data_file() as data_file:
-                if 'moments' not in data_file['statistics'].keys():
-                    return None
-                iter0 = min((data_file['statistics/moments/velocity'].shape[0] *
-                             self.parameters['niter_stat']-1),
-                            iter0)
-                if type(iter1) == type(None):
-                    iter1 = data_file['iteration'].value
-                else:
-                    iter1 = min(data_file['iteration'].value, iter1)
-                ii0 = iter0 // self.parameters['niter_stat']
-                ii1 = iter1 // self.parameters['niter_stat']
-                self.statistics['kshell'] = data_file['kspace/kshell'].value
-                self.statistics['nshell'] = data_file['kspace/nshell'].value
-                for kk in [-1, -2]:
-                    if (self.statistics['kshell'][kk] == 0):
-                        self.statistics['kshell'][kk] = np.nan
-                self.statistics['kM'] = data_file['kspace/kM'].value
-                self.statistics['dk'] = data_file['kspace/dk'].value
-                computation_needed = True
-                pp_file = h5py.File(self.get_postprocess_file_name(), 'a')
-                if not ('parameters' in pp_file.keys()):
-                    data_file.copy('parameters', pp_file)
-                    data_file.copy('kspace', pp_file)
-                if 'ii0' in pp_file.keys():
-                    computation_needed =  not (ii0 == pp_file['ii0'].value and
-                                               ii1 == pp_file['ii1'].value)
-                    if computation_needed:
-                        for k in ['t', 'vel_max(t)', 'renergy(t)',
-                                  'energy(t)', 'enstrophy(t)',
-                                  'energy(k)', 'enstrophy(k)',
-                                  'energy(t, k)',
-                                  'enstrophy(t, k)',
-                                  'R_ij(t)',
-                                  'ii0', 'ii1', 'iter0', 'iter1']:
-                            if k in pp_file.keys():
-                                del pp_file[k]
-                if computation_needed:
-                    pp_file['iter0'] = iter0
-                    pp_file['iter1'] = iter1
-                    pp_file['ii0'] = ii0
-                    pp_file['ii1'] = ii1
-                    pp_file['t'] = (self.parameters['dt']*
-                                    self.parameters['niter_stat']*
-                                    (np.arange(ii0, ii1+1).astype(np.float)))
-                    phi_ij = data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1]
-                    pp_file['R_ij(t)'] = np.sum(phi_ij, axis = 1)
-                    energy_tk = (
-                        phi_ij[:, :, 0, 0] +
-                        phi_ij[:, :, 1, 1] +
-                        phi_ij[:, :, 2, 2])/2
-                    pp_file['energy(t)'] = np.sum(energy_tk, axis = 1)
-                    pp_file['energy(k)'] = np.mean(energy_tk, axis = 0)*(4*np.pi*self.statistics['kshell']**2) / (self.statistics['dk']*self.statistics['nshell'])
-                    enstrophy_tk = (
-                        data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] +
-                        data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] +
-                        data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2
-                    pp_file['enstrophy(t)'] = np.sum(enstrophy_tk, axis = 1)
-                    pp_file['enstrophy(k)'] = np.mean(enstrophy_tk, axis = 0)*(4*np.pi*self.statistics['kshell']**2) / (self.statistics['dk']*self.statistics['nshell'])
-                    pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 9, 3]
-                    pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2
-                    if 'trS2_Q_R' in data_file['statistics/moments'].keys():
-                        pp_file['mean_trS2(t)'] = data_file['statistics/moments/trS2_Q_R'][:, 1, 0]
-        for k in ['t',
-                  'energy(t)',
-                  'energy(k)',
-                  'enstrophy(t)',
-                  'enstrophy(k)',
-                  'R_ij(t)',
-                  'vel_max(t)',
-                  'renergy(t)',
-                  'mean_trS2(t)']:
-            if k in pp_file.keys():
-                self.statistics[k] = pp_file[k].value
-        # sanity check --- Parseval theorem check
-        assert(np.max(np.abs(
-                self.statistics['renergy(t)'] -
-                self.statistics['energy(t)']) / self.statistics['energy(t)']) < 1e-5)
-        self.compute_time_averages()
-        return None
-    def compute_Reynolds_stress_invariants(
-            self):
-        Rij = self.statistics['R_ij(t)']
-        Rij /= (2*self.statistics['energy(t)'][:, None, None])
-        Rij[:, 0, 0] -= 1./3
-        Rij[:, 1, 1] -= 1./3
-        Rij[:, 2, 2] -= 1./3
-        self.statistics['I2(t)'] = np.sqrt(np.einsum('...ij,...ij', Rij, Rij, optimize = True) / 6)
-        self.statistics['I3(t)'] = np.cbrt(np.einsum('...ij,...jk,...ki', Rij, Rij, Rij, optimize = True) / 6)
-        return None
-    def compute_time_averages(self):
-        """Compute easy stats.
-
-        Further computation of statistics based on the contents of
-        ``simname_cache.h5``.
-        Standard quantities are as follows
-        (consistent with [Ishihara]_):
-
-        .. math::
-
-            U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm
-            L_{\\textrm{int}} = \\frac{\pi}{2U_{int}^2} \\int \\frac{dk}{k} E(k), \\hskip .5cm
-            T_{\\textrm{int}} =
-            \\frac{L_{\\textrm{int}}}{U_{\\textrm{int}}}
-
-            \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm
-            \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm
-            \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}}
-
-            Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip
-            .5cm
-            R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu}
-
-        .. [Ishihara] T. Ishihara et al,
-                      *Small-scale statistics in high-resolution direct numerical
-                      simulation of turbulence: Reynolds number dependence of
-                      one-point velocity gradient statistics*.
-                      J. Fluid Mech.,
-                      **592**, 335-366, 2007
-        """
-        self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3)
-        for key in ['energy',
-                    'enstrophy',
-                    'mean_trS2',
-                    'Uint']:
-            if key + '(t)' in self.statistics.keys():
-                self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0)
-        self.statistics['vel_max'] = np.max(self.statistics['vel_max(t)'])
-        for suffix in ['', '(t)']:
-            self.statistics['diss'    + suffix] = (self.parameters['nu'] *
-                                                   self.statistics['enstrophy' + suffix]*2)
-            self.statistics['etaK'    + suffix] = (self.parameters['nu']**3 /
-                                                   self.statistics['diss' + suffix])**.25
-            self.statistics['tauK'    + suffix] =  (self.parameters['nu'] /
-                                                    self.statistics['diss' + suffix])**.5
-            self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] *
-                                                  self.statistics['Uint' + suffix]**2 /
-                                                  self.statistics['diss' + suffix])**.5
-            self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] *
-                                                   self.statistics['lambda' + suffix] /
-                                                   self.parameters['nu'])
-            self.statistics['kMeta' + suffix] = (self.statistics['kM'] *
-                                                 self.statistics['etaK' + suffix])
-            if self.parameters['dealias_type'] == 1:
-                self.statistics['kMeta' + suffix] *= 0.8
-        self.statistics['Lint'] = ((np.pi /
-                                    (2*self.statistics['Uint']**2)) *
-                                   np.nansum(self.statistics['energy(k)'] /
-                                                self.statistics['kshell']))
-        self.statistics['Re'] = (self.statistics['Uint'] *
-                                 self.statistics['Lint'] /
-                                 self.parameters['nu'])
-        self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint']
-        self.statistics['Taylor_microscale'] = self.statistics['lambda']
-        return None
-    def set_plt_style(
-            self,
-            style = {'dashes' : (None, None)}):
-        self.style.update(style)
-        return None
-    def read_cfield(
-            self,
-            field_name = 'vorticity',
-            iteration = 0):
-        """read the Fourier representation of a vector field.
-
-        Read the binary file containing iteration ``iteration`` of the
-        field ``field_name``, and return it as a properly shaped
-        ``numpy.memmap`` object.
-        """
-        return np.memmap(
-                os.path.join(self.work_dir,
-                             self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)),
-                dtype = self.ctype,
-                mode = 'r',
-                shape = (self.parameters['ny'],
-                         self.parameters['nz'],
-                         self.parameters['nx']//2+1,
-                         3))
-    def write_par(
-            self,
-            iter0 = 0,
-            particle_ic = None):
-        _fluid_particle_base.write_par(self, iter0 = iter0)
-        with h5py.File(self.get_data_file_name(), 'r+') as ofile:
-            kspace = self.get_kspace()
-            nshells = kspace['nshell'].shape[0]
-            vec_stat_datasets = ['velocity', 'vorticity']
-            scal_stat_datasets = []
-            for k in vec_stat_datasets:
-                time_chunk = 2**20 // (
-                        self.dtype.itemsize*3*
-                        self.parameters['nx']*self.parameters['ny'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/0slices/' + k + '/real',
-                                     (1, self.parameters['ny'], self.parameters['nx'], 3),
-                                     chunks = (time_chunk, self.parameters['ny'], self.parameters['nx'], 3),
-                                     maxshape = (None, self.parameters['ny'], self.parameters['nx'], 3),
-                                     dtype = self.dtype)
-            if self.Lag_acc_stats_on:
-                vec_stat_datasets += ['Lagrangian_acceleration']
-                scal_stat_datasets += ['pressure']
-            for k in vec_stat_datasets:
-                time_chunk = 2**20//(8*3*3*nshells)
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/spectra/' + k + '_' + k,
-                                     (1, nshells, 3, 3),
-                                     chunks = (time_chunk, nshells, 3, 3),
-                                     maxshape = (None, nshells, 3, 3),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*4*10)
-                time_chunk = max(time_chunk, 1)
-                a = ofile.create_dataset('statistics/moments/' + k,
-                                     (1, 10, 4),
-                                     chunks = (time_chunk, 10, 4),
-                                     maxshape = (None, 10, 4),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*4*self.parameters['histogram_bins'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/' + k,
-                                     (1,
-                                      self.parameters['histogram_bins'],
-                                      4),
-                                     chunks = (time_chunk,
-                                               self.parameters['histogram_bins'],
-                                               4),
-                                     maxshape = (None,
-                                                 self.parameters['histogram_bins'],
-                                                 4),
-                                     dtype = np.int64)
-            for k in scal_stat_datasets:
-                time_chunk = 2**20//(8*nshells)
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/spectra/' + k + '_' + k,
-                                     (1, nshells),
-                                     chunks = (time_chunk, nshells),
-                                     maxshape = (None, nshells),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*10)
-                time_chunk = max(time_chunk, 1)
-                a = ofile.create_dataset('statistics/moments/' + k,
-                                     (1, 10),
-                                     chunks = (time_chunk, 10),
-                                     maxshape = (None, 10),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*self.parameters['histogram_bins'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/' + k,
-                                     (1,
-                                      self.parameters['histogram_bins']),
-                                     chunks = (time_chunk,
-                                               self.parameters['histogram_bins']),
-                                     maxshape = (None,
-                                                 self.parameters['histogram_bins']),
-                                     dtype = np.int64)
-            if self.QR_stats_on:
-                time_chunk = 2**20//(8*3*self.parameters['histogram_bins'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/trS2_Q_R',
-                                     (1,
-                                      self.parameters['histogram_bins'],
-                                      3),
-                                     chunks = (time_chunk,
-                                               self.parameters['histogram_bins'],
-                                               3),
-                                     maxshape = (None,
-                                                 self.parameters['histogram_bins'],
-                                                 3),
-                                     dtype = np.int64)
-                time_chunk = 2**20//(8*9*self.parameters['histogram_bins'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/velocity_gradient',
-                                     (1,
-                                      self.parameters['histogram_bins'],
-                                      3,
-                                      3),
-                                     chunks = (time_chunk,
-                                               self.parameters['histogram_bins'],
-                                               3,
-                                               3),
-                                     maxshape = (None,
-                                                 self.parameters['histogram_bins'],
-                                                 3,
-                                                 3),
-                                     dtype = np.int64)
-                time_chunk = 2**20//(8*3*10)
-                time_chunk = max(time_chunk, 1)
-                a = ofile.create_dataset('statistics/moments/trS2_Q_R',
-                                     (1, 10, 3),
-                                     chunks = (time_chunk, 10, 3),
-                                     maxshape = (None, 10, 3),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*9*10)
-                time_chunk = max(time_chunk, 1)
-                a = ofile.create_dataset('statistics/moments/velocity_gradient',
-                                     (1, 10, 3, 3),
-                                     chunks = (time_chunk, 10, 3, 3),
-                                     maxshape = (None, 10, 3, 3),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*self.parameters['QR2D_histogram_bins']**2)
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/QR2D',
-                                     (1,
-                                      self.parameters['QR2D_histogram_bins'],
-                                      self.parameters['QR2D_histogram_bins']),
-                                     chunks = (time_chunk,
-                                               self.parameters['QR2D_histogram_bins'],
-                                               self.parameters['QR2D_histogram_bins']),
-                                     maxshape = (None,
-                                                 self.parameters['QR2D_histogram_bins'],
-                                                 self.parameters['QR2D_histogram_bins']),
-                                     dtype = np.int64)
-        if self.particle_species == 0:
-            return None
-
-        if type(particle_ic) == type(None):
-            pbase_shape = (self.parameters['nparticles'],)
-            number_of_particles = self.parameters['nparticles']
-        else:
-            pbase_shape = particle_ic.shape[:-1]
-            assert(particle_ic.shape[-1] == 3)
-            if len(pbase_shape) == 1:
-                number_of_particles = pbase_shape[0]
-            else:
-                number_of_particles = 1
-                for val in pbase_shape[1:]:
-                    number_of_particles *= val
-
-        with h5py.File(self.get_particle_file_name(), 'a') as ofile:
-            for s in range(self.particle_species):
-                ofile.create_group('tracers{0}'.format(s))
-                time_chunk = 2**20 // (8*3*number_of_particles)
-                time_chunk = max(time_chunk, 1)
-                dims = ((1,
-                         self.parameters['tracers{0}_integration_steps'.format(s)]) +
-                        pbase_shape + (3,))
-                maxshape = (h5py.h5s.UNLIMITED,) + dims[1:]
-                if len(pbase_shape) > 1:
-                    chunks = (time_chunk, 1, 1) + dims[3:]
-                else:
-                    chunks = (time_chunk, 1) + dims[2:]
-                bfps.tools.create_alloc_early_dataset(
-                        ofile,
-                        '/tracers{0}/rhs'.format(s),
-                        dims, maxshape, chunks)
-                if len(pbase_shape) > 1:
-                    chunks = (time_chunk, 1) + pbase_shape[1:] + (3,)
-                else:
-                    chunks = (time_chunk, pbase_shape[0], 3)
-                bfps.tools.create_alloc_early_dataset(
-                        ofile,
-                        '/tracers{0}/state'.format(s),
-                        (1,) + pbase_shape + (3,),
-                        (h5py.h5s.UNLIMITED,) + pbase_shape + (3,),
-                        chunks)
-                # "velocity" is sampled, single precision is enough
-                # for the results we are interested in.
-                bfps.tools.create_alloc_early_dataset(
-                        ofile,
-                        '/tracers{0}/velocity'.format(s),
-                        (1,) + pbase_shape + (3,),
-                        (h5py.h5s.UNLIMITED,) + pbase_shape + (3,),
-                        chunks,
-                        dset_dtype = h5py.h5t.IEEE_F32LE)
-                if self.parameters['tracers{0}_acc_on'.format(s)]:
-                    bfps.tools.create_alloc_early_dataset(
-                            ofile,
-                            '/tracers{0}/acceleration'.format(s),
-                            (1,) + pbase_shape + (3,),
-                            (h5py.h5s.UNLIMITED,) + pbase_shape + (3,),
-                            chunks,
-                            dset_dtype = h5py.h5t.IEEE_F32LE)
-        return None
-    def add_particle_fields(
-            self,
-            interp_type = 'spline',
-            kcut = None,
-            neighbours = 1,
-            smoothness = 1,
-            name = 'particle_field',
-            field_class = 'rFFTW_interpolator',
-            acc_field_name = 'rFFTW_acc'):
-        self.fluid_includes += '#include "{0}.hpp"\n'.format(field_class)
-        self.fluid_variables += field_class + '<{0}, {1}> *vel_{2}, *acc_{2};\n'.format(
-                self.C_dtype, neighbours, name)
-        self.parameters[name + '_type'] = interp_type
-        self.parameters[name + '_neighbours'] = neighbours
-        if interp_type == 'spline':
-            self.parameters[name + '_smoothness'] = smoothness
-            beta_name = 'beta_n{0}_m{1}'.format(neighbours, smoothness)
-        elif interp_type == 'Lagrange':
-            beta_name = 'beta_Lagrange_n{0}'.format(neighbours)
-        if field_class == 'rFFTW_interpolator':
-            self.fluid_start += ('vel_{0} = new {1}<{2}, {3}>(fs, {4}, fs->rvelocity);\n' +
-                                 'acc_{0} = new {1}<{2}, {3}>(fs, {4}, {5});\n').format(name,
-                                                                                   field_class,
-                                                                                   self.C_dtype,
-                                                                                   neighbours,
-                                                                                   beta_name,
-                                                                                   acc_field_name)
-        elif field_class == 'interpolator':
-            self.fluid_start += ('vel_{0} = new {1}<{2}, {3}>(fs, {4});\n' +
-                                 'acc_{0} = new {1}<{2}, {3}>(fs, {4});\n').format(name,
-                                                                                   field_class,
-                                                                                   self.C_dtype,
-                                                                                   neighbours,
-                                                                                   beta_name,
-                                                                                   acc_field_name)
-        self.fluid_end += ('delete vel_{0};\n' +
-                           'delete acc_{0};\n').format(name)
-        update_fields = 'fs->compute_velocity(fs->cvorticity);\n'
-        if not type(kcut) == type(None):
-            update_fields += 'fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut)
-        update_fields += ('fs->ift_velocity();\n' +
-                          'fs->compute_Lagrangian_acceleration(acc_{0}->field);\n').format(name)
-        self.fluid_start += update_fields
-        self.fluid_loop += update_fields
-        return None
-    def specific_parser_arguments(
-            self,
-            parser):
-        _fluid_particle_base.specific_parser_arguments(self, parser)
-        parser.add_argument(
-                '--src-wd',
-                type = str,
-                dest = 'src_work_dir',
-                default = '')
-        parser.add_argument(
-                '--src-simname',
-                type = str,
-                dest = 'src_simname',
-                default = '')
-        parser.add_argument(
-                '--src-iteration',
-                type = int,
-                dest = 'src_iteration',
-                default = 0)
-        parser.add_argument(
-               '--njobs',
-               type = int, dest = 'njobs',
-               default = 1)
-        parser.add_argument(
-               '--QR-stats',
-               action = 'store_true',
-               dest = 'QR_stats',
-               help = 'add this option if you want to compute velocity gradient and QR stats')
-        parser.add_argument(
-               '--Lag-acc-stats',
-               action = 'store_true',
-               dest = 'Lag_acc_stats',
-               help = 'add this option if you want to compute Lagrangian acceleration statistics')
-        parser.add_argument(
-               '--kMeta',
-               type = float,
-               dest = 'kMeta',
-               default = 2.0)
-        parser.add_argument(
-               '--dtfactor',
-               type = float,
-               dest = 'dtfactor',
-               default = 0.5,
-               help = 'dt is computed as DTFACTOR / N')
-        parser.add_argument(
-               '--particle-rand-seed',
-               type = int,
-               dest = 'particle_rand_seed',
-               default = None)
-        parser.add_argument(
-               '--pclouds',
-               type = int,
-               dest = 'pclouds',
-               default = 1,
-               help = ('number of particle clouds. Particle "clouds" '
-                       'consist of particles distributed according to '
-                       'pcloud-type.'))
-        parser.add_argument(
-                '--pcloud-type',
-                choices = ['random-cube',
-                           'regular-cube'],
-                dest = 'pcloud_type',
-                default = 'random-cube')
-        parser.add_argument(
-               '--particle-cloud-size',
-               type = float,
-               dest = 'particle_cloud_size',
-               default = 2*np.pi)
-        parser.add_argument(
-                '--neighbours',
-                type = int,
-                dest = 'neighbours',
-                default = 1)
-        parser.add_argument(
-                '--smoothness',
-                type = int,
-                dest = 'smoothness',
-                default = 1)
-        return None
-    def prepare_launch(
-            self,
-            args = []):
-        """Set up reasonable parameters.
-
-        With the default Lundgren forcing applied in the band [2, 4],
-        we can estimate the dissipation, therefore we can estimate
-        :math:`k_M \\eta_K` and constrain the viscosity.
-        Also, if velocity gradient statistics are computed, the
-        dissipation is used for estimating the bins of the QR histogram.
-
-        In brief, the command line parameter :math:`k_M \\eta_K` is
-        used in the following formula for :math:`\\nu` (:math:`N` is the
-        number of real space grid points per coordinate):
-
-        .. math::
-
-            \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3}
-
-        With this choice, the average dissipation :math:`\\varepsilon`
-        will be close to 0.4, and the integral scale velocity will be
-        close to 0.77, yielding the approximate value for the Taylor
-        microscale and corresponding Reynolds number:
-
-        .. math::
-
-            \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in
-            R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6}
-
-        """
-        opt = _code.prepare_launch(self, args = args)
-        self.QR_stats_on = opt.QR_stats
-        self.Lag_acc_stats_on = opt.Lag_acc_stats
-        self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3)
-        self.parameters['dt'] = (opt.dtfactor / opt.n)
-        # custom famplitude for 288 and 576
-        if opt.n == 288:
-            self.parameters['famplitude'] = 0.45
-        elif opt.n == 576:
-            self.parameters['famplitude'] = 0.47
-        if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0):
-            self.parameters['niter_out'] = self.parameters['niter_todo']
-        if self.QR_stats_on:
-            # max_Q_estimate and max_R_estimate are just used for the 2D pdf
-            # therefore I just want them to be small multiples of mean trS2
-            # I'm already estimating the dissipation with kMeta...
-            meantrS2 = (opt.n//2 / opt.kMeta)**4 * self.parameters['nu']**2
-            self.parameters['max_Q_estimate'] = meantrS2
-            self.parameters['max_R_estimate'] = .4*meantrS2**1.5
-            # add QR suffix to code name, since we now expect additional
-            # datasets in the .h5 file
-            self.name += '-QR'
-        if self.Lag_acc_stats_on:
-            self.name += '-Lag_acc'
-        if len(opt.src_work_dir) == 0:
-            opt.src_work_dir = os.path.realpath(opt.work_dir)
-        self.pars_from_namespace(opt)
-        return opt
-    def launch(
-            self,
-            args = [],
-            noparticles = False,
-            **kwargs):
-        opt = self.prepare_launch(args = args)
-        self.fill_up_fluid_code()
-        if noparticles:
-            opt.nparticles = 0
-        elif type(opt.nparticles) == int:
-            if opt.nparticles > 0:
-                self.name += '-particles'
-                self.add_3D_rFFTW_field(
-                        name = 'rFFTW_acc')
-                self.add_interpolator(
-                        name = 'cubic_spline',
-                        neighbours = opt.neighbours,
-                        smoothness = opt.smoothness,
-                        class_name = 'rFFTW_interpolator')
-                self.add_particles(
-                        integration_steps = [4],
-                        interpolator = 'cubic_spline',
-                        acc_name = 'rFFTW_acc',
-                        class_name = 'rFFTW_distributed_particles')
-                self.variables += 'hid_t particle_file;\n'
-                self.main_start += """
-                    if (myrank == 0)
-                    {
-                        // set caching parameters
-                        hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
-                        herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0);
-                        DEBUG_MSG("when setting cache for particles I got %d\\n", cache_err);
-                        sprintf(fname, "%s_particles.h5", simname);
-                        particle_file = H5Fopen(fname, H5F_ACC_RDWR, fapl);
-                    }
-                    """
-                self.main_end = ('if (myrank == 0)\n' +
-                                 '{\n' +
-                                 'H5Fclose(particle_file);\n' +
-                                 '}\n') + self.main_end
-        self.finalize_code()
-        self.launch_jobs(opt = opt, **kwargs)
-        return None
-    def launch_jobs(
-            self,
-            opt = None,
-            particle_initial_condition = None):
-        if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')):
-            if opt.pclouds > 1:
-                np.random.seed(opt.particle_rand_seed)
-                if opt.pcloud_type == 'random-cube':
-                    particle_initial_condition = (
-                        np.random.random((opt.pclouds, 1, 3))*2*np.pi +
-                        np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size)
-                elif opt.pcloud_type == 'regular-cube':
-                    onedarray = np.linspace(
-                            -opt.particle_cloud_size/2,
-                            opt.particle_cloud_size/2,
-                            self.parameters['nparticles'])
-                    particle_initial_condition = np.zeros(
-                            (opt.pclouds,
-                             self.parameters['nparticles'],
-                             self.parameters['nparticles'],
-                             self.parameters['nparticles'], 3),
-                            dtype = np.float64)
-                    particle_initial_condition[:] = \
-                        np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi
-                    particle_initial_condition[..., 0] += onedarray[None, None, None, :]
-                    particle_initial_condition[..., 1] += onedarray[None, None, :, None]
-                    particle_initial_condition[..., 2] += onedarray[None, :, None, None]
-            self.write_par(
-                    particle_ic = particle_initial_condition)
-            if self.parameters['nparticles'] > 0:
-                data = self.generate_tracer_state(
-                        species = 0,
-                        rseed = opt.particle_rand_seed,
-                        data = particle_initial_condition)
-                for s in range(1, self.particle_species):
-                    self.generate_tracer_state(species = s, data = data)
-            init_condition_file = os.path.join(
-                    self.work_dir,
-                    self.simname + '_cvorticity_i{0:0>5x}'.format(0))
-            if not os.path.exists(init_condition_file):
-                if len(opt.src_simname) > 0:
-                    src_file = os.path.join(
-                            os.path.realpath(opt.src_work_dir),
-                            opt.src_simname + '_cvorticity_i{0:0>5x}'.format(opt.src_iteration))
-                    os.symlink(src_file, init_condition_file)
-                else:
-                   self.generate_vector_field(
-                           write_to_file = True,
-                           spectra_slope = 2.0,
-                           amplitude = 0.05)
-        self.run(
-                nb_processes = opt.nb_processes,
-                nb_threads_per_process = opt.nb_threads_per_process,
-                njobs = opt.njobs,
-                hours = opt.minutes // 60,
-                minutes = opt.minutes % 60,
-                no_submit = opt.no_submit)
-        return None
-
diff --git a/bfps/PP.py b/bfps/PP.py
index 27a359287dca65f01f2a66eaaef1fe56c13862fc..5716a7fe793c71413b823e4aad10dc6886294ef4 100644
--- a/bfps/PP.py
+++ b/bfps/PP.py
@@ -118,6 +118,7 @@ class PP(_code):
         return None
     def generate_default_parameters(self):
         # these parameters are relevant for all PP classes
+        self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE'
         self.parameters['dealias_type'] = int(1)
         self.parameters['dkx'] = float(1.0)
         self.parameters['dky'] = float(1.0)
diff --git a/bfps/TEST.py b/bfps/TEST.py
index cd4d3e4a82874c53b9dff5134e1a1da0067a61a7..66b2b4aad6b308c78735005f55765bfa3d3eb98c 100644
--- a/bfps/TEST.py
+++ b/bfps/TEST.py
@@ -119,6 +119,7 @@ class TEST(_code):
         return None
     def generate_default_parameters(self):
         # these parameters are relevant for all TEST classes
+        self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE'
         self.parameters['dealias_type'] = int(1)
         self.parameters['dkx'] = float(1.0)
         self.parameters['dky'] = float(1.0)
diff --git a/bfps/__init__.py b/bfps/__init__.py
index 29dc62a16cafafa3361480ce0bc7904fc92bc521..babbc203cc6ea1f788458415147d4a48f3b328f1 100644
--- a/bfps/__init__.py
+++ b/bfps/__init__.py
@@ -49,8 +49,5 @@ from host_information import host_info
 from .DNS import DNS
 from .PP import PP
 from .TEST import TEST
-from .FluidConvert import FluidConvert
-from .NavierStokes import NavierStokes
-from .NSVorticityEquation import NSVorticityEquation
 
 #import test
diff --git a/bfps/__main__.py b/bfps/__main__.py
index 03f68a971351ea8eb23eb6621c597e660dc7c825..16a7cf7d099c49a39368a8ff09cb05bf890feb6f 100644
--- a/bfps/__main__.py
+++ b/bfps/__main__.py
@@ -31,10 +31,6 @@ import bfps
 from .DNS import DNS
 from .PP import PP
 from .TEST import TEST
-from .NavierStokes import NavierStokes
-from .NSVorticityEquation import NSVorticityEquation
-from .FluidConvert import FluidConvert
-from .NSManyParticles import NSManyParticles
 
 def main():
     parser = argparse.ArgumentParser(prog = 'bfps')
@@ -42,29 +38,9 @@ def main():
             '-v', '--version',
             action = 'version',
             version = '%(prog)s ' + bfps.__version__)
-    NSoptions = ['NavierStokes',
-                 'NavierStokes-single',
-                 'NavierStokes-double',
-                 'NS',
-                 'NS-single',
-                 'NS-double']
-    NSVEoptions = ['NSVorticityEquation',
-                 'NSVorticityEquation-single',
-                 'NSVorticityEquation-double',
-                 'NSVE',
-                 'NSVE-single',
-                 'NSVE-double']
-    FCoptions = ['FluidConvert']
-    NSMPopt = ['NSManyParticles',
-               'NSManyParticles-single',
-               'NSManyParticles-double']
     parser.add_argument(
             'base_class',
-            choices = ['DNS', 'PP', 'TEST'] +
-                      NSoptions +
-                      NSVEoptions +
-                      FCoptions +
-                      NSMPopt,
+            choices = ['DNS', 'PP', 'TEST'],
             type = str)
     # first option is the choice of base class or -h or -v
     # all other options are passed on to the base_class instance
@@ -73,29 +49,10 @@ def main():
     # cannot be executed by mistake.
     if opt.base_class == 'DNS':
         c = DNS()
-        c.launch(args = sys.argv[2:])
-        return None
     if opt.base_class == 'PP':
         c = PP()
-        c.launch(args = sys.argv[2:])
-        return None
     if opt.base_class == 'TEST':
         c = TEST()
-        c.launch(args = sys.argv[2:])
-        return None
-    if 'double' in opt.base_class:
-        precision = 'double'
-    else:
-        precision = 'single'
-    if opt.base_class in NSoptions:
-        base_class = NavierStokes
-    if opt.base_class in NSVEoptions:
-        base_class = NSVorticityEquation
-    elif opt.base_class in FCoptions:
-        base_class = FluidConvert
-    elif opt.base_class in NSMPopt:
-        base_class = NSManyParticles
-    c = base_class(fluid_precision = precision)
     c.launch(args = sys.argv[2:])
     return None
 
diff --git a/bfps/_code.py b/bfps/_code.py
index fed603e3da2b6fe7e32a1dd398d59cdd38a5b49b..143ef29a5d4fc9e3da1c9c00b8e4df915b532beb 100644
--- a/bfps/_code.py
+++ b/bfps/_code.py
@@ -443,7 +443,7 @@ class _code(_base):
         script_file.write('mpiexec.hydra '
             + ' -np {} '.format(nb_mpi_processes)
             + ' -ppn {} '.format(nb_processes_per_node)
-            + ' -ordered-output -prepend-rank '
+            #+ ' -ordered-output -prepend-rank '
             + os.path.join(
                 self.work_dir,
                 command_atoms[0]) +
diff --git a/bfps/_fluid_base.py b/bfps/_fluid_base.py
deleted file mode 100644
index 757e6cb81e6c605cbcb3c2e9d19bd7487add115f..0000000000000000000000000000000000000000
--- a/bfps/_fluid_base.py
+++ /dev/null
@@ -1,503 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-from ._code import _code
-from bfps import tools
-
-import os
-import numpy as np
-import h5py
-
-class _fluid_particle_base(_code):
-    """This class is meant to put together all common code between the
-    different C++ solvers/postprocessing tools, so that development of
-    specific functionalities is not overwhelming.
-    """
-    def __init__(
-            self,
-            name = 'solver',
-            work_dir = './',
-            simname = 'test',
-            dtype = np.float32,
-            use_fftw_wisdom = True):
-        _code.__init__(
-                self,
-                work_dir = work_dir,
-                simname = simname)
-        self.use_fftw_wisdom = use_fftw_wisdom
-        self.name = name
-        self.particle_species = 0
-        if dtype in [np.float32, np.float64]:
-            self.dtype = dtype
-        elif dtype in ['single', 'double']:
-            if dtype == 'single':
-                self.dtype = np.dtype(np.float32)
-            elif dtype == 'double':
-                self.dtype = np.dtype(np.float64)
-        self.rtype = self.dtype
-        if self.rtype == np.float32:
-            self.ctype = np.dtype(np.complex64)
-            self.C_dtype = 'float'
-        elif self.rtype == np.float64:
-            self.ctype = np.dtype(np.complex128)
-            self.C_dtype = 'double'
-        self.parameters['dealias_type'] = 1
-        self.parameters['dkx'] = 1.0
-        self.parameters['dky'] = 1.0
-        self.parameters['dkz'] = 1.0
-        self.parameters['niter_todo'] = 8
-        self.parameters['niter_part'] = 1
-        self.parameters['niter_stat'] = 1
-        self.parameters['niter_out'] = 1024
-        self.parameters['nparticles'] = 0
-        self.parameters['dt'] = 0.01
-        self.fluid_includes = '#include "fluid_solver.hpp"\n'
-        self.fluid_includes = '#include "field.hpp"\n'
-        self.fluid_variables = ''
-        self.fluid_definitions = ''
-        self.fluid_start = ''
-        self.fluid_loop = ''
-        self.fluid_end  = ''
-        self.fluid_output = ''
-        self.stat_src = ''
-        self.particle_includes = ''
-        self.particle_variables = ''
-        self.particle_definitions = ''
-        self.particle_start = ''
-        self.particle_loop = ''
-        self.particle_output = ''
-        self.particle_end  = ''
-        self.particle_stat_src = ''
-        self.file_datasets_grow   = ''
-        self.store_kspace = """
-                //begincpp
-                if (myrank == 0 && iteration == 0)
-                {
-                    TIMEZONE("fuild_base::store_kspace");
-                    hsize_t dims[4];
-                    hid_t space, dset;
-                    // store kspace information
-                    hid_t parameter_file = stat_file;
-                    //char fname[256];
-                    //sprintf(fname, "%s.h5", simname);
-                    //parameter_file = H5Fopen(fname, H5F_ACC_RDWR, H5P_DEFAULT);
-                    dset = H5Dopen(parameter_file, "/kspace/kshell", H5P_DEFAULT);
-                    space = H5Dget_space(dset);
-                    H5Sget_simple_extent_dims(space, dims, NULL);
-                    H5Sclose(space);
-                    if (fs->nshells != dims[0])
-                    {
-                        DEBUG_MSG(
-                            "ERROR: computed nshells %d not equal to data file nshells %d\\n",
-                            fs->nshells, dims[0]);
-                    }
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, fs->kshell);
-                    H5Dclose(dset);
-                    dset = H5Dopen(parameter_file, "/kspace/nshell", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, fs->nshell);
-                    H5Dclose(dset);
-                    dset = H5Dopen(parameter_file, "/kspace/kM", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kMspec);
-                    H5Dclose(dset);
-                    dset = H5Dopen(parameter_file, "/kspace/dk", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->dk);
-                    H5Dclose(dset);
-                    //H5Fclose(parameter_file);
-                }
-                //endcpp
-                """
-        return None
-    def get_data_file_name(self):
-        return os.path.join(self.work_dir, self.simname + '.h5')
-    def get_data_file(self):
-        return h5py.File(self.get_data_file_name(), 'r')
-    def get_particle_file_name(self):
-        return os.path.join(self.work_dir, self.simname + '_particles.h5')
-    def get_particle_file(self):
-        return h5py.File(self.get_particle_file_name(), 'r')
-    def finalize_code(
-            self,
-            postprocess_mode = False):
-        self.includes   += self.fluid_includes
-        self.includes   += '#include <ctime>\n'
-        self.variables  += self.fluid_variables
-        self.definitions += ('int grow_single_dataset(hid_t dset, int tincrement)\n{\n' +
-                             'int ndims;\n' +
-                             'hsize_t space;\n' +
-                             'space = H5Dget_space(dset);\n' +
-                             'ndims = H5Sget_simple_extent_ndims(space);\n' +
-                             'hsize_t *dims = new hsize_t[ndims];\n' +
-                             'H5Sget_simple_extent_dims(space, dims, NULL);\n' +
-                             'dims[0] += tincrement;\n' +
-                             'H5Dset_extent(dset, dims);\n' +
-                             'H5Sclose(space);\n' +
-                             'delete[] dims;\n' +
-                             'return EXIT_SUCCESS;\n}\n')
-        self.definitions+= self.fluid_definitions
-        if self.particle_species > 0:
-            self.includes    += self.particle_includes
-            self.variables   += self.particle_variables
-            self.definitions += self.particle_definitions
-        self.definitions += ('herr_t grow_statistics_dataset(hid_t o_id, const char *name, const H5O_info_t *info, void *op_data)\n{\n' +
-                             'if (info->type == H5O_TYPE_DATASET)\n{\n' +
-                             'hsize_t dset = H5Dopen(o_id, name, H5P_DEFAULT);\n' +
-                             'grow_single_dataset(dset, niter_todo/niter_stat);\n'
-                             'H5Dclose(dset);\n}\n' +
-                             'return 0;\n}\n')
-        self.definitions += ('herr_t grow_particle_datasets(hid_t g_id, const char *name, const H5L_info_t *info, void *op_data)\n{\n' +
-                             'hsize_t dset;\n')
-        for key in ['state', 'velocity', 'acceleration']:
-            self.definitions += ('if (H5Lexists(g_id, "{0}", H5P_DEFAULT))\n'.format(key) +
-                                 '{\n' +
-                                 'dset = H5Dopen(g_id, "{0}", H5P_DEFAULT);\n'.format(key) +
-                                 'grow_single_dataset(dset, niter_todo/niter_part);\n' +
-                                 'H5Dclose(dset);\n}\n')
-        self.definitions += ('if (H5Lexists(g_id, "rhs", H5P_DEFAULT))\n{\n' +
-                             'dset = H5Dopen(g_id, "rhs", H5P_DEFAULT);\n' +
-                             'grow_single_dataset(dset, 1);\n' +
-                             'H5Dclose(dset);\n}\n' +
-                             'return 0;\n}\n')
-        self.definitions += ('int grow_file_datasets()\n{\n' +
-                             'int file_problems = 0;\n' +
-                             self.file_datasets_grow +
-                             'return file_problems;\n'
-                             '}\n')
-        self.definitions += 'void do_stats()\n{\n' + self.stat_src + '}\n'
-        self.definitions += 'void do_particle_stats()\n{\n' + self.particle_stat_src + '}\n'
-        # take care of wisdom
-        if self.use_fftw_wisdom:
-            if self.dtype == np.float32:
-                fftw_prefix = 'fftwf_'
-            elif self.dtype == np.float64:
-                fftw_prefix = 'fftw_'
-            self.main_start += """
-                        //begincpp
-                        if (myrank == 0)
-                        {{
-                            char fname[256];
-                            sprintf(fname, "%s_fftw_wisdom.txt", simname);
-                            {0}import_wisdom_from_filename(fname);
-                        }}
-                        {0}mpi_broadcast_wisdom(MPI_COMM_WORLD);
-                        //endcpp
-                        """.format(fftw_prefix)
-            self.main_end = """
-                        //begincpp
-                        {0}mpi_gather_wisdom(MPI_COMM_WORLD);
-                        MPI_Barrier(MPI_COMM_WORLD);
-                        if (myrank == 0)
-                        {{
-                            char fname[256];
-                            sprintf(fname, "%s_fftw_wisdom.txt", simname);
-                            {0}export_wisdom_to_filename(fname);
-                        }}
-                        //endcpp
-                        """.format(fftw_prefix) + self.main_end
-        self.main        = """
-                           //begincpp
-                           int data_file_problem;
-                           clock_t time0, time1;
-                           double time_difference, local_time_difference;
-                           time0 = clock();
-                           if (myrank == 0) data_file_problem = grow_file_datasets();
-                           MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, MPI_COMM_WORLD);
-                           if (data_file_problem > 0)
-                           {
-                               std::cerr << data_file_problem << " problems growing file datasets.\\ntrying to exit now." << std::endl;
-                               MPI_Finalize();
-                               return EXIT_SUCCESS;
-                           }
-                           //endcpp
-                           """
-        self.main       += self.fluid_start
-        if self.particle_species > 0:
-            self.main   += self.particle_start
-        output_time_difference = ('time1 = clock();\n' +
-                                  'local_time_difference = ((unsigned int)(time1 - time0))/((double)CLOCKS_PER_SEC);\n' +
-                                  'time_difference = 0.0;\n' +
-                                  'MPI_Allreduce(&local_time_difference, &time_difference, ' +
-                                      '1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);\n' +
-                                  'if (myrank == 0) std::cout << "iteration " ' +
-                                      '<< {0} << " took " ' +
-                                      '<< time_difference/nprocs << " seconds" << std::endl;\n' +
-                                  'if (myrank == 0) std::cerr << "iteration " ' +
-                                      '<< {0} << " took " ' +
-                                      '<< time_difference/nprocs << " seconds" << std::endl;\n' +
-                                  'time0 = time1;\n')
-        if not postprocess_mode:
-            self.main       += 'for (int max_iter = iteration+niter_todo-iteration%niter_todo; iteration < max_iter; iteration++)\n'
-            self.main       += '{\n'
-
-            self.main       += """
-                                #ifdef USE_TIMINGOUTPUT
-                                const std::string loopLabel = "code::main_start::loop-" + std::to_string(iteration);
-                                TIMEZONE(loopLabel.c_str());
-                                #endif
-                                """
-            self.main       += 'if (iteration % niter_stat == 0) do_stats();\n'
-            if self.particle_species > 0:
-                self.main       += 'if (iteration % niter_part == 0) do_particle_stats();\n'
-                self.main   += self.particle_loop
-            self.main       += self.fluid_loop
-            self.main       += output_time_difference.format('iteration')
-            self.main       += '}\n'
-            self.main       += 'do_stats();\n'
-            self.main       += 'do_particle_stats();\n'
-            self.main       += output_time_difference.format('iteration')
-        else:
-            self.main       += 'for (int frame_index = iter0; frame_index <= iter1; frame_index += niter_out)\n'
-            self.main       += '{\n'
-            self.main       += """
-                                #ifdef USE_TIMINGOUTPUT
-                                const std::string loopLabel = "code::main_start::loop-" + std::to_string(frame_index);
-                                TIMEZONE(loopLabel.c_str());
-                                #endif
-                                """
-            if self.particle_species > 0:
-                self.main   += self.particle_loop
-            self.main       += self.fluid_loop
-            self.main       += output_time_difference.format('frame_index')
-            self.main       += '}\n'
-        self.main       += self.fluid_end
-        if self.particle_species > 0:
-            self.main   += self.particle_end
-        return None
-    def read_rfield(
-            self,
-            field = 'velocity',
-            iteration = 0,
-            filename = None):
-        """
-            :note: assumes field is a vector field
-        """
-        if type(filename) == type(None):
-            filename = os.path.join(
-                    self.work_dir,
-                    self.simname + '_r' + field + '_i{0:0>5x}'.format(iteration))
-        return np.memmap(
-                filename,
-                dtype = self.dtype,
-                mode = 'r',
-                shape = (self.parameters['nz'],
-                         self.parameters['ny'],
-                         self.parameters['nx'], 3))
-    def transpose_frame(
-            self,
-            field = 'velocity',
-            iteration = 0,
-            filename = None,
-            ofile = None):
-        Rdata = self.read_rfield(
-                field = field,
-                iteration = iteration,
-                filename = filename)
-        new_data = np.zeros(
-                (3,
-                 self.parameters['nz'],
-                 self.parameters['ny'],
-                 self.parameters['nx']),
-                dtype = self.dtype)
-        for i in range(3):
-            new_data[i] = Rdata[..., i]
-        if type(ofile) == type(None):
-            ofile = os.path.join(
-                    self.work_dir,
-                    self.simname + '_r' + field + '_i{0:0>5x}_3xNZxNYxNX'.format(iteration))
-        else:
-            new_data.tofile(ofile)
-        return new_data
-    def plot_vel_cut(
-            self,
-            axis,
-            field = 'velocity',
-            iteration = 0,
-            yval = 13,
-            filename = None):
-        axis.set_axis_off()
-        Rdata0 = self.read_rfield(field = field, iteration = iteration, filename = filename)
-        energy = np.sum(Rdata0[:, yval, :, :]**2, axis = 2)*.5
-        axis.imshow(energy, interpolation='none')
-        axis.set_title('{0}'.format(np.average(Rdata0[..., 0]**2 +
-                                               Rdata0[..., 1]**2 +
-                                               Rdata0[..., 2]**2)*.5))
-        return Rdata0
-    def generate_vector_field(
-            self,
-            rseed = 7547,
-            spectra_slope = 1.,
-            amplitude = 1.,
-            iteration = 0,
-            field_name = 'vorticity',
-            write_to_file = False,
-            # to switch to constant field, use generate_data_3D_uniform
-            # for scalar_generator
-            scalar_generator = tools.generate_data_3D):
-        """generate vector field.
-
-        The generated field is not divergence free, but it has the proper
-        shape.
-
-        :param rseed: seed for random number generator
-        :param spectra_slope: spectrum of field will look like k^(-p)
-        :param amplitude: all amplitudes are multiplied with this value
-        :param iteration: the field is written at this iteration
-        :param field_name: the name of the field being generated
-        :param write_to_file: should we write the field to file?
-        :param scalar_generator: which function to use for generating the
-            individual components.
-            Possible values: bfps.tools.generate_data_3D,
-            bfps.tools.generate_data_3D_uniform
-        :type rseed: int
-        :type spectra_slope: float
-        :type amplitude: float
-        :type iteration: int
-        :type field_name: str
-        :type write_to_file: bool
-        :type scalar_generator: function
-
-        :returns: ``Kdata``, a complex valued 4D ``numpy.array`` that uses the
-            transposed FFTW layout.
-            Kdata[ky, kz, kx, i] is the amplitude of mode (kx, ky, kz) for
-            the i-th component of the field.
-            (i.e. x is the fastest index and z the slowest index in the
-            real-space representation).
-        """
-        np.random.seed(rseed)
-        Kdata00 = scalar_generator(
-                self.parameters['nz']//2,
-                self.parameters['ny']//2,
-                self.parameters['nx']//2,
-                p = spectra_slope,
-                amplitude = amplitude).astype(self.ctype)
-        Kdata01 = scalar_generator(
-                self.parameters['nz']//2,
-                self.parameters['ny']//2,
-                self.parameters['nx']//2,
-                p = spectra_slope,
-                amplitude = amplitude).astype(self.ctype)
-        Kdata02 = scalar_generator(
-                self.parameters['nz']//2,
-                self.parameters['ny']//2,
-                self.parameters['nx']//2,
-                p = spectra_slope,
-                amplitude = amplitude).astype(self.ctype)
-        Kdata0 = np.zeros(
-                Kdata00.shape + (3,),
-                Kdata00.dtype)
-        Kdata0[..., 0] = Kdata00
-        Kdata0[..., 1] = Kdata01
-        Kdata0[..., 2] = Kdata02
-        Kdata1 = tools.padd_with_zeros(
-                Kdata0,
-                self.parameters['nz'],
-                self.parameters['ny'],
-                self.parameters['nx'])
-        if write_to_file:
-            Kdata1.tofile(
-                    os.path.join(self.work_dir,
-                                 self.simname + "_c{0}_i{1:0>5x}".format(field_name, iteration)))
-        return Kdata1
-    def generate_tracer_state(
-            self,
-            rseed = None,
-            iteration = 0,
-            species = 0,
-            write_to_file = False,
-            ncomponents = 3,
-            testing = False,
-            data = None):
-        if (type(data) == type(None)):
-            if not type(rseed) == type(None):
-                np.random.seed(rseed)
-            #point with problems: 5.37632864e+00,   6.10414710e+00,   6.25256493e+00]
-            data = np.zeros(self.parameters['nparticles']*ncomponents).reshape(-1, ncomponents)
-            data[:, :3] = np.random.random((self.parameters['nparticles'], 3))*2*np.pi
-        if testing:
-            #data[0] = np.array([3.26434, 4.24418, 3.12157])
-            data[0] = np.array([ 0.72086101,  2.59043666,  6.27501953])
-        with h5py.File(self.get_particle_file_name(), 'r+') as data_file:
-            data_file['tracers{0}/state'.format(species)][0] = data
-        if write_to_file:
-            data.tofile(
-                    os.path.join(
-                        self.work_dir,
-                        "tracers{0}_state_i{1:0>5x}".format(species, iteration)))
-        return data
-    def generate_initial_condition(self):
-        self.generate_vector_field(write_to_file = True)
-        for species in range(self.particle_species):
-            self.generate_tracer_state(
-                    species = species,
-                    write_to_file = False)
-        return None
-    def get_kspace(self):
-        kspace = {}
-        if self.parameters['dealias_type'] == 1:
-            kMx = self.parameters['dkx']*(self.parameters['nx']//2 - 1)
-            kMy = self.parameters['dky']*(self.parameters['ny']//2 - 1)
-            kMz = self.parameters['dkz']*(self.parameters['nz']//2 - 1)
-        else:
-            kMx = self.parameters['dkx']*(self.parameters['nx']//3 - 1)
-            kMy = self.parameters['dky']*(self.parameters['ny']//3 - 1)
-            kMz = self.parameters['dkz']*(self.parameters['nz']//3 - 1)
-        kspace['kM'] = max(kMx, kMy, kMz)
-        kspace['dk'] = min(self.parameters['dkx'],
-                           self.parameters['dky'],
-                           self.parameters['dkz'])
-        nshells = int(kspace['kM'] / kspace['dk']) + 2
-        kspace['nshell'] = np.zeros(nshells, dtype = np.int64)
-        kspace['kshell'] = np.zeros(nshells, dtype = np.float64)
-        kspace['kx'] = np.arange( 0,
-                                  self.parameters['nx']//2 + 1).astype(np.float64)*self.parameters['dkx']
-        kspace['ky'] = np.arange(-self.parameters['ny']//2 + 1,
-                                  self.parameters['ny']//2 + 1).astype(np.float64)*self.parameters['dky']
-        kspace['ky'] = np.roll(kspace['ky'], self.parameters['ny']//2+1)
-        kspace['kz'] = np.arange(-self.parameters['nz']//2 + 1,
-                                  self.parameters['nz']//2 + 1).astype(np.float64)*self.parameters['dkz']
-        kspace['kz'] = np.roll(kspace['kz'], self.parameters['nz']//2+1)
-        return kspace
-    def write_par(self, iter0 = 0):
-        assert (self.parameters['niter_todo'] % self.parameters['niter_stat'] == 0)
-        assert (self.parameters['niter_todo'] % self.parameters['niter_out']  == 0)
-        assert (self.parameters['niter_todo'] % self.parameters['niter_part'] == 0)
-        assert (self.parameters['niter_out']  % self.parameters['niter_stat'] == 0)
-        assert (self.parameters['niter_out']  % self.parameters['niter_part'] == 0)
-        _code.write_par(self, iter0 = iter0)
-        with h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r+') as ofile:
-            ofile['bfps_info/exec_name'] = self.name
-            ofile['field_dtype'] = np.dtype(self.dtype).str
-            kspace = self.get_kspace()
-            for k in kspace.keys():
-                ofile['kspace/' + k] = kspace[k]
-            nshells = kspace['nshell'].shape[0]
-            ofile.close()
-        return None
-    def specific_parser_arguments(
-            self,
-            parser):
-        _code.specific_parser_arguments(self, parser)
-        return None
-
diff --git a/bfps/cpp/distributed_particles.cpp b/bfps/cpp/distributed_particles.cpp
deleted file mode 100644
index 73fd0275d8138d41bb4ee7fbc28e2d41e8017661..0000000000000000000000000000000000000000
--- a/bfps/cpp/distributed_particles.cpp
+++ /dev/null
@@ -1,472 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-//#define NDEBUG
-
-#include <cmath>
-#include <cassert>
-#include <cstring>
-#include <string>
-#include <sstream>
-#include <array>
-
-#include "base.hpp"
-#include "distributed_particles.hpp"
-#include "fftw_tools.hpp"
-#include "scope_timer.hpp"
-
-
-extern int myrank, nprocs;
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_particles(
-        const char *NAME,
-        const hid_t data_file_id,
-        interpolator<rnumber, interp_neighbours> *VEL,
-        const int TRAJ_SKIP,
-        const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
-            NAME,
-            TRAJ_SKIP,
-            data_file_id,
-            VEL->descriptor->comm)
-{
-    assert((INTEGRATION_STEPS <= 6) &&
-           (INTEGRATION_STEPS >= 1));
-    this->vel = VEL;
-    this->rhs.resize(INTEGRATION_STEPS);
-    this->integration_steps = INTEGRATION_STEPS;
-    this->state.reserve(2*this->nparticles / this->nprocs);
-    for (unsigned int i=0; i<this->rhs.size(); i++)
-        this->rhs[i].reserve(2*this->nparticles / this->nprocs);
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-distributed_particles<particle_type, rnumber, interp_neighbours>::~distributed_particles()
-{
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
-        interpolator<rnumber, interp_neighbours> *field,
-        const std::unordered_map<int, single_particle_state<particle_type>> &x,
-        std::unordered_map<int, single_particle_state<POINT3D>> &y)
-{
-    std::array<double, 3> yy;
-    y.clear();
-    for (auto &pp: x)
-    {
-        (*field)(pp.second.data, &yy.front());
-        y[pp.first] = &yy.front();
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs(
-        const std::unordered_map<int, single_particle_state<particle_type>> &x,
-        std::unordered_map<int, single_particle_state<particle_type>> &y)
-{
-    std::unordered_map<int, single_particle_state<POINT3D>> yy;
-    switch(particle_type)
-    {
-        case VELOCITY_TRACER:
-            this->sample(this->vel, this->state, yy);
-            y.clear();
-            for (auto &pp: x)
-                y[pp.first] = yy[pp.first].data;
-            break;
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
-        interpolator<rnumber, interp_neighbours> *field,
-        const char *dset_name)
-{
-    std::unordered_map<int, single_particle_state<POINT3D>> y;
-    this->sample(field, this->state, y);
-    this->write(dset_name, y);
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
-{
-    for (int i=this->integration_steps-2; i>=0; i--)
-        rhs[i+1] = rhs[i];
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute(
-        std::unordered_map<int, single_particle_state<particle_type>> &x,
-        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals)
-{
-    TIMEZONE("distributed_particles::redistribute");
-    //DEBUG_MSG("entered redistribute\n");
-    /* neighbouring rank offsets */
-    int ro[2];
-    ro[0] = -1;
-    ro[1] = 1;
-    /* neighbouring ranks */
-    int nr[2];
-    nr[0] = MOD(this->myrank+ro[0], this->nprocs);
-    nr[1] = MOD(this->myrank+ro[1], this->nprocs);
-    /* particles to send, particles to receive */
-    std::vector<int> ps[2], pr[2];
-    /* number of particles to send, number of particles to receive */
-    int nps[2], npr[2];
-    int rsrc, rdst;
-    /* get list of id-s to send */
-    for (auto &pp: x)
-        for (unsigned int i=0; i<2; i++)
-            if (this->vel->get_rank(pp.second.data[2]) == nr[i])
-                ps[i].push_back(pp.first);
-    /* prepare data for send recv */
-    for (unsigned int i=0; i<2; i++)
-        nps[i] = ps[i].size();
-    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
-        for (unsigned int i=0; i<2; i++)
-        {
-            rdst = MOD(rsrc+ro[i], this->nprocs);
-            if (this->myrank == rsrc)
-                MPI_Send(
-                        nps+i,
-                        1,
-                        MPI_INTEGER,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst)+i,
-                        this->comm);
-            if (this->myrank == rdst)
-                MPI_Recv(
-                        npr+1-i,
-                        1,
-                        MPI_INTEGER,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst)+i,
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-        }
-    //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]);
-    //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]);
-    for (unsigned int i=0; i<2; i++)
-        pr[i].resize(npr[i]);
-
-    int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1];
-    buffer_size = (buffer_size > npr[0])? buffer_size : npr[0];
-    buffer_size = (buffer_size > npr[1])? buffer_size : npr[1];
-    //DEBUG_MSG("buffer size is %d\n", buffer_size);
-    double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())];
-    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
-        for (unsigned int i=0; i<2; i++)
-        {
-            rdst = MOD(rsrc+ro[i], this->nprocs);
-            if (this->myrank == rsrc && nps[i] > 0)
-            {
-                MPI_Send(
-                        &ps[i].front(),
-                        nps[i],
-                        MPI_INTEGER,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst),
-                        this->comm);
-                int pcounter = 0;
-                for (int p: ps[i])
-                {
-                    std::copy(x[p].data,
-                              x[p].data + state_dimension(particle_type),
-                              buffer + pcounter*(1+vals.size())*state_dimension(particle_type));
-                    x.erase(p);
-                    for (unsigned int tindex=0; tindex<vals.size(); tindex++)
-                    {
-                        std::copy(vals[tindex][p].data,
-                                  vals[tindex][p].data + state_dimension(particle_type),
-                                  buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type));
-                        vals[tindex].erase(p);
-                    }
-                    pcounter++;
-                }
-                MPI_Send(
-                        buffer,
-                        nps[i]*(1+vals.size())*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst)+1,
-                        this->comm);
-            }
-            if (this->myrank == rdst && npr[1-i] > 0)
-            {
-                MPI_Recv(
-                        &pr[1-i].front(),
-                        npr[1-i],
-                        MPI_INTEGER,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst),
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-                MPI_Recv(
-                        buffer,
-                        npr[1-i]*(1+vals.size())*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst)+1,
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-                unsigned int pcounter = 0;
-                for (int p: pr[1-i])
-                {
-                    x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type);
-                    for (unsigned int tindex=0; tindex<vals.size(); tindex++)
-                    {
-                        vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type);
-                    }
-                    pcounter++;
-                }
-            }
-        }
-    delete[] buffer;
-
-
-#ifndef NDEBUG
-    /* check that all particles at x are local */
-    for (auto &pp: x)
-        if (this->vel->get_rank(pp.second.data[2]) != this->myrank)
-        {
-            DEBUG_MSG("found particle %d with rank %d\n",
-                    pp.first,
-                    this->vel->get_rank(pp.second.data[2]));
-            assert(false);
-        }
-#endif
-    //DEBUG_MSG("exiting redistribute\n");
-}
-
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
-        const int nsteps)
-{
-    this->get_rhs(this->state, this->rhs[0]);
-    for (auto &pp: this->state)
-        for (unsigned int i=0; i<state_dimension(particle_type); i++)
-            switch(nsteps)
-            {
-                case 1:
-                    pp.second[i] += this->dt*this->rhs[0][pp.first][i];
-                    break;
-                case 2:
-                    pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i]
-                                            -   this->rhs[1][pp.first][i])/2;
-                    break;
-                case 3:
-                    pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i]
-                                            - 16*this->rhs[1][pp.first][i]
-                                            +  5*this->rhs[2][pp.first][i])/12;
-                    break;
-                case 4:
-                    pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i]
-                                            - 59*this->rhs[1][pp.first][i]
-                                            + 37*this->rhs[2][pp.first][i]
-                                            -  9*this->rhs[3][pp.first][i])/24;
-                    break;
-                case 5:
-                    pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i]
-                                            - 2774*this->rhs[1][pp.first][i]
-                                            + 2616*this->rhs[2][pp.first][i]
-                                            - 1274*this->rhs[3][pp.first][i]
-                                            +  251*this->rhs[4][pp.first][i])/720;
-                    break;
-                case 6:
-                    pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i]
-                                            - 7923*this->rhs[1][pp.first][i]
-                                            + 9982*this->rhs[2][pp.first][i]
-                                            - 7298*this->rhs[3][pp.first][i]
-                                            + 2877*this->rhs[4][pp.first][i]
-                                            -  475*this->rhs[5][pp.first][i])/1440;
-                    break;
-            }
-    this->redistribute(this->state, this->rhs);
-    this->roll_rhs();
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::step()
-{
-    TIMEZONE("distributed_particles::step");
-    this->AdamsBashforth((this->iteration < this->integration_steps) ?
-                            this->iteration+1 :
-                            this->integration_steps);
-    this->iteration++;
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::read()
-{
-    double *temp = new double[this->chunk_size*state_dimension(particle_type)];
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        //read state
-        if (this->myrank == 0)
-            this->read_state_chunk(cindex, temp);
-        MPI_Bcast(
-                temp,
-                this->chunk_size*state_dimension(particle_type),
-                MPI_DOUBLE,
-                0,
-                this->comm);
-        for (unsigned int p=0; p<this->chunk_size; p++)
-        {
-            if (this->vel->get_rank(temp[state_dimension(particle_type)*p+2]) == this->myrank)
-                this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
-        }
-        //read rhs
-        if (this->iteration > 0)
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                if (this->myrank == 0)
-                    this->read_rhs_chunk(cindex, i, temp);
-                MPI_Bcast(
-                        temp,
-                        this->chunk_size*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        0,
-                        this->comm);
-                for (unsigned int p=0; p<this->chunk_size; p++)
-                {
-                    auto pp = this->state.find(p+cindex*this->chunk_size);
-                    if (pp != this->state.end())
-                        this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
-                }
-            }
-    }
-    DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size());
-    delete[] temp;
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::write(
-        const char *dset_name,
-        std::unordered_map<int, single_particle_state<POINT3D>> &y)
-{
-    TIMEZONE("distributed_particles::write");
-    double *data = new double[this->nparticles*3];
-    double *yy = new double[this->nparticles*3];
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        std::fill_n(yy, this->chunk_size*3, 0);
-        for (unsigned int p=0; p<this->chunk_size; p++)
-        {
-            auto pp = y.find(p+cindex*this->chunk_size);
-            if (pp != y.end())
-                std::copy(pp->second.data,
-                          pp->second.data + 3,
-                          yy + pp->first*3);
-        }
-        MPI_Allreduce(
-                yy,
-                data,
-                3*this->nparticles,
-                MPI_DOUBLE,
-                MPI_SUM,
-                this->comm);
-        if (this->myrank == 0)
-            this->write_point3D_chunk(dset_name, cindex, data);
-    }
-    delete[] yy;
-    delete[] data;
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::write(
-        const bool write_rhs)
-{
-    TIMEZONE("distributed_particles::write2");
-    double *temp0 = new double[this->chunk_size*state_dimension(particle_type)];
-    double *temp1 = new double[this->chunk_size*state_dimension(particle_type)];
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        //write state
-        std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
-        for (unsigned int p=0; p<this->chunk_size; p++)
-        {
-            auto pp = this->state.find(p + cindex*this->chunk_size);
-            if (pp != this->state.end())
-                std::copy(pp->second.data,
-                          pp->second.data + state_dimension(particle_type),
-                          temp0 + p*state_dimension(particle_type));
-        }
-        MPI_Allreduce(
-                temp0,
-                temp1,
-                state_dimension(particle_type)*this->chunk_size,
-                MPI_DOUBLE,
-                MPI_SUM,
-                this->comm);
-        if (this->myrank == 0)
-            this->write_state_chunk(cindex, temp1);
-        //write rhs
-        if (write_rhs)
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
-                for (unsigned int p=0; p<this->chunk_size; p++)
-                {
-                    auto pp = this->rhs[i].find(p + cindex*this->chunk_size);
-                    if (pp != this->rhs[i].end())
-                        std::copy(pp->second.data,
-                                  pp->second.data + state_dimension(particle_type),
-                                  temp0 + p*state_dimension(particle_type));
-                }
-                MPI_Allreduce(
-                        temp0,
-                        temp1,
-                        state_dimension(particle_type)*this->chunk_size,
-                        MPI_DOUBLE,
-                        MPI_SUM,
-                        this->comm);
-                if (this->myrank == 0)
-                    this->write_rhs_chunk(cindex, i, temp1);
-            }
-    }
-    delete[] temp0;
-    delete[] temp1;
-}
-
-
-/*****************************************************************************/
-template class distributed_particles<VELOCITY_TRACER, float, 1>;
-template class distributed_particles<VELOCITY_TRACER, float, 2>;
-template class distributed_particles<VELOCITY_TRACER, float, 3>;
-template class distributed_particles<VELOCITY_TRACER, float, 4>;
-template class distributed_particles<VELOCITY_TRACER, float, 5>;
-template class distributed_particles<VELOCITY_TRACER, float, 6>;
-template class distributed_particles<VELOCITY_TRACER, double, 1>;
-template class distributed_particles<VELOCITY_TRACER, double, 2>;
-template class distributed_particles<VELOCITY_TRACER, double, 3>;
-template class distributed_particles<VELOCITY_TRACER, double, 4>;
-template class distributed_particles<VELOCITY_TRACER, double, 5>;
-template class distributed_particles<VELOCITY_TRACER, double, 6>;
-/*****************************************************************************/
diff --git a/bfps/cpp/distributed_particles.hpp b/bfps/cpp/distributed_particles.hpp
deleted file mode 100644
index cf6e124a7744c049b6fcf0c84c1618a0a214c30e..0000000000000000000000000000000000000000
--- a/bfps/cpp/distributed_particles.hpp
+++ /dev/null
@@ -1,105 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <iostream>
-#include <unordered_map>
-#include <vector>
-#include <hdf5.h>
-#include "base.hpp"
-#include "particles_base.hpp"
-#include "fluid_solver_base.hpp"
-#include "interpolator.hpp"
-
-#ifndef DISTRIBUTED_PARTICLES
-
-#define DISTRIBUTED_PARTICLES
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-class distributed_particles: public particles_io_base<particle_type>
-{
-    private:
-        std::unordered_map<int, single_particle_state<particle_type> > state;
-        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs;
-
-    public:
-        int integration_steps;
-        // this class only works with buffered interpolator
-        interpolator<rnumber, interp_neighbours> *vel;
-
-        /* simulation parameters */
-        double dt;
-
-        /* methods */
-
-        /* constructor and destructor.
-         * allocate and deallocate:
-         *  this->state
-         *  this->rhs
-         * */
-        distributed_particles(
-                const char *NAME,
-                const hid_t data_file_id,
-                interpolator<rnumber, interp_neighbours> *FIELD,
-                const int TRAJ_SKIP,
-                const int INTEGRATION_STEPS = 2);
-        ~distributed_particles();
-
-        void sample(
-                interpolator<rnumber, interp_neighbours> *field,
-                const char *dset_name);
-        void sample(
-                interpolator<rnumber, interp_neighbours> *field,
-                const std::unordered_map<int, single_particle_state<particle_type>> &x,
-                std::unordered_map<int, single_particle_state<POINT3D>> &y);
-        void get_rhs(
-                const std::unordered_map<int, single_particle_state<particle_type>> &x,
-                std::unordered_map<int, single_particle_state<particle_type>> &y);
-
-        void redistribute(
-                std::unordered_map<int, single_particle_state<particle_type>> &x,
-                std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals);
-
-
-        /* input/output */
-        void read();
-        void write(
-                const char *dset_name,
-                std::unordered_map<int, single_particle_state<POINT3D>> &y);
-        void write(
-                const char *dset_name,
-                std::unordered_map<int, single_particle_state<particle_type>> &y);
-        void write(const bool write_rhs = true);
-
-        /* solvers */
-        void step();
-        void roll_rhs();
-        void AdamsBashforth(const int nsteps);
-};
-
-#endif//DISTRIBUTED_PARTICLES
-
diff --git a/bfps/cpp/fftw_interface.hpp b/bfps/cpp/fftw_interface.hpp
index 7f5e014400c86ffe4caa594a67e1756c890c6503..0a840dd5ba3d864b36271515faa7cb81f3042c01 100644
--- a/bfps/cpp/fftw_interface.hpp
+++ b/bfps/cpp/fftw_interface.hpp
@@ -26,6 +26,8 @@
 #define FFTW_INTERFACE_HPP
 
 #include <fftw3-mpi.h>
+#include <map>
+#include <string>
 
 #ifdef USE_FFTWESTIMATE
 #define DEFAULT_FFTW_FLAG FFTW_ESTIMATE
diff --git a/bfps/cpp/fftw_tools.cpp b/bfps/cpp/fftw_tools.cpp
index 61e03d292f81aed1fa4b2dfcab880fb7105b676e..55794b41ebf8ebfa03977d5a79704aa38b39af52 100644
--- a/bfps/cpp/fftw_tools.cpp
+++ b/bfps/cpp/fftw_tools.cpp
@@ -31,192 +31,10 @@
 
 #define NDEBUG
 
-template <class rnumber>
-int clip_zero_padding(
-        field_descriptor<rnumber> *f,
-        rnumber *a,
-        int howmany)
-{
-    if (f->ndims < 3)
-        return EXIT_FAILURE;
-    rnumber *b = a;
-    ptrdiff_t copy_size = f->sizes[2] * howmany;
-    ptrdiff_t skip_size = copy_size + 2*howmany;
-    for (int i0 = 0; i0 < f->subsizes[0]; i0++)
-        for (int i1 = 0; i1 < f->sizes[1]; i1++)
-        {
-            std::copy(a, a + copy_size, b);
-            a += skip_size;
-            b += copy_size;
-        }
-    return EXIT_SUCCESS;
-}
-
-template
-int clip_zero_padding<float>(
-        field_descriptor<float> *f,
-        float *a,
-        int howmany);
-
-template
-int clip_zero_padding<double>(
-        field_descriptor<double> *f,
-        double *a,
-        int howmany);
-
-
-
-template <class rnumber>
-int copy_complex_array(
-        field_descriptor<rnumber> *fi,
-        rnumber (*ai)[2],
-field_descriptor<rnumber> *fo,
-rnumber (*ao)[2],
-int howmany)
-{
-    DEBUG_MSG("entered copy_complex_array\n");
-    typename fftw_interface<rnumber>::complex *buffer;
-    buffer = fftw_interface<rnumber>::alloc_complex(fi->slice_size*howmany);
-
-    int min_fast_dim;
-    min_fast_dim =
-            (fi->sizes[2] > fo->sizes[2]) ?
-                fo->sizes[2] : fi->sizes[2];
-
-    /* clean up destination, in case we're padding with zeros
-       (even if only for one dimension) */
-    std::fill_n((rnumber*)ao, fo->local_size*2, 0.0);
-
-    int64_t ii0, ii1;
-    int64_t oi0, oi1;
-    int64_t delta1, delta0;
-    int irank, orank;
-    delta0 = (fo->sizes[0] - fi->sizes[0]);
-    delta1 = (fo->sizes[1] - fi->sizes[1]);
-    for (ii0=0; ii0 < fi->sizes[0]; ii0++)
-    {
-        if (ii0 <= fi->sizes[0]/2)
-        {
-            oi0 = ii0;
-            if (oi0 > fo->sizes[0]/2)
-                continue;
-        }
-        else
-        {
-            oi0 = ii0 + delta0;
-            if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2))
-                continue;
-        }
-        irank = fi->rank[ii0];
-        orank = fo->rank[oi0];
-        if ((irank == orank) &&
-                (irank == fi->myrank))
-        {
-            std::copy(
-                        (rnumber*)(ai + (ii0 - fi->starts[0]    )*fi->slice_size),
-                    (rnumber*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size),
-                    (rnumber*)buffer);
-        }
-        else
-        {
-            if (fi->myrank == irank)
-            {
-                MPI_Send(
-                            (void*)(ai + (ii0-fi->starts[0])*fi->slice_size),
-                        fi->slice_size,
-                        mpi_real_type<rnumber>::complex(),
-                        orank,
-                        ii0,
-                        fi->comm);
-            }
-            if (fi->myrank == orank)
-            {
-                MPI_Recv(
-                            (void*)(buffer),
-                            fi->slice_size,
-                            mpi_real_type<rnumber>::complex(),
-                            irank,
-                            ii0,
-                            fi->comm,
-                            MPI_STATUS_IGNORE);
-            }
-        }
-        if (fi->myrank == orank)
-        {
-            for (ii1 = 0; ii1 < fi->sizes[1]; ii1++)
-            {
-                if (ii1 <= fi->sizes[1]/2)
-                {
-                    oi1 = ii1;
-                    if (oi1 > fo->sizes[1]/2)
-                        continue;
-                }
-                else
-                {
-                    oi1 = ii1 + delta1;
-                    if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2))
-                        continue;
-                }
-                std::copy(
-                            (rnumber*)(buffer + (ii1*fi->sizes[2]*howmany)),
-                        (rnumber*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany),
-                        (rnumber*)(ao +
-                                   ((oi0 - fo->starts[0])*fo->sizes[1] +
-                        oi1)*fo->sizes[2]*howmany));
-            }
-        }
-    }
-    fftw_interface<rnumber>::free(buffer);
-    MPI_Barrier(fi->comm);
-
-    DEBUG_MSG("exiting copy_complex_array\n");
-    return EXIT_SUCCESS;
-}
-
-template
-int copy_complex_array<float>(
-        field_descriptor<float> *fi,
-        float (*ai)[2],
-        field_descriptor<float> *fo,
-        float (*ao)[2],
-        int howmany);
-
-template
-int copy_complex_array<double>(
-        field_descriptor<double> *fi,
-        double (*ai)[2],
-        field_descriptor<double> *fo,
-        double (*ao)[2],
-        int howmany);
-
-
-template <class rnumber>
-int get_descriptors_3D(
-        int n0, int n1, int n2,
-        field_descriptor<rnumber> **fr,
-        field_descriptor<rnumber> **fc)
-{
-    int ntmp[3];
-    ntmp[0] = n0;
-    ntmp[1] = n1;
-    ntmp[2] = n2;
-    *fr = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD);
-    ntmp[0] = n0;
-    ntmp[1] = n1;
-    ntmp[2] = n2/2+1;
-    *fc = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::complex(), MPI_COMM_WORLD);
-    return EXIT_SUCCESS;
-}
-
-template
-int get_descriptors_3D<float>(
-        int n0, int n1, int n2,
-        field_descriptor<float> **fr,
-        field_descriptor<float> **fc);
-
-template
-int get_descriptors_3D<double>(
-        int n0, int n1, int n2,
-        field_descriptor<double> **fr,
-        field_descriptor<double> **fc);
+std::map<std::string, unsigned> fftw_planner_string_to_flag = {
+    {"FFTW_ESTIMATE", FFTW_ESTIMATE},
+    {"FFTW_MEASURE", FFTW_MEASURE},
+    {"FFTW_PATIENT", FFTW_PATIENT},
+    {"parameter does not exist", DEFAULT_FFTW_FLAG},
+};
 
diff --git a/bfps/cpp/fftw_tools.hpp b/bfps/cpp/fftw_tools.hpp
index d0f3dbf30df3ee95f3d7934f0dd7fca633858b44..e32500fd734803a5884877398fc13fff22aa44c4 100644
--- a/bfps/cpp/fftw_tools.hpp
+++ b/bfps/cpp/fftw_tools.hpp
@@ -34,37 +34,7 @@
 
 extern int myrank, nprocs;
 
-/* given two arrays of the same dimension, we do a simple resize in
- * Fourier space: either chop off high modes, or pad with zeros.
- * the arrays are assumed to use 3D mpi fftw layout.
- * */
-template <class rnumber>
-int copy_complex_array(
-        field_descriptor<rnumber> *fi,
-        rnumber (*ai)[2],
-        field_descriptor<rnumber> *fo,
-        rnumber (*ao)[2],
-        int howmany=1);
-
-template <class rnumber>
-int clip_zero_padding(
-        field_descriptor<rnumber> *f,
-        rnumber *a,
-        int howmany=1);
-
-/* function to get pair of descriptors for real and Fourier space
- * arrays used with fftw.
- * the n0, n1, n2 correspond to the real space data WITHOUT the zero
- * padding that FFTW needs.
- * IMPORTANT: the real space array must be allocated with
- * 2*fc->local_size, and then the zeros cleaned up before trying
- * to write data.
- * */
-template <class rnumber>
-int get_descriptors_3D(
-        int n0, int n1, int n2,
-        field_descriptor<rnumber> **fr,
-        field_descriptor<rnumber> **fc);
+extern std::map<std::string, unsigned> fftw_planner_string_to_flag;
 
 #endif//FFTW_TOOLS
 
diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp
index b70825cb472316171e37e5150e09eba9d4b48eee..d5bc78a58fb84e28d9ffc2fc5cb6cd2517420fdf 100644
--- a/bfps/cpp/field.cpp
+++ b/bfps/cpp/field.cpp
@@ -23,6 +23,9 @@
 **********************************************************************/
 
 
+
+#define NDEBUG
+
 #include <sys/stat.h>
 #include <cmath>
 #include <cstdlib>
diff --git a/bfps/cpp/field_layout.cpp b/bfps/cpp/field_layout.cpp
index 908904991d5d95b0c89ba679b402d8d5727b8c85..61dd3f2ac1094e5f93a375fa295cffab669b34f9 100644
--- a/bfps/cpp/field_layout.cpp
+++ b/bfps/cpp/field_layout.cpp
@@ -23,10 +23,15 @@
 **********************************************************************/
 
 
+
+#define NDEBUG
+
 #include <cassert>
 #include "field_layout.hpp"
 #include "scope_timer.hpp"
 
+
+
 template <field_components fc>
 field_layout<fc>::field_layout(
         const hsize_t *SIZES,
diff --git a/bfps/cpp/fluid_solver.cpp b/bfps/cpp/fluid_solver.cpp
deleted file mode 100644
index 7ec0c978102f2d0cad00d57d837fad6c141f91fb..0000000000000000000000000000000000000000
--- a/bfps/cpp/fluid_solver.cpp
+++ /dev/null
@@ -1,1057 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-//#define NDEBUG
-
-#include <cassert>
-#include <cmath>
-#include <cstring>
-#include "fluid_solver.hpp"
-#include "fftw_tools.hpp"
-#include "scope_timer.hpp"
-#include "shared_array.hpp"
-
-
-template <class rnumber>
-void fluid_solver<rnumber>::impose_zero_modes()
-{
-    if (this->cd->myrank == this->cd->rank[0])
-    {
-        std::fill_n((rnumber*)(this->cu), 6, 0.0);
-        std::fill_n((rnumber*)(this->cv[0]), 6, 0.0);
-        std::fill_n((rnumber*)(this->cv[1]), 6, 0.0);
-        std::fill_n((rnumber*)(this->cv[2]), 6, 0.0);
-    }
-}
-/*****************************************************************************/
-/* macro for specializations to numeric types compatible with FFTW           */
-
-template <class rnumber>
-fluid_solver<rnumber>::fluid_solver(
-        const char *NAME,
-        int nx,
-        int ny,
-        int nz,
-        double DKX,
-        double DKY,
-        double DKZ,
-        int DEALIAS_TYPE,
-        unsigned FFTW_PLAN_RIGOR) : fluid_solver_base<rnumber>(
-                                        NAME,
-                                        nx , ny , nz,
-                                        DKX, DKY, DKZ,
-                                        DEALIAS_TYPE,
-                                        FFTW_PLAN_RIGOR)
-{
-    TIMEZONE("fluid_solver::fluid_solver");
-    this->cvorticity = fftw_interface<rnumber>::alloc_complex(this->cd->local_size);
-    this->cvelocity  = fftw_interface<rnumber>::alloc_complex(this->cd->local_size);
-    this->rvorticity = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2);
-    /*this->rvelocity  = (rnumber*)(this->cvelocity);*/
-    this->rvelocity  = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2);
-
-    this->ru = this->rvelocity;
-    this->cu = this->cvelocity;
-
-    this->rv[0] = this->rvorticity;
-    this->rv[3] = this->rvorticity;
-    this->cv[0] = this->cvorticity;
-    this->cv[3] = this->cvorticity;
-
-    this->cv[1] = fftw_interface<rnumber>::alloc_complex(this->cd->local_size);
-    this->cv[2] = this->cv[1];
-    this->rv[1] = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2);
-    this->rv[2] = this->rv[1];
-
-    this->c2r_vorticity = new typename fftw_interface<rnumber>::many_plan;
-    this->r2c_vorticity = new typename fftw_interface<rnumber>::many_plan;
-    this->c2r_velocity  = new typename fftw_interface<rnumber>::many_plan;
-    this->r2c_velocity  = new typename fftw_interface<rnumber>::many_plan;
-
-    ptrdiff_t sizes[] = {nz,
-                         ny,
-                         nx};
-
-    *this->c2r_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->cvorticity, this->rvorticity,
-                MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-
-    *this->r2c_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->rvorticity, this->cvorticity,
-                MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
-
-    *this->c2r_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->cvelocity, this->rvelocity,
-                MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-
-    *this->r2c_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->rvelocity, this->cvelocity,
-                MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
-
-    this->uc2r = this->c2r_velocity;
-    this->ur2c = this->r2c_velocity;
-    this->vc2r[0] = this->c2r_vorticity;
-    this->vr2c[0] = this->r2c_vorticity;
-
-    this->vc2r[1] = new typename fftw_interface<rnumber>::many_plan;
-    this->vr2c[1] = new typename fftw_interface<rnumber>::many_plan;
-    this->vc2r[2] = new typename fftw_interface<rnumber>::many_plan;
-    this->vr2c[2] = new typename fftw_interface<rnumber>::many_plan;
-
-    *(this->vc2r[1]) = fftw_interface<rnumber>::mpi_plan_many_dft_c2r(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->cv[1], this->rv[1],
-            MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-
-    *this->vc2r[2] = fftw_interface<rnumber>::mpi_plan_many_dft_c2r(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->cv[2], this->rv[2],
-            MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-
-    *this->vr2c[1] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->rv[1], this->cv[1],
-            MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
-
-    *this->vr2c[2] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->rv[2], this->cv[2],
-            MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
-
-    /* ``physical'' parameters etc, initialized here just in case */
-
-    this->nu = 0.1;
-    this->fmode = 1;
-    this->famplitude = 1.0;
-    this->fk0  = 0;
-    this->fk1 = 3.0;
-    /* initialization of fields must be done AFTER planning */
-    std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0);
-    std::fill_n((rnumber*)this->cvelocity, this->cd->local_size*2, 0.0);
-    std::fill_n(this->rvelocity, this->cd->local_size*2, 0.0);
-    std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0);
-    std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0);
-    std::fill_n(this->rv[1], this->cd->local_size*2, 0.0);
-    std::fill_n(this->rv[2], this->cd->local_size*2, 0.0);
-}
-
-template <class rnumber>
-fluid_solver<rnumber>::~fluid_solver()
-{
-    fftw_interface<rnumber>::destroy_plan(*this->c2r_vorticity);
-    fftw_interface<rnumber>::destroy_plan(*this->r2c_vorticity);
-    fftw_interface<rnumber>::destroy_plan(*this->c2r_velocity );
-    fftw_interface<rnumber>::destroy_plan(*this->r2c_velocity );
-    fftw_interface<rnumber>::destroy_plan(*this->vc2r[1]);
-    fftw_interface<rnumber>::destroy_plan(*this->vr2c[1]);
-    fftw_interface<rnumber>::destroy_plan(*this->vc2r[2]);
-    fftw_interface<rnumber>::destroy_plan(*this->vr2c[2]);
-
-    delete this->c2r_vorticity;
-    delete this->r2c_vorticity;
-    delete this->c2r_velocity ;
-    delete this->r2c_velocity ;
-    delete this->vc2r[1];
-    delete this->vr2c[1];
-    delete this->vc2r[2];
-    delete this->vr2c[2];
-
-    fftw_interface<rnumber>::free(this->cv[1]);
-    fftw_interface<rnumber>::free(this->rv[1]);
-    fftw_interface<rnumber>::free(this->cvorticity);
-    fftw_interface<rnumber>::free(this->rvorticity);
-    fftw_interface<rnumber>::free(this->cvelocity);
-    fftw_interface<rnumber>::free(this->rvelocity);
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_vorticity()
-{
-    TIMEZONE("fluid_solver::compute_vorticity");
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        // cindex indexing is thread safe (and tindex too) + it is a write
-        ptrdiff_t tindex = 3*cindex;
-        if (k2 <= this->kM2)
-        {
-            this->cvorticity[tindex+0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]);
-            this->cvorticity[tindex+1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]);
-            this->cvorticity[tindex+2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]);
-            this->cvorticity[tindex+0][1] =  (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]);
-            this->cvorticity[tindex+1][1] =  (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]);
-            this->cvorticity[tindex+2][1] =  (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]);
-        }
-        else{
-            std::fill_n((rnumber*)(this->cvorticity+tindex), 6, 0.0);
-        }
-    }
-    );
-    this->symmetrize(this->cvorticity, 3);
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_velocity(rnumber (*__restrict__ vorticity)[2])
-{
-    TIMEZONE("fluid_solver::compute_velocity");
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        // cindex indexing is thread safe (and tindex too) + it is a write
-        ptrdiff_t tindex = 3*cindex;
-        if (k2 <= this->kM2 && k2 > 0)
-        {
-            this->cu[tindex+0][0] = -(this->ky[yindex]*vorticity[tindex+2][1] - this->kz[zindex]*vorticity[tindex+1][1]) / k2;
-            this->cu[tindex+1][0] = -(this->kz[zindex]*vorticity[tindex+0][1] - this->kx[xindex]*vorticity[tindex+2][1]) / k2;
-            this->cu[tindex+2][0] = -(this->kx[xindex]*vorticity[tindex+1][1] - this->ky[yindex]*vorticity[tindex+0][1]) / k2;
-            this->cu[tindex+0][1] =  (this->ky[yindex]*vorticity[tindex+2][0] - this->kz[zindex]*vorticity[tindex+1][0]) / k2;
-            this->cu[tindex+1][1] =  (this->kz[zindex]*vorticity[tindex+0][0] - this->kx[xindex]*vorticity[tindex+2][0]) / k2;
-            this->cu[tindex+2][1] =  (this->kx[xindex]*vorticity[tindex+1][0] - this->ky[yindex]*vorticity[tindex+0][0]) / k2;
-        }
-        else
-            std::fill_n((rnumber*)(this->cu+tindex), 6, 0.0);
-    }
-    );
-    /*this->symmetrize(this->cu, 3);*/
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::ift_velocity()
-{
-    TIMEZONE("fluid_solver::ift_velocity");
-    fftw_interface<rnumber>::execute(*(this->c2r_velocity ));
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::ift_vorticity()
-{
-    TIMEZONE("fluid_solver::ift_vorticity");
-    std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0);
-    fftw_interface<rnumber>::execute(*(this->c2r_vorticity ));
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::dft_velocity()
-{
-    TIMEZONE("fluid_solver::dft_velocity");
-    fftw_interface<rnumber>::execute(*(this->r2c_velocity ));
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::dft_vorticity()
-{
-    TIMEZONE("fluid_solver::dft_vorticity");
-    std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0);
-    fftw_interface<rnumber>::execute(*(this->r2c_vorticity ));
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::add_forcing(
-        rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor)
-{
-    TIMEZONE("fluid_solver::add_forcing");
-    if (strcmp(this->forcing_type, "none") == 0)
-        return;
-    if (strcmp(this->forcing_type, "Kolmogorov") == 0)
-    {
-        ptrdiff_t cindex;
-        if (this->cd->myrank == this->cd->rank[this->fmode])
-        {
-            cindex = ((this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3;
-            acc_field[cindex+2][0] -= this->famplitude*factor/2;
-        }
-        if (this->cd->myrank == this->cd->rank[this->cd->sizes[0] - this->fmode])
-        {
-            cindex = ((this->cd->sizes[0] - this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3;
-            acc_field[cindex+2][0] -= this->famplitude*factor/2;
-        }
-        return;
-    }
-    if (strcmp(this->forcing_type, "linear") == 0)
-    {
-        CLOOP(
-                    this,
-                    [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){
-            // cindex indexing is thread safe (and cindex*3+c too)
-            double knorm = sqrt(this->kx[xindex]*this->kx[xindex] +
-                         this->ky[yindex]*this->ky[yindex] +
-                         this->kz[zindex]*this->kz[zindex]);
-            if ((this->fk0 <= knorm) && (this->fk1 >= knorm))
-                for (int c=0; c<3; c++)
-                    for (int i=0; i<2; i++)
-                        acc_field[cindex*3+c][i] += this->famplitude*vort_field[cindex*3+c][i]*factor;
-        }
-        );
-        return;
-    }
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::omega_nonlin(
-        int src)
-{
-    TIMEZONE("fluid_solver::omega_nonlin");
-    assert(src >= 0 && src < 3);
-    this->compute_velocity(this->cv[src]);
-    /* get fields from Fourier space to real space */
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::fftw");
-        fftw_interface<rnumber>::execute(*(this->c2r_velocity ));
-        fftw_interface<rnumber>::execute(*(this->vc2r[src]));
-    }
-    /* compute cross product $u \times \omega$, and normalize */
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::RLOOP");
-        RLOOP (
-                    this,
-                    [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-            ptrdiff_t tindex = 3*rindex;
-            rnumber tmp[3][2];
-            for (int cc=0; cc<3; cc++)
-                tmp[cc][0] = (this->ru[tindex+(cc+1)%3]*this->rv[src][tindex+(cc+2)%3] -
-                        this->ru[tindex+(cc+2)%3]*this->rv[src][tindex+(cc+1)%3]);
-            // Access to rindex is thread safe so there is no overlap between threads
-            for (int cc=0; cc<3; cc++)
-                this->ru[(3*rindex)+cc] = tmp[cc][0] / this->normalization_factor;
-        }
-        );
-    }
-    /* go back to Fourier space */
-    this->clean_up_real_space(this->ru, 3);
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::fftw-2");
-        fftw_interface<rnumber>::execute(*(this->r2c_velocity ));
-    }
-    this->dealias(this->cu, 3);
-    /* $\imath k \times Fourier(u \times \omega)$ */
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::CLOOP");
-        CLOOP(
-                    this,
-                    [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){
-            rnumber tmp[3][2];
-            ptrdiff_t tindex = 3*cindex;
-            {
-                tmp[0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]);
-                tmp[1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]);
-                tmp[2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]);
-                tmp[0][1] =  (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]);
-                tmp[1][1] =  (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]);
-                tmp[2][1] =  (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]);
-            }
-            // cindex indexing is thread safe so it is 3*cindex so there is no overlap between threads
-            for (int cc=0; cc<3; cc++)
-                for (int i=0; i<2; i++)
-                    this->cu[tindex+cc][i] = tmp[cc][i];
-        }
-        );
-    }
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::add_forcing");
-        this->add_forcing(this->cu, this->cv[src], 1.0);
-    }
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::force_divfree");
-        this->force_divfree(this->cu);
-    }
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::step(double dt)
-{
-    TIMEZONE("fluid_solver::step");
-    std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0);
-    this->omega_nonlin(0);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){
-        if (k2 <= this->kM2)
-        {
-            double factor0 = exp(-this->nu * k2 * dt);
-            // cindex indexing is thread safe so there is no overlap between threads
-            for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
-                this->cv[1][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i] +
-                    dt*this->cu[3*cindex+cc][i])*factor0;
-        }
-    }
-    );
-
-    this->omega_nonlin(1);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){
-        if (k2 <= this->kM2)
-        {
-            double factor0 = exp(-this->nu * k2 * dt/2);
-            double factor1 = exp( this->nu * k2 * dt/2);
-            // cindex indexing is thread safe so there is no overlap between threads
-            for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
-                this->cv[2][3*cindex+cc][i] = (3*this->cv[0][3*cindex+cc][i]*factor0 +
-                    (this->cv[1][3*cindex+cc][i] +
-                    dt*this->cu[3*cindex+cc][i])*factor1)*0.25;
-        }
-    }
-    );
-
-    this->omega_nonlin(2);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){
-        if (k2 <= this->kM2)
-        {
-            double factor0 = exp(-this->nu * k2 * dt * 0.5);
-            // cindex indexing is thread safe so there is no overlap between threads
-            for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
-                this->cv[3][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i]*factor0 +
-                    2*(this->cv[2][3*cindex+cc][i] +
-                    dt*this->cu[3*cindex+cc][i]))*factor0/3;
-        }
-    }
-    );
-
-    this->force_divfree(this->cvorticity);
-    this->symmetrize(this->cvorticity, 3);
-    this->iteration++;
-}
-
-template <class rnumber>
-int fluid_solver<rnumber>::read(char field, char representation)
-{
-    TIMEZONE("fluid_solver::read");
-    char fname[512];
-    int read_result;
-    if (field == 'v')
-    {
-        if (representation == 'c')
-        {
-            this->fill_up_filename("cvorticity", fname);
-            read_result = this->cd->read(fname, (void*)this->cvorticity);
-            if (read_result != EXIT_SUCCESS)
-                return read_result;
-        }
-        if (representation == 'r')
-        {
-            read_result = this->read_base("rvorticity", this->rvorticity);
-            if (read_result != EXIT_SUCCESS)
-                return read_result;
-            else
-                fftw_interface<rnumber>::execute(*(this->r2c_vorticity ));
-        }
-        this->low_pass_Fourier(this->cvorticity, 3, this->kM);
-        this->force_divfree(this->cvorticity);
-        this->symmetrize(this->cvorticity, 3);
-        return EXIT_SUCCESS;
-    }
-    if ((field == 'u') && (representation == 'c'))
-    {
-        read_result = this->read_base("cvelocity", this->cvelocity);
-        this->low_pass_Fourier(this->cvelocity, 3, this->kM);
-        this->force_divfree(this->cvorticity);
-        this->symmetrize(this->cvorticity, 3);
-        return read_result;
-    }
-    if ((field == 'u') && (representation == 'r'))
-        return this->read_base("rvelocity", this->rvelocity);
-    return EXIT_FAILURE;
-}
-
-template <class rnumber>
-int fluid_solver<rnumber>::write(char field, char representation)
-{
-    TIMEZONE("fluid_solver::write");
-    char fname[512];
-    if ((field == 'v') && (representation == 'c'))
-    {
-        this->fill_up_filename("cvorticity", fname);
-        return this->cd->write(fname, (void*)this->cvorticity);
-    }
-    if ((field == 'v') && (representation == 'r'))
-    {
-        fftw_interface<rnumber>::execute(*(this->c2r_vorticity ));
-        clip_zero_padding<rnumber>(this->rd, this->rvorticity, 3);
-        this->fill_up_filename("rvorticity", fname);
-        return this->rd->write(fname, this->rvorticity);
-    }
-    this->compute_velocity(this->cvorticity);
-    if ((field == 'u') && (representation == 'c'))
-    {
-        this->fill_up_filename("cvelocity", fname);
-        return this->cd->write(fname, this->cvelocity);
-    }
-    if ((field == 'u') && (representation == 'r'))
-    {
-        this->ift_velocity();
-        clip_zero_padding<rnumber>(this->rd, this->rvelocity, 3);
-        this->fill_up_filename("rvelocity", fname);
-        return this->rd->write(fname, this->rvelocity);
-    }
-    return EXIT_FAILURE;
-}
-
-template <class rnumber>
-int fluid_solver<rnumber>::write_rTrS2()
-{
-    TIMEZONE("fluid_solver::write_rTrS2");
-    char fname[512];
-    this->fill_up_filename("rTrS2", fname);
-    typename fftw_interface<rnumber>::complex *ca;
-    rnumber *ra;
-    ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3);
-    ra = (rnumber*)(ca);
-    this->compute_velocity(this->cvorticity);
-    this->compute_vector_gradient(ca, this->cvelocity);
-    for (int cc=0; cc<3; cc++)
-    {
-        std::copy(
-                    (rnumber*)(ca + cc*this->cd->local_size),
-                    (rnumber*)(ca + (cc+1)*this->cd->local_size),
-                    (rnumber*)this->cv[1]);
-        fftw_interface<rnumber>::execute(*(this->vc2r[1]));
-        std::copy(
-                    this->rv[1],
-                this->rv[1] + this->cd->local_size*2,
-                ra + cc*this->cd->local_size*2);
-    }
-    /* velocity gradient is now stored, in real space, in ra */
-    rnumber *dx_u, *dy_u, *dz_u;
-    dx_u = ra;
-    dy_u = ra + 2*this->cd->local_size;
-    dz_u = ra + 4*this->cd->local_size;
-    rnumber *trS2 = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2);
-    shared_array<double> average_local(1, [&](double* data){
-        data[0] = 0;
-    });
-
-    RLOOP(
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        rnumber AxxAxx;
-        rnumber AyyAyy;
-        rnumber AzzAzz;
-        rnumber Sxy;
-        rnumber Syz;
-        rnumber Szx;
-        ptrdiff_t tindex = 3*rindex;
-        AxxAxx = dx_u[tindex+0]*dx_u[tindex+0];
-        AyyAyy = dy_u[tindex+1]*dy_u[tindex+1];
-        AzzAzz = dz_u[tindex+2]*dz_u[tindex+2];
-        Sxy = dx_u[tindex+1]+dy_u[tindex+0];
-        Syz = dy_u[tindex+2]+dz_u[tindex+1];
-        Szx = dz_u[tindex+0]+dx_u[tindex+2];
-        // rindex is thread safe + No overlap between thread it is a write
-        trS2[rindex] = (AxxAxx + AyyAyy + AzzAzz +
-                        (Sxy*Sxy + Syz*Syz + Szx*Szx)/2);
-        average_local.getMine()[0] += trS2[rindex];
-    }
-    );
-    average_local.mergeParallel();
-    double average;
-    MPI_Allreduce(
-                average_local.getMasterData(),
-                &average,
-                1,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    DEBUG_MSG("average TrS2 is %g\n", average);
-    fftw_interface<rnumber>::free(ca);
-    /* output goes here */
-    int ntmp[3];
-    ntmp[0] = this->rd->sizes[0];
-    ntmp[1] = this->rd->sizes[1];
-    ntmp[2] = this->rd->sizes[2];
-    field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm);
-    clip_zero_padding<rnumber>(scalar_descriptor, trS2, 1);
-    int return_value = scalar_descriptor->write(fname, trS2);
-    delete scalar_descriptor;
-    fftw_interface<rnumber>::free(trS2);
-    return return_value;
-}
-
-template <class rnumber>
-int fluid_solver<rnumber>::write_renstrophy()
-{
-    TIMEZONE("fluid_solver::write_renstrophy");
-    char fname[512];
-    this->fill_up_filename("renstrophy", fname);
-    rnumber *enstrophy = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2);
-    this->ift_vorticity();
-    shared_array<double> average_local(1, [&](double* data){
-        data[0] = 0;
-    });
-
-    RLOOP(
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        ptrdiff_t tindex = 3*rindex;
-        // rindex indexing is thread safe so there is no overlap between threads
-        enstrophy[rindex] = (
-                    this->rvorticity[tindex+0]*this->rvorticity[tindex+0] +
-                this->rvorticity[tindex+1]*this->rvorticity[tindex+1] +
-                this->rvorticity[tindex+2]*this->rvorticity[tindex+2]
-                )/2;
-        average_local.getMine()[0] += enstrophy[rindex];
-    }
-    );
-    average_local.mergeParallel();
-    double average;
-    MPI_Allreduce(
-                average_local.getMasterData(),
-                &average,
-                1,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    DEBUG_MSG("average enstrophy is %g\n", average);
-    /* output goes here */
-    int ntmp[3];
-    ntmp[0] = this->rd->sizes[0];
-    ntmp[1] = this->rd->sizes[1];
-    ntmp[2] = this->rd->sizes[2];
-    field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm);
-    clip_zero_padding<rnumber>(scalar_descriptor, enstrophy, 1);
-    int return_value = scalar_descriptor->write(fname, enstrophy);
-    delete scalar_descriptor;
-    fftw_interface<rnumber>::free(enstrophy);
-    return return_value;
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_pressure(rnumber (*__restrict__ pressure)[2])
-{
-    TIMEZONE("fluid_solver::compute_pressure");
-    /* assume velocity is already in real space representation */
-    /* diagonal terms 11 22 33 */
-    RLOOP (
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        // rindex indexing is thread safe so there is no overlap between threads
-        ptrdiff_t tindex = 3*rindex;
-        for (int cc=0; cc<3; cc++)
-            this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc];
-    }
-    );
-    this->clean_up_real_space(this->rv[1], 3);
-    {
-        TIMEZONE("fftw_interface<rnumber>::execute");
-        fftw_interface<rnumber>::execute(*(this->vr2c[1]));
-    }
-    this->dealias(this->cv[1], 3);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2 && k2 > 0)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            for (int i=0; i<2; i++)
-            {
-                pressure[cindex][i] = -(this->kx[xindex]*this->kx[xindex]*this->cv[1][tindex+0][i] +
-                        this->ky[yindex]*this->ky[yindex]*this->cv[1][tindex+1][i] +
-                        this->kz[zindex]*this->kz[zindex]*this->cv[1][tindex+2][i]);
-            }
-        }
-        else
-            std::fill_n((rnumber*)(pressure+cindex), 2, 0.0);
-    }
-    );
-    /* off-diagonal terms 12 23 31 */
-    RLOOP (
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        // rindex indexing is thread safe so there is no overlap between threads
-        ptrdiff_t tindex = 3*rindex;
-        for (int cc=0; cc<3; cc++)
-            this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3];
-    }
-    );
-    this->clean_up_real_space(this->rv[1], 3);
-    {
-        TIMEZONE("fftw_interface<rnumber>::execute");
-        fftw_interface<rnumber>::execute(*(this->vr2c[1]));
-    }
-    this->dealias(this->cv[1], 3);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2 && k2 > 0)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            for (int i=0; i<2; i++)
-            {
-                pressure[cindex][i] -= 2*(this->kx[xindex]*this->ky[yindex]*this->cv[1][tindex+0][i] +
-                        this->ky[yindex]*this->kz[zindex]*this->cv[1][tindex+1][i] +
-                        this->kz[zindex]*this->kx[xindex]*this->cv[1][tindex+2][i]);
-                pressure[cindex][i] /= this->normalization_factor*k2;
-            }
-        }
-    }
-    );
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_gradient_statistics(
-        rnumber (*__restrict__ vec)[2],
-double *gradu_moments,
-double *trS2QR_moments,
-ptrdiff_t *gradu_hist,
-ptrdiff_t *trS2QR_hist,
-ptrdiff_t *QR2D_hist,
-double trS2QR_max_estimates[],
-double gradu_max_estimates[],
-int nbins,
-int QR2D_nbins)
-{
-    TIMEZONE("fluid_solver::compute_gradient_statistics");
-    typename fftw_interface<rnumber>::complex *ca;
-    rnumber *ra;
-    ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3);
-    ra = (rnumber*)(ca);
-    this->compute_vector_gradient(ca, vec);
-    for (int cc=0; cc<3; cc++)
-    {
-        std::copy(
-                    (rnumber*)(ca + cc*this->cd->local_size),
-                    (rnumber*)(ca + (cc+1)*this->cd->local_size),
-                    (rnumber*)this->cv[1]);
-        fftw_interface<rnumber>::execute(*(this->vc2r[1]));
-        std::copy(
-                    this->rv[1],
-                this->rv[1] + this->cd->local_size*2,
-                ra + cc*this->cd->local_size*2);
-    }
-    /* velocity gradient is now stored, in real space, in ra */
-    std::fill_n(this->rv[1], 2*this->cd->local_size, 0.0);
-    rnumber *dx_u, *dy_u, *dz_u;
-    dx_u = ra;
-    dy_u = ra + 2*this->cd->local_size;
-    dz_u = ra + 4*this->cd->local_size;
-    double binsize[2];
-    double tmp_max_estimate[3];
-    tmp_max_estimate[0] = trS2QR_max_estimates[0];
-    tmp_max_estimate[1] = trS2QR_max_estimates[1];
-    tmp_max_estimate[2] = trS2QR_max_estimates[2];
-    binsize[0] = 2*tmp_max_estimate[2] / QR2D_nbins;
-    binsize[1] = 2*tmp_max_estimate[1] / QR2D_nbins;
-    ptrdiff_t *local_hist = new ptrdiff_t[QR2D_nbins*QR2D_nbins];
-    std::fill_n(local_hist, QR2D_nbins*QR2D_nbins, 0);
-    RLOOP(
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        rnumber AxxAxx;
-        rnumber AyyAyy;
-        rnumber AzzAzz;
-        rnumber AxyAyx;
-        rnumber AyzAzy;
-        rnumber AzxAxz;
-        rnumber Sxy;
-        rnumber Syz;
-        rnumber Szx;
-        // rindex indexing is thread safe so there is no overlap between threads
-        // tindex[0:2] is thread safe too
-        ptrdiff_t tindex = 3*rindex;
-        AxxAxx = dx_u[tindex+0]*dx_u[tindex+0];
-        AyyAyy = dy_u[tindex+1]*dy_u[tindex+1];
-        AzzAzz = dz_u[tindex+2]*dz_u[tindex+2];
-        AxyAyx = dx_u[tindex+1]*dy_u[tindex+0];
-        AyzAzy = dy_u[tindex+2]*dz_u[tindex+1];
-        AzxAxz = dz_u[tindex+0]*dx_u[tindex+2];
-        this->rv[1][tindex+1] = - (AxxAxx + AyyAyy + AzzAzz)/2 - AxyAyx - AyzAzy - AzxAxz;
-        this->rv[1][tindex+2] = - (dx_u[tindex+0]*(AxxAxx/3 + AxyAyx + AzxAxz) +
-                dy_u[tindex+1]*(AyyAyy/3 + AxyAyx + AyzAzy) +
-                dz_u[tindex+2]*(AzzAzz/3 + AzxAxz + AyzAzy) +
-                dx_u[tindex+1]*dy_u[tindex+2]*dz_u[tindex+0] +
-                dx_u[tindex+2]*dy_u[tindex+0]*dz_u[tindex+1]);
-        int bin0 = int(floor((this->rv[1][tindex+2] + tmp_max_estimate[2]) / binsize[0]));
-        int bin1 = int(floor((this->rv[1][tindex+1] + tmp_max_estimate[1]) / binsize[1]));
-        if ((bin0 >= 0 && bin0 < QR2D_nbins) &&
-                (bin1 >= 0 && bin1 < QR2D_nbins))
-            local_hist[bin1*QR2D_nbins + bin0]++;
-        Sxy = dx_u[tindex+1]+dy_u[tindex+0];
-        Syz = dy_u[tindex+2]+dz_u[tindex+1];
-        Szx = dz_u[tindex+0]+dx_u[tindex+2];
-        this->rv[1][tindex] = (AxxAxx + AyyAyy + AzzAzz +
-                               (Sxy*Sxy + Syz*Syz + Szx*Szx)/2);
-    }
-    );
-    MPI_Allreduce(
-                local_hist,
-                QR2D_hist,
-                QR2D_nbins * QR2D_nbins,
-                MPI_INT64_T, MPI_SUM, this->cd->comm);
-    delete[] local_hist;
-    this->compute_rspace_stats3(
-                this->rv[1],
-            trS2QR_moments,
-            trS2QR_hist,
-            tmp_max_estimate,
-            nbins);
-    double *tmp_moments = new double[10*3];
-    ptrdiff_t *tmp_hist = new ptrdiff_t[nbins*3];
-    for (int cc=0; cc<3; cc++)
-    {
-        tmp_max_estimate[0] = gradu_max_estimates[cc*3 + 0];
-        tmp_max_estimate[1] = gradu_max_estimates[cc*3 + 1];
-        tmp_max_estimate[2] = gradu_max_estimates[cc*3 + 2];
-        this->compute_rspace_stats3(
-                    dx_u + cc*2*this->cd->local_size,
-                    tmp_moments,
-                    tmp_hist,
-                    tmp_max_estimate,
-                    nbins);
-        for (int n = 0; n < 10; n++)
-            for (int i = 0; i < 3 ; i++)
-            {
-                gradu_moments[(n*3 + cc)*3 + i] = tmp_moments[n*3 + i];
-            }
-        for (int n = 0; n < nbins; n++)
-            for (int i = 0; i < 3; i++)
-            {
-                gradu_hist[(n*3 + cc)*3 + i] = tmp_hist[n*3 + i];
-            }
-    }
-    delete[] tmp_moments;
-    delete[] tmp_hist;
-    fftw_interface<rnumber>::free(ca);
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber (*acceleration)[2])
-{
-    TIMEZONE("fluid_solver::compute_Lagrangian_acceleration");
-    typename fftw_interface<rnumber>::complex *pressure;
-    pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3);
-    this->compute_velocity(this->cvorticity);
-    this->ift_velocity();
-    this->compute_pressure(pressure);
-    this->compute_velocity(this->cvorticity);
-    std::fill_n((rnumber*)this->cv[1], 2*this->cd->local_size, 0.0);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            for (int cc=0; cc<3; cc++)
-                for (int i=0; i<2; i++)
-                    this->cv[1][tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i];
-            if (strcmp(this->forcing_type, "linear") == 0)
-            {
-                double knorm = sqrt(k2);
-                if ((this->fk0 <= knorm) &&
-                        (this->fk1 >= knorm))
-                    for (int c=0; c<3; c++)
-                        for (int i=0; i<2; i++)
-                            this->cv[1][tindex+c][i] += this->famplitude*this->cu[tindex+c][i];
-            }
-            this->cv[1][tindex+0][0] += this->kx[xindex]*pressure[cindex][1];
-            this->cv[1][tindex+1][0] += this->ky[yindex]*pressure[cindex][1];
-            this->cv[1][tindex+2][0] += this->kz[zindex]*pressure[cindex][1];
-            this->cv[1][tindex+0][1] -= this->kx[xindex]*pressure[cindex][0];
-            this->cv[1][tindex+1][1] -= this->ky[yindex]*pressure[cindex][0];
-            this->cv[1][tindex+2][1] -= this->kz[zindex]*pressure[cindex][0];
-        }
-    }
-    );
-    std::copy(
-                (rnumber*)this->cv[1],
-            (rnumber*)(this->cv[1] + this->cd->local_size),
-            (rnumber*)acceleration);
-    fftw_interface<rnumber>::free(pressure);
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_Eulerian_acceleration(rnumber (*__restrict__ acceleration)[2])
-{
-    TIMEZONE("fluid_solver::compute_Eulerian_acceleration");
-    std::fill_n((rnumber*)(acceleration), 2*this->cd->local_size, 0.0);
-    this->compute_velocity(this->cvorticity);
-    /* put in linear terms */
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){
-        if (k2 <= this->kM2)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            for (int cc=0; cc<3; cc++)
-                for (int i=0; i<2; i++)
-                    acceleration[tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i];
-            if (strcmp(this->forcing_type, "linear") == 0)
-            {
-                double knorm = sqrt(k2);
-                if ((this->fk0 <= knorm) &&
-                        (this->fk1 >= knorm))
-                {
-                    for (int c=0; c<3; c++)
-                        for (int i=0; i<2; i++)
-                            acceleration[tindex+c][i] += this->famplitude*this->cu[tindex+c][i];
-                }
-            }
-        }
-    }
-    );
-    this->ift_velocity();
-    /* compute uu */
-    /* 11 22 33 */
-    RLOOP (
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        // cindex indexing is thread safe so there is no overlap between threads
-        ptrdiff_t tindex = 3*rindex;
-        for (int cc=0; cc<3; cc++)
-            this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc] / this->normalization_factor;
-    }
-    );
-    this->clean_up_real_space(this->rv[1], 3);
-    fftw_interface<rnumber>::execute(*(this->vr2c[1]));
-    this->dealias(this->cv[1], 3);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            acceleration[tindex+0][0] +=
-                    this->kx[xindex]*this->cv[1][tindex+0][1];
-            acceleration[tindex+0][1] +=
-                    -this->kx[xindex]*this->cv[1][tindex+0][0];
-            acceleration[tindex+1][0] +=
-                    this->ky[yindex]*this->cv[1][tindex+1][1];
-            acceleration[tindex+1][1] +=
-                    -this->ky[yindex]*this->cv[1][tindex+1][0];
-            acceleration[tindex+2][0] +=
-                    this->kz[zindex]*this->cv[1][tindex+2][1];
-            acceleration[tindex+2][1] +=
-                    -this->kz[zindex]*this->cv[1][tindex+2][0];
-        }
-    }
-    );
-    /* 12 23 31 */
-    RLOOP (
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        // cindex indexing is thread safe so there is no overlap between threads
-        ptrdiff_t tindex = 3*rindex;
-        for (int cc=0; cc<3; cc++)
-            this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3] / this->normalization_factor;
-    }
-    );
-    this->clean_up_real_space(this->rv[1], 3);
-    fftw_interface<rnumber>::execute(*(this->vr2c[1]));
-    this->dealias(this->cv[1], 3);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            acceleration[tindex+0][0] +=
-                    (this->ky[yindex]*this->cv[1][tindex+0][1] +
-                    this->kz[zindex]*this->cv[1][tindex+2][1]);
-            acceleration[tindex+0][1] +=
-                    - (this->ky[yindex]*this->cv[1][tindex+0][0] +
-                    this->kz[zindex]*this->cv[1][tindex+2][0]);
-            acceleration[tindex+1][0] +=
-                    (this->kz[zindex]*this->cv[1][tindex+1][1] +
-                    this->kx[xindex]*this->cv[1][tindex+0][1]);
-            acceleration[tindex+1][1] +=
-                    - (this->kz[zindex]*this->cv[1][tindex+1][0] +
-                    this->kx[xindex]*this->cv[1][tindex+0][0]);
-            acceleration[tindex+2][0] +=
-                    (this->kx[xindex]*this->cv[1][tindex+2][1] +
-                    this->ky[yindex]*this->cv[1][tindex+1][1]);
-            acceleration[tindex+2][1] +=
-                    - (this->kx[xindex]*this->cv[1][tindex+2][0] +
-                    this->ky[yindex]*this->cv[1][tindex+1][0]);
-        }
-    }
-    );
-    if (this->cd->myrank == this->cd->rank[0])
-        std::fill_n((rnumber*)(acceleration), 6, 0.0);
-    this->force_divfree(acceleration);
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber *__restrict__ acceleration)
-{
-    TIMEZONE("fluid_solver::compute_Lagrangian_acceleration");
-    this->compute_Lagrangian_acceleration((typename fftw_interface<rnumber>::complex*)acceleration);
-    fftw_interface<rnumber>::execute(*(this->vc2r[1]));
-    std::copy(
-                this->rv[1],
-            this->rv[1] + 2*this->cd->local_size,
-            acceleration);
-}
-
-template <class rnumber>
-int fluid_solver<rnumber>::write_rpressure()
-{
-    TIMEZONE("fluid_solver::write_rpressure");
-    char fname[512];
-    typename fftw_interface<rnumber>::complex *pressure;
-    pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3);
-    this->compute_velocity(this->cvorticity);
-    this->ift_velocity();
-    this->compute_pressure(pressure);
-    this->fill_up_filename("rpressure", fname);
-    rnumber *rpressure = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2);
-    typename fftw_interface<rnumber>::plan c2r;
-    c2r = fftw_interface<rnumber>::mpi_plan_dft_c2r_3d(
-                this->rd->sizes[0], this->rd->sizes[1], this->rd->sizes[2],
-            pressure, rpressure, this->cd->comm,
-            this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-    fftw_interface<rnumber>::execute(c2r);
-    /* output goes here */
-    int ntmp[3];
-    ntmp[0] = this->rd->sizes[0];
-    ntmp[1] = this->rd->sizes[1];
-    ntmp[2] = this->rd->sizes[2];
-    field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm);
-    clip_zero_padding<rnumber>(scalar_descriptor, rpressure, 1);
-    int return_value = scalar_descriptor->write(fname, rpressure);
-    delete scalar_descriptor;
-    fftw_interface<rnumber>::destroy_plan(c2r);
-    fftw_interface<rnumber>::free(pressure);
-    fftw_interface<rnumber>::free(rpressure);
-    return return_value;
-}
-
-/*****************************************************************************/
-
-
-
-
-/*****************************************************************************/
-/* finally, force generation of code for single precision                    */
-template class fluid_solver<float>;
-template class fluid_solver<double>;
-/*****************************************************************************/
-
diff --git a/bfps/cpp/fluid_solver.hpp b/bfps/cpp/fluid_solver.hpp
deleted file mode 100644
index aaddbb59b4a29530779e3dba81f90a06c790bdcb..0000000000000000000000000000000000000000
--- a/bfps/cpp/fluid_solver.hpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <iostream>
-#include "field_descriptor.hpp"
-#include "fluid_solver_base.hpp"
-
-#ifndef FLUID_SOLVER
-
-#define FLUID_SOLVER
-
-extern int myrank, nprocs;
-
-
-/* container for field descriptor, fields themselves, parameters, etc
- * using the same big macro idea that they're using in fftw3.h
- * I feel like I should quote:  Ugh.
- * */
-
-template <class rnumber>
-class fluid_solver:public fluid_solver_base<rnumber>
-{
-    public:
-        /* fields */
-        rnumber *rvorticity;
-        rnumber *rvelocity ;
-        typename fluid_solver_base<rnumber>::cnumber *cvorticity;
-        typename fluid_solver_base<rnumber>::cnumber *cvelocity ;
-
-        /* short names for velocity, and 4 vorticity fields */
-        rnumber *ru, *rv[4];
-        typename fluid_solver_base<rnumber>::cnumber *cu, *cv[4];
-
-        /* plans */
-        typename fftw_interface<rnumber>::many_plan *c2r_vorticity;
-        typename fftw_interface<rnumber>::many_plan *r2c_vorticity;
-        typename fftw_interface<rnumber>::many_plan *c2r_velocity;
-        typename fftw_interface<rnumber>::many_plan *r2c_velocity;
-        typename fftw_interface<rnumber>::many_plan *uc2r, *ur2c;
-        typename fftw_interface<rnumber>::many_plan *vr2c[3], *vc2r[3];
-
-        /* physical parameters */
-        double nu;
-        int fmode;         // for Kolmogorov flow
-        double famplitude; // both for Kflow and band forcing
-        double fk0, fk1;   // for band forcing
-        char forcing_type[128];
-
-        /* methods */
-        fluid_solver(
-                const char *NAME,
-                int nx,
-                int ny,
-                int nz,
-                double DKX = 1.0,
-                double DKY = 1.0,
-                double DKZ = 1.0,
-                int DEALIAS_TYPE = 1,
-                unsigned FFTW_PLAN_RIGOR = FFTW_MEASURE);
-        ~fluid_solver(void);
-
-        void compute_gradient_statistics(
-                rnumber (*__restrict__ vec)[2],
-                double *__restrict__ gradu_moments,
-                double *__restrict__ trS2_Q_R_moments,
-                ptrdiff_t *__restrict__ gradu_histograms,
-                ptrdiff_t *__restrict__ trS2_Q_R_histograms,
-                ptrdiff_t *__restrict__ QR2D_histogram,
-                double trS2_Q_R_max_estimates[3],
-                double gradu_max_estimates[9],
-                const int nbins_1D = 256,
-                const int nbins_2D = 64);
-
-        void compute_vorticity(void);
-        void compute_velocity(rnumber (*__restrict__ vorticity)[2]);
-        void compute_pressure(rnumber (*__restrict__ pressure)[2]);
-        void compute_Eulerian_acceleration(rnumber (*__restrict__ dst)[2]);
-        void compute_Lagrangian_acceleration(rnumber (*__restrict__ dst)[2]);
-        void compute_Lagrangian_acceleration(rnumber *__restrict__ dst);
-        void ift_velocity();
-        void dft_velocity();
-        void ift_vorticity();
-        void dft_vorticity();
-        void omega_nonlin(int src);
-        void step(double dt);
-        void impose_zero_modes(void);
-        void add_forcing(rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor);
-
-        int read(char field, char representation);
-        int write(char field, char representation);
-        int write_rTrS2();
-        int write_renstrophy();
-        int write_rpressure();
-};
-
-#endif//FLUID_SOLVER
-
diff --git a/bfps/cpp/fluid_solver_base.cpp b/bfps/cpp/fluid_solver_base.cpp
deleted file mode 100644
index b1d64ef5ce8294efa53cac23b391700b6b8574d3..0000000000000000000000000000000000000000
--- a/bfps/cpp/fluid_solver_base.cpp
+++ /dev/null
@@ -1,834 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <cassert>
-#include <cmath>
-#include <cstring>
-#include "base.hpp"
-#include "fluid_solver_base.hpp"
-#include "fftw_tools.hpp"
-#include "scope_timer.hpp"
-#include "shared_array.hpp"
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::fill_up_filename(const char *base_name, char *destination)
-{
-    sprintf(destination, "%s_%s_i%.5x", this->name, base_name, this->iteration);
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::clean_up_real_space(rnumber *a, int howmany)
-{
-    TIMEZONE("fluid_solver_base::clean_up_real_space");
-    for (ptrdiff_t rindex = 0; rindex < this->cd->local_size*2; rindex += howmany*(this->rd->subsizes[2]+2))
-        std::fill_n(a+rindex+this->rd->subsizes[2]*howmany, 2*howmany, 0.0);
-}
-
-template <class rnumber>
-double fluid_solver_base<rnumber>::autocorrel(cnumber *a)
-{
-    double *spec = fftw_interface<double>::alloc_real(this->nshells*9);
-    double sum_local;
-    this->cospectrum(a, a, spec);
-    sum_local = 0.0;
-    for (unsigned int n = 0; n < this->nshells; n++)
-    {
-        sum_local += spec[n*9] + spec[n*9 + 4] + spec[n*9 + 8];
-    }
-    fftw_interface<double>::free(spec);
-    return sum_local;
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec)
-{
-    TIMEZONE("fluid_solver_base::cospectrum");
-    shared_array<double> cospec_local_thread(this->nshells*9,[&](double* cospec_local){
-        std::fill_n(cospec_local, this->nshells*9, 0);
-    });
-
-    CLOOP_K2_NXMODES(
-                this,
-
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/,
-                ptrdiff_t /*zindex*/, double k2, int nxmodes){
-        if (k2 <= this->kMspec2)
-        {
-            int tmp_int = int(sqrt(k2)/this->dk)*9;
-            double* cospec_local = cospec_local_thread.getMine();
-            for (int i=0; i<3; i++)
-                for (int j=0; j<3; j++)
-                {
-                    cospec_local[tmp_int+i*3+j] += nxmodes * (
-                                (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] +
-                            (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]);
-                }
-        }}
-    );
-    cospec_local_thread.mergeParallel();
-    MPI_Allreduce(
-                cospec_local_thread.getMasterData(),
-                (void*)spec,
-                this->nshells*9,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec, const double k2exponent)
-{
-    TIMEZONE("fluid_solver_base::cospectrum2");
-    shared_array<double> cospec_local_thread(this->nshells*9,[&](double* cospec_local){
-        std::fill_n(cospec_local, this->nshells*9, 0);
-    });
-
-    CLOOP_K2_NXMODES(
-                this,
-
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/,
-                ptrdiff_t /*zindex*/, double k2, int nxmodes){
-        if (k2 <= this->kMspec2)
-        {
-            double factor = nxmodes*pow(k2, k2exponent);
-            int tmp_int = int(sqrt(k2)/this->dk)*9;
-            double* cospec_local = cospec_local_thread.getMine();
-            for (int i=0; i<3; i++)
-                for (int j=0; j<3; j++)
-                {
-                    cospec_local[tmp_int+i*3+j] += factor * (
-                                (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] +
-                            (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]);
-                }
-        }}
-    );
-    cospec_local_thread.mergeParallel();
-    MPI_Allreduce(
-                cospec_local_thread.getMasterData(),
-                (void*)spec,
-                this->nshells*9,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    //for (int n=0; n<this->nshells; n++)
-    //{
-    //    spec[n] *= 12.5663706144*pow(this->kshell[n], 2) / this->nshell[n];
-    //    /*is normalization needed?
-    //     * spec[n] /= this->normalization_factor*/
-    //}
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::compute_rspace_stats(
-        const rnumber *a,
-        const hid_t group,
-        const std::string dset_name,
-        const hsize_t toffset,
-        const std::vector<double> max_estimate)
-{
-    TIMEZONE("fluid_solver_base::compute_rspace_stats");
-    const int nmoments = 10;
-    int nvals, nbins;
-    if (this->rd->myrank == 0)
-    {
-        hid_t dset, wspace;
-        hsize_t dims[3];
-        int ndims;
-        dset = H5Dopen(group, ("moments/" + dset_name).c_str(), H5P_DEFAULT);
-        wspace = H5Dget_space(dset);
-        ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);
-        assert(ndims == 3);
-        variable_used_only_in_assert(ndims);
-        assert(dims[1] == nmoments);
-        nvals = dims[2];
-        H5Sclose(wspace);
-        H5Dclose(dset);
-        dset = H5Dopen(group, ("histograms/" + dset_name).c_str(), H5P_DEFAULT);
-        wspace = H5Dget_space(dset);
-        ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);
-        assert(ndims == 3);
-        nbins = dims[1];
-        assert(nvals == dims[2]);
-        H5Sclose(wspace);
-        H5Dclose(dset);
-    }
-    MPI_Bcast(&nvals, 1, MPI_INT, 0, this->rd->comm);
-    MPI_Bcast(&nbins, 1, MPI_INT, 0, this->rd->comm);
-    assert(nvals == max_estimate.size());
-    shared_array<double> threaded_local_moments(nmoments*nvals, [&](double* local_moments){
-        std::fill_n(local_moments, nmoments*nvals, 0);
-        if (nvals == 4) local_moments[3] = max_estimate[3];
-    });
-
-    shared_array<double> threaded_val_tmp(nvals);
-
-    shared_array<ptrdiff_t> threaded_local_hist(nbins*nvals, [&](ptrdiff_t* local_hist){
-        std::fill_n(local_hist, nbins*nvals, 0);
-    });
-
-    // Not written by threads
-    double *binsize = new double[nvals];
-    for (int i=0; i<nvals; i++)
-        binsize[i] = 2*max_estimate[i] / nbins;
-
-    RLOOP(
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        double *val_tmp = threaded_val_tmp.getMine();
-        ptrdiff_t* local_hist = threaded_local_hist.getMine();
-        double *local_moments = threaded_local_moments.getMine();
-
-        if (nvals == 4) val_tmp[3] = 0.0;
-        for (int i=0; i<3; i++)
-        {
-            val_tmp[i] = a[rindex*3+i];
-            if (nvals == 4) val_tmp[3] += val_tmp[i]*val_tmp[i];
-        }
-        if (nvals == 4)
-        {
-            val_tmp[3] = sqrt(val_tmp[3]);
-            if (val_tmp[3] < local_moments[0*nvals+3])
-                local_moments[0*nvals+3] = val_tmp[3];
-            if (val_tmp[3] > local_moments[9*nvals+3])
-                local_moments[9*nvals+3] = val_tmp[3];
-            int bin = int(floor(val_tmp[3]*2/binsize[3]));
-            if (bin >= 0 && bin < nbins)
-                local_hist[bin*nvals+3]++;
-        }
-        for (int i=0; i<3; i++)
-        {
-            if (val_tmp[i] < local_moments[0*nvals+i])
-                local_moments[0*nvals+i] = val_tmp[i];
-            if (val_tmp[i] > local_moments[(nmoments-1)*nvals+i])
-                local_moments[(nmoments-1)*nvals+i] = val_tmp[i];
-            int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i]));
-            if (bin >= 0 && bin < nbins)
-                local_hist[bin*nvals+i]++;
-        }
-        for (int n=1; n < nmoments-1; n++){
-            double pow_tmp = 1.;
-            for (int i=0; i<nvals; i++){
-                local_moments[n*nvals + i] += (pow_tmp = val_tmp[i]*pow_tmp);
-            }
-        }
-    }
-    );
-
-    threaded_local_hist.mergeParallel();
-    threaded_local_moments.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double {
-          if(nvals == int(4) && idx == 0*nvals+3){
-              return std::min(v1, v2);  
-          }
-          if(nvals == int(4) && idx == 9*nvals+3){
-              return std::max(v1, v2);  
-          }
-          if(idx < 3){
-              return std::min(v1, v2);        
-          }      
-          if((nmoments-1)*nvals <= idx && idx < (nmoments-1)*nvals+3){
-              return std::max(v1, v2);        
-          }
-          return v1 + v2;
-      });
-
-
-    double *moments = new double[nmoments*nvals];
-    MPI_Allreduce(
-                threaded_local_moments.getMasterData(),
-                (void*)moments,
-                nvals,
-                MPI_DOUBLE, MPI_MIN, this->cd->comm);
-    MPI_Allreduce(
-                (threaded_local_moments.getMasterData() + nvals),
-                (void*)(moments+nvals),
-                (nmoments-2)*nvals,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    MPI_Allreduce(
-                (threaded_local_moments.getMasterData() + (nmoments-1)*nvals),
-                (void*)(moments+(nmoments-1)*nvals),
-                nvals,
-                MPI_DOUBLE, MPI_MAX, this->cd->comm);
-    ptrdiff_t *hist = new ptrdiff_t[nbins*nvals];
-    MPI_Allreduce(
-                threaded_local_hist.getMasterData(),
-                (void*)hist,
-                nbins*nvals,
-                MPI_INT64_T, MPI_SUM, this->cd->comm);
-    for (int n=1; n < nmoments-1; n++)
-        for (int i=0; i<nvals; i++)
-            moments[n*nvals + i] /= this->normalization_factor;
-    delete[] binsize;
-    if (this->rd->myrank == 0)
-    {
-        hid_t dset, wspace, mspace;
-        hsize_t count[3], offset[3], dims[3];
-        dset = H5Dopen(group, ("moments/" + dset_name).c_str(), H5P_DEFAULT);
-        wspace = H5Dget_space(dset);
-        H5Sget_simple_extent_dims(wspace, dims, NULL);
-        offset[0] = toffset;
-        offset[1] = 0;
-        offset[2] = 0;
-        count[0] = 1;
-        count[1] = nmoments;
-        count[2] = nvals;
-        mspace = H5Screate_simple(3, count, NULL);
-        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-        H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, moments);
-        H5Sclose(wspace);
-        H5Sclose(mspace);
-        H5Dclose(dset);
-        dset = H5Dopen(group, ("histograms/" + dset_name).c_str(), H5P_DEFAULT);
-        wspace = H5Dget_space(dset);
-        count[1] = nbins;
-        mspace = H5Screate_simple(3, count, NULL);
-        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-        H5Dwrite(dset, H5T_NATIVE_INT64, mspace, wspace, H5P_DEFAULT, hist);
-        H5Sclose(wspace);
-        H5Sclose(mspace);
-        H5Dclose(dset);
-    }
-    delete[] moments;
-    delete[] hist;
-}
-
-
-
-template <class rnumber>
-template<int nvals>
-void fluid_solver_base<rnumber>::compute_rspace_stats(
-        rnumber *a,
-        double *moments,
-        ptrdiff_t *hist,
-        double max_estimate[],
-        const int nbins)
-{
-    TIMEZONE("fluid_solver_base::compute_rspace_stats");
-    shared_array<double> threaded_local_moments(10*nvals,[&](double* local_moments){
-        std::fill_n(local_moments, 10*nvals, 0);
-        if (nvals == 4) local_moments[3] = max_estimate[3];
-    });
-
-    shared_array<ptrdiff_t> threaded_local_hist(nbins*nvals, [&](ptrdiff_t* local_hist){
-        std::fill_n(local_hist, nbins*nvals, 0);
-    });
-
-    // Will not be modified by the threads
-    double binsize[nvals];
-    for (int i=0; i<nvals; i++)
-        binsize[i] = 2*max_estimate[i] / nbins;
-
-    RLOOP(
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        ptrdiff_t *local_hist = threaded_local_hist.getMine();
-        double *local_moments = threaded_local_moments.getMine();
-
-        double val_tmp[nvals];
-        if (nvals == 4) val_tmp[3] = 0.0;
-        for (int i=0; i<3; i++)
-        {
-            val_tmp[i] = a[rindex*3+i];
-            if (nvals == 4) val_tmp[3] += val_tmp[i]*val_tmp[i];
-        }
-        if (nvals == 4)
-        {
-            val_tmp[3] = sqrt(val_tmp[3]);
-            if (val_tmp[3] < local_moments[0*nvals+3])
-                local_moments[0*nvals+3] = val_tmp[3];
-            if (val_tmp[3] > local_moments[9*nvals+3])
-                local_moments[9*nvals+3] = val_tmp[3];
-            int bin = int(floor(val_tmp[3]*2/binsize[3]));
-            if (bin >= 0 && bin < nbins)
-                local_hist[bin*nvals+3]++;
-        }
-        for (int i=0; i<3; i++)
-        {
-            if (val_tmp[i] < local_moments[0*nvals+i])
-                local_moments[0*nvals+i] = val_tmp[i];
-            if (val_tmp[i] > local_moments[9*nvals+i])
-                local_moments[9*nvals+i] = val_tmp[i];
-            int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i]));
-            if (bin >= 0 && bin < nbins)
-                local_hist[bin*nvals+i]++;
-        }
-        for (int n=1; n<9; n++){
-            double pow_tmp = 1;
-            for (int i=0; i<nvals; i++){
-                local_moments[n*nvals + i] += (pow_tmp = val_tmp[i]*pow_tmp);
-            }
-        }
-    }
-    );
-
-    threaded_local_moments.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double {
-          if(nvals == int(4) && idx == 0*nvals+3){
-              return std::min(v1, v2);  
-          }
-          if(nvals == int(4) && idx == 9*nvals+3){
-              return std::max(v1, v2);  
-          }
-          if(idx < 3){
-              return std::min(v1, v2);        
-          }      
-          if(9*nvals <= idx && idx < 9*nvals+3){
-              return std::max(v1, v2);        
-          }
-          return v1 + v2;
-      });
-    threaded_local_hist.mergeParallel();
-
-    MPI_Allreduce(
-                threaded_local_moments.getMasterData(),
-                (void*)moments,
-                nvals,
-                MPI_DOUBLE, MPI_MIN, this->cd->comm);
-    MPI_Allreduce(
-                (threaded_local_moments.getMasterData() + nvals),
-                (void*)(moments+nvals),
-                8*nvals,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    MPI_Allreduce(
-                (threaded_local_moments.getMasterData() + 9*nvals),
-                (void*)(moments+9*nvals),
-                nvals,
-                MPI_DOUBLE, MPI_MAX, this->cd->comm);
-    MPI_Allreduce(
-                (void*)threaded_local_hist.getMasterData(),
-                (void*)hist,
-                nbins*nvals,
-                MPI_INT64_T, MPI_SUM, this->cd->comm);
-    for (int n=1; n<9; n++)
-        for (int i=0; i<nvals; i++)
-            moments[n*nvals + i] /= this->normalization_factor;
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, const double k2exponent)
-{
-    TIMEZONE("fluid_solver_base::write_spectrum");
-    double *spec = fftw_interface<double>::alloc_real(this->nshells);
-    this->cospectrum(a, a, spec, k2exponent);
-    if (this->cd->myrank == 0)
-    {
-        FILE *spec_file;
-        char full_name[512];
-        sprintf(full_name, "%s_%s_spec", this->name, fname);
-        spec_file = fopen(full_name, "ab");
-        fwrite((void*)&this->iteration, sizeof(int), 1, spec_file);
-        fwrite((void*)spec, sizeof(double), this->nshells, spec_file);
-        fclose(spec_file);
-    }
-    fftw_interface<double>::free(spec);
-}
-
-/*****************************************************************************/
-/* macro for specializations to numeric types compatible with FFTW           */
-
-template <class rnumber>
-fluid_solver_base<rnumber>::fluid_solver_base(
-        const char *NAME,
-        int nx,
-        int ny,
-        int nz,
-        double DKX,
-        double DKY,
-        double DKZ,
-        int DEALIAS_TYPE,
-        unsigned FFTW_PLAN_RIGOR)
-{
-    TIMEZONE("fluid_solver_base::fluid_solver_base");
-    strncpy(this->name, NAME, 256);
-    this->name[255] = '\0';
-    this->iteration = 0;
-    this->fftw_plan_rigor = FFTW_PLAN_RIGOR;
-
-    int ntmp[4];
-    ntmp[0] = nz;
-    ntmp[1] = ny;
-    ntmp[2] = nx;
-    ntmp[3] = 3;
-    this->rd = new field_descriptor<rnumber>(
-                4, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD);
-    this->normalization_factor = (this->rd->full_size/3);
-    ntmp[0] = ny;
-    ntmp[1] = nz;
-    ntmp[2] = nx/2 + 1;
-    ntmp[3] = 3;
-    this->cd = new field_descriptor<rnumber>(
-                4, ntmp, mpi_real_type<rnumber>::complex(), this->rd->comm);
-
-    this->dkx = DKX;
-    this->dky = DKY;
-    this->dkz = DKZ;
-    this->kx = new double[this->cd->sizes[2]];
-    this->ky = new double[this->cd->subsizes[0]];
-    this->kz = new double[this->cd->sizes[1]];
-    this->dealias_type = DEALIAS_TYPE;
-    switch(this->dealias_type)
-    {
-    /* HL07 smooth filter */
-    case 1:
-        this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)-1);
-        this->kMy = this->dky*(int(this->rd->sizes[1] / 2)-1);
-        this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)-1);
-        break;
-    default:
-        this->kMx = this->dkx*(int(this->rd->sizes[2] / 3)-1);
-        this->kMy = this->dky*(int(this->rd->sizes[1] / 3)-1);
-        this->kMz = this->dkz*(int(this->rd->sizes[0] / 3)-1);
-    }
-    int i, ii;
-    for (i = 0; i<this->cd->sizes[2]; i++)
-        this->kx[i] = i*this->dkx;
-    for (i = 0; i<this->cd->subsizes[0]; i++)
-    {
-        ii = i + this->cd->starts[0];
-        if (ii <= this->rd->sizes[1]/2)
-            this->ky[i] = this->dky*ii;
-        else
-            this->ky[i] = this->dky*(ii - this->rd->sizes[1]);
-    }
-    for (i = 0; i<this->cd->sizes[1]; i++)
-    {
-        if (i <= this->rd->sizes[0]/2)
-            this->kz[i] = this->dkz*i;
-        else
-            this->kz[i] = this->dkz*(i - this->rd->sizes[0]);
-    }
-    this->kM = this->kMx;
-    if (this->kM < this->kMy) this->kM = this->kMy;
-    if (this->kM < this->kMz) this->kM = this->kMz;
-    this->kM2 = this->kM * this->kM;
-    this->kMspec = this->kM;
-    this->kMspec2 = this->kM2;
-    this->dk = this->dkx;
-    if (this->dk > this->dky) this->dk = this->dky;
-    if (this->dk > this->dkz) this->dk = this->dkz;
-    this->dk2 = this->dk*this->dk;
-    DEBUG_MSG(
-                "kM = %g, kM2 = %g, dk = %g, dk2 = %g\n",
-                this->kM, this->kM2, this->dk, this->dk2);
-    /* spectra stuff */
-    this->nshells = int(this->kMspec / this->dk) + 2;
-    DEBUG_MSG(
-                "kMspec = %g, kMspec2 = %g, nshells = %ld\n",
-                this->kMspec, this->kMspec2, this->nshells);
-    this->kshell = new double[this->nshells];
-    std::fill_n(this->kshell, this->nshells, 0.0);
-    this->nshell = new int64_t[this->nshells];
-    std::fill_n(this->nshell, this->nshells, 0);
-    DEBUG_MSG("fluid_solver_base::fluid_solver_base before declaring shared_array\n");
-
-    shared_array<double> kshell_local_threaded(this->nshells,[&](double* kshell_local){
-        std::fill_n(kshell_local, this->nshells, 0.0);
-    });
-    DEBUG_MSG("fluid_solver_base::fluid_solver_base before declaring shared_array\n");
-    shared_array<int64_t> nshell_local_threaded(this->nshells,[&](int64_t* nshell_local){
-        std::fill_n(nshell_local, this->nshells, 0);
-    });
-
-    std::vector<std::unordered_map<int, double>> Fourier_filter_threaded(omp_get_max_threads());
-
-    DEBUG_MSG("fluid_solver_base::fluid_solver_base before cloop_k2_nxmodes\n");
-    CLOOP_K2_NXMODES(
-                this,
-
-                [&](ptrdiff_t /*cindex*/, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/,
-                ptrdiff_t /*zindex*/, double k2, int nxmodes){
-        if (k2 < this->kM2)
-        {
-            double knorm = sqrt(k2);
-            nshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes;
-            kshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes*knorm;
-        }
-        Fourier_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));}
-    );
-
-    // Merge results
-    nshell_local_threaded.mergeParallel();
-    kshell_local_threaded.mergeParallel();
-    for(int idxMerge = 0 ; idxMerge < int(Fourier_filter_threaded.size()) ; ++idxMerge){
-        for(const auto kv : Fourier_filter_threaded[idxMerge]){
-            this->Fourier_filter[kv.first] = kv.second;
-        }
-    }
-
-    MPI_Allreduce(
-                (void*)(nshell_local_threaded.getMasterData()),
-                (void*)(this->nshell),
-                this->nshells,
-                MPI_INT64_T, MPI_SUM, this->cd->comm);
-    MPI_Allreduce(
-                (void*)(kshell_local_threaded.getMasterData()),
-                (void*)(this->kshell),
-                this->nshells,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    for (unsigned int n=0; n<this->nshells; n++)
-    {
-        if (this->nshell[n] != 0)
-            this->kshell[n] /= this->nshell[n];
-        else
-            this->kshell[n] = -1;
-    }
-    DEBUG_MSG("exiting fluid_solver_base::fluid_solver_base\n");
-}
-
-template <class rnumber>
-fluid_solver_base<rnumber>::~fluid_solver_base()
-{
-    delete[] this->kshell;
-    delete[] this->nshell;
-
-    delete[] this->kx;
-    delete[] this->ky;
-    delete[] this->kz;
-
-    delete this->cd;
-    delete this->rd;
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::low_pass_Fourier(cnumber *a, const int howmany, const double kmax)
-{
-    TIMEZONE("fluid_solver_base::low_pass_Fourier");
-    const double km2 = kmax*kmax;
-    const int howmany2 = 2*howmany;
-    /*DEBUG_MSG("entered low_pass_Fourier, kmax=%lg km2=%lg howmany2=%d\n", kmax, km2, howmany2);*/
-    CLOOP_K2(
-                this,
-                /*DEBUG_MSG("kx=%lg ky=%lg kz=%lg k2=%lg\n",
-                                  this->kx[xindex],
-                                  this->ky[yindex],
-                                  this->kz[zindex],
-                                  k2);*/
-
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex,
-                ptrdiff_t zindex, double k2){
-        if (k2 >= km2)
-            std::fill_n((rnumber*)(a + howmany*cindex), howmany2, 0.0);}
-    );
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::dealias(cnumber *a, const int howmany)
-{
-    TIMEZONE("fluid_solver_base::dealias");
-    if (this->dealias_type == 0)
-    {
-        this->low_pass_Fourier(a, howmany, this->kM);
-        return;
-    }
-
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/,
-                ptrdiff_t /*zindex*/, double k2){
-        double tval = this->Fourier_filter[int(round(k2/this->dk2))];
-        // It is thread safe on the index cindex
-        for (int tcounter = 0; tcounter < howmany; tcounter++)
-            for (int i=0; i<2; i++)
-                a[howmany*cindex+tcounter][i] *= tval;
-    }
-    );
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::force_divfree(cnumber *a)
-{
-    TIMEZONE("fluid_solver_base::force_divfree");
-    CLOOP_K2(
-                this,
-
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex,
-                ptrdiff_t zindex, double k2){
-        if (k2 > 0)
-        {
-            // It is thread safe on index cindex
-            cnumber tval;
-            tval[0] = (this->kx[xindex]*((*(a + cindex*3  ))[0]) +
-                    this->ky[yindex]*((*(a + cindex*3+1))[0]) +
-                    this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2;
-            tval[1] = (this->kx[xindex]*((*(a + cindex*3  ))[1]) +
-                    this->ky[yindex]*((*(a + cindex*3+1))[1]) +
-                    this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2;
-            for (int imag_part=0; imag_part<2; imag_part++)
-            {
-                a[cindex*3  ][imag_part] -= tval[imag_part]*this->kx[xindex];
-                a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex];
-                a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex];
-            }
-        }}
-    );
-    if (this->cd->myrank == this->cd->rank[0])
-        std::fill_n((rnumber*)(a), 6, 0.0);
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::compute_vector_gradient(cnumber *A, cnumber *cvec)
-{
-    TIMEZONE("fluid_solver_base::compute_vector_gradient");
-    std::fill_n((rnumber*)A, 3*2*this->cd->local_size, 0.0);
-    cnumber *dx_u, *dy_u, *dz_u;
-    dx_u = A;
-    dy_u = A + this->cd->local_size;
-    dz_u = A + 2*this->cd->local_size;
-    CLOOP_K2(
-                this,
-
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex,
-                ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2)
-        {
-            // It is thread safe on cindex
-            ptrdiff_t tindex = 3*cindex;
-            for (int cc=0; cc<3; cc++)
-            {
-                dx_u[tindex + cc][0] = -this->kx[xindex]*cvec[tindex+cc][1];
-                dx_u[tindex + cc][1] =  this->kx[xindex]*cvec[tindex+cc][0];
-                dy_u[tindex + cc][0] = -this->ky[yindex]*cvec[tindex+cc][1];
-                dy_u[tindex + cc][1] =  this->ky[yindex]*cvec[tindex+cc][0];
-                dz_u[tindex + cc][0] = -this->kz[zindex]*cvec[tindex+cc][1];
-                dz_u[tindex + cc][1] =  this->kz[zindex]*cvec[tindex+cc][0];
-            }
-        }}
-    );
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::symmetrize(cnumber *data, const int howmany)
-{
-    TIMEZONE("fluid_solver_base::symmetrize");
-    ptrdiff_t ii, cc;
-    MPI_Status *mpistatus = new MPI_Status;
-    if (this->cd->myrank == this->cd->rank[0])
-    {
-        for (cc = 0; cc < howmany; cc++)
-            data[cc][1] = 0.0;
-        for (ii = 1; ii < this->cd->sizes[1]/2; ii++)
-            for (cc = 0; cc < howmany; cc++) {
-                ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[0] =
-                        (*(data + cc + howmany*(                     ii)*this->cd->sizes[2]))[0];
-                ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[1] =
-                        -(*(data + cc + howmany*(                     ii)*this->cd->sizes[2]))[1];
-            }
-    }
-    cnumber *buffer;
-    buffer = fftw_interface<rnumber>::alloc_complex(howmany*this->cd->sizes[1]);
-    ptrdiff_t yy;
-    /*ptrdiff_t tindex;*/
-    int ranksrc, rankdst;
-    for (yy = 1; yy < this->cd->sizes[0]/2; yy++) {
-        ranksrc = this->cd->rank[yy];
-        rankdst = this->cd->rank[this->cd->sizes[0] - yy];
-        if (this->cd->myrank == ranksrc)
-            for (ii = 0; ii < this->cd->sizes[1]; ii++)
-                for (cc = 0; cc < howmany; cc++)
-                    for (int imag_comp=0; imag_comp<2; imag_comp++)
-                        (*(buffer + howmany*ii+cc))[imag_comp] =
-                            (*(data + howmany*((yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[imag_comp];
-        if (ranksrc != rankdst)
-        {
-            if (this->cd->myrank == ranksrc)
-                MPI_Send((void*)buffer,
-                         howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy,
-                        this->cd->comm);
-            if (this->cd->myrank == rankdst)
-                MPI_Recv((void*)buffer,
-                         howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy,
-                        this->cd->comm, mpistatus);
-        }
-        if (this->cd->myrank == rankdst)
-        {
-            for (ii = 1; ii < this->cd->sizes[1]; ii++)
-                for (cc = 0; cc < howmany; cc++)
-                {
-                    (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[0] =
-                            (*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[0];
-                    (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[1] =
-                            -(*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[1];
-                }
-            for (cc = 0; cc < howmany; cc++)
-            {
-                (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[0] =  (*(buffer + cc))[0];
-                (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[1] = -(*(buffer + cc))[1];
-            }
-        }
-    }
-    fftw_interface<rnumber>::free(buffer);
-    delete mpistatus;
-    /* put asymmetric data to 0 */
-    /*if (this->cd->myrank == this->cd->rank[this->cd->sizes[0]/2])
-    {
-        tindex = howmany*(this->cd->sizes[0]/2 - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2];
-        for (ii = 0; ii < this->cd->sizes[1]; ii++)
-        {
-            std::fill_n((rnumber*)(data + tindex), howmany*2*this->cd->sizes[2], 0.0);
-            tindex += howmany*this->cd->sizes[2];
-        }
-    }
-    tindex = howmany*();
-    std::fill_n((rnumber*)(data + tindex), howmany*2, 0.0);*/
-}
-
-template <class rnumber>
-int fluid_solver_base<rnumber>::read_base(const char *fname, rnumber *data)
-{
-    char full_name[512];
-    sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration);
-    return this->rd->read(full_name, (void*)data);
-}
-
-template <class rnumber>
-int fluid_solver_base<rnumber>::read_base(const char *fname, cnumber *data)
-{
-    char full_name[512];
-    sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration);
-    return this->cd->read(full_name, (void*)data);
-}
-
-template <class rnumber>
-int fluid_solver_base<rnumber>::write_base(const char *fname, rnumber *data)
-{
-    char full_name[512];
-    sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration);
-    return this->rd->write(full_name, (void*)data);
-}
-
-template <class rnumber>
-int fluid_solver_base<rnumber>::write_base(const char *fname, cnumber *data)
-{
-    char full_name[512];
-    sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration);
-    return this->cd->write(full_name, (void*)data);
-}
-
-/* finally, force generation of code                                         */
-template class fluid_solver_base<float>;
-template class fluid_solver_base<double>;
-
-/*****************************************************************************/
-
-
-
-
diff --git a/bfps/cpp/fluid_solver_base.hpp b/bfps/cpp/fluid_solver_base.hpp
deleted file mode 100644
index e446956001a08fdbf0d3b11da8552e1cb6c61a45..0000000000000000000000000000000000000000
--- a/bfps/cpp/fluid_solver_base.hpp
+++ /dev/null
@@ -1,272 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <hdf5.h>
-#include <iostream>
-#include <unordered_map>
-#include <vector>
-#include "base.hpp"
-#include "field_descriptor.hpp"
-#include "scope_timer.hpp"
-#include "omputils.hpp"
-
-#ifndef FLUID_SOLVER_BASE
-
-#define FLUID_SOLVER_BASE
-
-extern int myrank, nprocs;
-
-
-/* container for field descriptor, fields themselves, parameters, etc
- * using the same big macro idea that they're using in fftw3.h
- * I feel like I should quote:  Ugh.
- * */
-
-template <class rnumber>
-class fluid_solver_base
-{
-    protected:
-        typedef rnumber cnumber[2];
-    public:
-        field_descriptor<rnumber> *cd, *rd;
-        ptrdiff_t normalization_factor;
-        unsigned fftw_plan_rigor;
-
-        /* simulation parameters */
-        char name[256];
-        int iteration;
-
-        /* physical parameters */
-        double dkx, dky, dkz, dk, dk2;
-
-        /* mode and dealiasing information */
-        int dealias_type;
-        double kMx, kMy, kMz, kM, kM2;
-        double kMspec, kMspec2;
-        double *kx, *ky, *kz;
-        std::unordered_map<int, double> Fourier_filter;
-        double *kshell;
-        int64_t *nshell;
-        unsigned int nshells;
-
-
-        /* methods */
-        fluid_solver_base(
-                const char *NAME,
-                int nx,
-                int ny,
-                int nz,
-                double DKX = 1.0,
-                double DKY = 1.0,
-                double DKZ = 1.0,
-                int DEALIAS_TYPE = 0,
-                unsigned FFTW_PLAN_RIGOR = DEFAULT_FFTW_FLAG);
-        ~fluid_solver_base();
-
-        void low_pass_Fourier(cnumber *__restrict__ a, int howmany, double kmax);
-        void dealias(cnumber *__restrict__ a, int howmany);
-        void force_divfree(cnumber *__restrict__ a);
-        void symmetrize(cnumber *__restrict__ a, int howmany);
-        void clean_up_real_space(rnumber *__restrict__ a, int howmany);
-        void cospectrum(cnumber *__restrict__ a, cnumber *__restrict__ b, double *__restrict__ spec);
-        void cospectrum(cnumber *__restrict__ a, cnumber *__restrict__ b, double *__restrict__ spec, const double k2exponent);
-        double autocorrel(cnumber *__restrict__ a);
-        void compute_rspace_stats(
-                const rnumber *__restrict__ a,
-                const hid_t group,
-                const std::string dset_name,
-                const hsize_t toffset,
-                const std::vector<double> max_estimate);
-        template <int nvals>
-        void compute_rspace_stats(rnumber *__restrict__ a,
-                                  double *__restrict__ moments,
-                                  ptrdiff_t *__restrict__ hist,
-                                  double max_estimate[nvals],
-                                  const int nbins = 256);
-        inline void compute_rspace_stats3(rnumber *__restrict__ a,
-                                  double *__restrict__ moments,
-                                  ptrdiff_t *__restrict__ hist,
-                                  double max_estimate[3],
-                                  const int nbins = 256)
-        {
-            this->compute_rspace_stats<3>(a, moments, hist, max_estimate, nbins);
-        }
-        inline void compute_rspace_stats4(rnumber *__restrict__ a,
-                                  double *__restrict__ moments,
-                                  ptrdiff_t *__restrict__ hist,
-                                  double max_estimate[4],
-                                  const int nbins = 256)
-        {
-            this->compute_rspace_stats<4>(a, moments, hist, max_estimate, nbins);
-        }
-        void compute_vector_gradient(rnumber (*__restrict__ A)[2], rnumber(*__restrict__ source)[2]);
-        void write_spectrum(const char *fname, cnumber *a, const double k2exponent = 0.0);
-        void fill_up_filename(const char *base_name, char *full_name);
-        int read_base(const char *fname, rnumber *data);
-        int read_base(const char *fname, cnumber *data);
-        int write_base(const char *fname, rnumber *data);
-        int write_base(const char *fname, cnumber *data);
-};
-
-
-
-/*****************************************************************************/
-/* macros for loops                                                          */
-
-/* Fourier space loop */
-template <class ObjectType, class FuncType>
-void CLOOP(ObjectType* obj, FuncType expression)
-{
-    TIMEZONE("CLOOP");
-    #pragma omp parallel
-    {
-        const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[0]);
-        const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[0]);
-        for (ptrdiff_t yindex = start; yindex < ptrdiff_t(end); yindex++){
-            ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2];
-            for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++)
-            for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++)
-                {
-                    expression(cindex, xindex, yindex, zindex);
-                    cindex++;
-                }
-        }
-    }
-}
-
-template <class ObjectType, class FuncType>
-void CLOOP_NXMODES(ObjectType* obj, FuncType expression)
-{
-    TIMEZONE("CLOOP_NXMODES");
-    #pragma omp parallel
-    {
-        const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]);
-        const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]);
-        for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){
-            for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++)
-            {
-                ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]
-                                   + zindex*obj->cd->subsizes[2];
-                int nxmodes = 1;
-                ptrdiff_t xindex = 0;
-                expression();
-                cindex++;
-                nxmodes = 2;
-                for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++)
-                {
-                    expression();
-                    cindex++;
-                }
-            }
-        }
-    }
-}
-
-
-template <class ObjectType, class FuncType>
-void CLOOP_K2(ObjectType* obj, FuncType expression)
-{
-    TIMEZONE("CLOOP_K2");
-    #pragma omp parallel
-    {
-        const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]);
-        const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]);
-        for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){
-            for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++){
-                ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]
-                                   + zindex*obj->cd->subsizes[2];
-                for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++)
-                {
-                    double k2 = (obj->kx[xindex]*obj->kx[xindex] +
-                          obj->ky[yindex]*obj->ky[yindex] +
-                          obj->kz[zindex]*obj->kz[zindex]);
-                    expression(cindex, xindex, yindex, zindex, k2);
-                    cindex++;
-                }
-            }
-        }
-    }
-}
-
-
-template <class ObjectType, class FuncType>
-void CLOOP_K2_NXMODES(ObjectType* obj, FuncType expression)
-{
-    #pragma omp parallel
-    {
-        const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]);
-        const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]);
-        for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){
-            for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++)
-            {
-                ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]
-                                   + zindex*obj->cd->subsizes[2];
-                int nxmodes = 1;
-                ptrdiff_t xindex = 0;
-                double k2 = (obj->kx[xindex]*obj->kx[xindex] +
-                      obj->ky[yindex]*obj->ky[yindex] +
-                      obj->kz[zindex]*obj->kz[zindex]);
-                expression(cindex, xindex, yindex, zindex, k2, nxmodes);
-                cindex++;
-                nxmodes = 2;
-                for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++)
-                {
-                    double k2 = (obj->kx[xindex]*obj->kx[xindex] +
-                          obj->ky[yindex]*obj->ky[yindex] +
-                          obj->kz[zindex]*obj->kz[zindex]);
-                    expression(cindex, xindex, yindex, zindex, k2, nxmodes);
-                    cindex++;
-                }
-            }
-        }
-    }
-}
-
-
-template <class ObjectType, class FuncType>
-void RLOOP(ObjectType* obj, FuncType expression)
-{
-    #pragma omp parallel
-    {
-        const hsize_t start = OmpUtils::ForIntervalStart(obj->rd->subsizes[1]);
-        const hsize_t end = OmpUtils::ForIntervalEnd(obj->rd->subsizes[1]);
-        for (int zindex = 0; zindex < obj->rd->subsizes[0] ; zindex++)
-        for (int yindex = start; yindex < ptrdiff_t(end); yindex++)
-        {
-            ptrdiff_t rindex = (zindex * obj->rd->subsizes[1] + yindex)*(obj->rd->subsizes[2]+2);
-            for (int xindex = 0; xindex < obj->rd->subsizes[2]; xindex++)
-            {
-                expression(rindex, xindex, yindex, zindex);
-                rindex++;
-            }
-        }
-    }
-}
-
-/*****************************************************************************/
-
-#endif//FLUID_SOLVER_BASE
-
diff --git a/bfps/cpp/full_code/NSVE.cpp b/bfps/cpp/full_code/NSVE.cpp
index d9cb72a220aaf6cb124cb37f827373f9a44b03ac..ecec7db31235bc827b17b55f0c733f305e488761 100644
--- a/bfps/cpp/full_code/NSVE.cpp
+++ b/bfps/cpp/full_code/NSVE.cpp
@@ -1,7 +1,10 @@
+#define NDEBUG
+
 #include <string>
 #include <cmath>
 #include "NSVE.hpp"
 #include "scope_timer.hpp"
+#include "fftw_tools.hpp"
 
 
 template <typename rnumber>
@@ -37,11 +40,11 @@ int NSVE<rnumber>::initialize(void)
             simname.c_str(),
             nx, ny, nz,
             dkx, dky, dkz,
-            DEFAULT_FFTW_FLAG);
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
     this->tmp_vec_field = new field<rnumber, FFTW, THREE>(
             nx, ny, nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
 
 
     this->fs->checkpoints_per_file = checkpoints_per_file;
@@ -161,6 +164,7 @@ int NSVE<rnumber>::read_parameters(void)
     this->max_vorticity_estimate = hdf5_tools::read_value<double>(parameter_file, "parameters/max_vorticity_estimate");
     std::string tmp = hdf5_tools::read_string(parameter_file, "parameters/forcing_type");
     snprintf(this->forcing_type, 511, "%s", tmp.c_str());
+    this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor");
     H5Fclose(parameter_file);
     return EXIT_SUCCESS;
 }
diff --git a/bfps/cpp/full_code/NSVE.hpp b/bfps/cpp/full_code/NSVE.hpp
index 062627fd1cc9513bbd29a14199e05d3a084c0851..83c63d35790d3616cf143da1ac43bec133e91675 100644
--- a/bfps/cpp/full_code/NSVE.hpp
+++ b/bfps/cpp/full_code/NSVE.hpp
@@ -53,6 +53,7 @@ class NSVE: public direct_numerical_simulation
         double max_velocity_estimate;
         double max_vorticity_estimate;
         double nu;
+        std::string fftw_plan_rigor;
 
         /* other stuff */
         vorticity_equation<rnumber, FFTW> *fs;
diff --git a/bfps/cpp/full_code/NSVE_field_stats.cpp b/bfps/cpp/full_code/NSVE_field_stats.cpp
index 15980a20141a563be08ad0b28a3190b3e9e1c17c..b1c8d567592712f5d9feadd2caac73ca279238dd 100644
--- a/bfps/cpp/full_code/NSVE_field_stats.cpp
+++ b/bfps/cpp/full_code/NSVE_field_stats.cpp
@@ -1,6 +1,7 @@
 #include <string>
 #include <cmath>
 #include "NSVE_field_stats.hpp"
+#include "fftw_tools.hpp"
 #include "scope_timer.hpp"
 
 
@@ -12,7 +13,7 @@ int NSVE_field_stats<rnumber>::initialize(void)
     this->vorticity = new field<rnumber, FFTW, THREE>(
             nx, ny, nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
     this->vorticity->real_space_representation = false;
     hid_t parameter_file = H5Fopen(
             (this->simname + std::string(".h5")).c_str(),
@@ -43,6 +44,7 @@ int NSVE_field_stats<rnumber>::initialize(void)
                 this->vorticity->clayout->starts,
                 this->vorticity->clayout->comm);
     }
+    this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor");
     H5Fclose(parameter_file);
     return EXIT_SUCCESS;
 }
diff --git a/bfps/cpp/full_code/NSVE_field_stats.hpp b/bfps/cpp/full_code/NSVE_field_stats.hpp
index d544c0c7d5f4c75559e63ea3e59bf9457d4730c5..28a2376f17ac2ac837cbacac828cd91572bb3a17 100644
--- a/bfps/cpp/full_code/NSVE_field_stats.hpp
+++ b/bfps/cpp/full_code/NSVE_field_stats.hpp
@@ -42,6 +42,8 @@ class NSVE_field_stats: public postprocess
     private:
         field_binary_IO<rnumber, COMPLEX, THREE> *bin_IO;
     public:
+        std::string fftw_plan_rigor;
+
         field<rnumber, FFTW, THREE> *vorticity;
 
         NSVE_field_stats(
diff --git a/bfps/cpp/full_code/NSVEcomplex_particles.cpp b/bfps/cpp/full_code/NSVEcomplex_particles.cpp
index 9b910e9bb7a5aaf6b36d884858a095ff9971dffa..02a199317ae6bdf294adae1b8805b89df7f276b9 100644
--- a/bfps/cpp/full_code/NSVEcomplex_particles.cpp
+++ b/bfps/cpp/full_code/NSVEcomplex_particles.cpp
@@ -24,6 +24,8 @@
 
 
 
+#define NDEBUG
+
 #include <string>
 #include <cmath>
 #include "NSVEcomplex_particles.hpp"
diff --git a/bfps/cpp/full_code/NSVEparticles.cpp b/bfps/cpp/full_code/NSVEparticles.cpp
index b09e32805bbfb61469926be9f9d1b259066f9080..bcb2f435e092ce1288ac28e8e4452bc1a034c8e0 100644
--- a/bfps/cpp/full_code/NSVEparticles.cpp
+++ b/bfps/cpp/full_code/NSVEparticles.cpp
@@ -1,6 +1,9 @@
 
 
 
+
+#define NDEBUG
+
 #include <string>
 #include <cmath>
 #include "NSVEparticles.hpp"
diff --git a/bfps/cpp/full_code/direct_numerical_simulation.cpp b/bfps/cpp/full_code/direct_numerical_simulation.cpp
index c0b0441e5b274cbe088b6fd0903823c6d17b2076..cacda323153f0ed0f628b9fccc38e38fdcdc253c 100644
--- a/bfps/cpp/full_code/direct_numerical_simulation.cpp
+++ b/bfps/cpp/full_code/direct_numerical_simulation.cpp
@@ -1,3 +1,5 @@
+#define NDEBUG
+
 #include <cstdlib>
 #include <sys/types.h>
 #include <sys/stat.h>
diff --git a/bfps/cpp/full_code/field_output_test.cpp b/bfps/cpp/full_code/field_output_test.cpp
index 30df4e7512bec3c08325fe156b21789f80882f54..724060992ad5bba14adbe871c98067b4e57728ab 100644
--- a/bfps/cpp/full_code/field_output_test.cpp
+++ b/bfps/cpp/full_code/field_output_test.cpp
@@ -36,7 +36,7 @@ int field_output_test<rnumber>::do_work(void)
     field<rnumber, FFTW, ONE> *scal_field = new field<rnumber, FFTW, ONE>(
             this->nx, this->ny, this->nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            FFTW_ESTIMATE);
     std::default_random_engine rgen;
     std::normal_distribution<rnumber> rdist;
     rgen.seed(1);
diff --git a/bfps/cpp/full_code/field_test.cpp b/bfps/cpp/full_code/field_test.cpp
index 1627bc4088581468ebedab585db7ca9d6519d3a3..a9d531bcaf939b8b46ae539c57c00ae9b121c0a4 100644
--- a/bfps/cpp/full_code/field_test.cpp
+++ b/bfps/cpp/full_code/field_test.cpp
@@ -44,11 +44,11 @@ int field_test<rnumber>::do_work(void)
     field<rnumber, FFTW, ONE> *scal_field = new field<rnumber, FFTW, ONE>(
             this->nx, this->ny, this->nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            FFTW_ESTIMATE);
     field<rnumber, FFTW, ONE> *scal_field_alt = new field<rnumber, FFTW, ONE>(
             this->nx, this->ny, this->nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            FFTW_ESTIMATE);
     std::default_random_engine rgen;
     std::normal_distribution<rnumber> rdist;
     rgen.seed(2);
diff --git a/bfps/cpp/full_code/filter_test.cpp b/bfps/cpp/full_code/filter_test.cpp
index 4db13843fa8f69db77f8a15cbd0563feb087dfcf..6dbd05a940ff88623cd10802376497148bda5549 100644
--- a/bfps/cpp/full_code/filter_test.cpp
+++ b/bfps/cpp/full_code/filter_test.cpp
@@ -12,7 +12,7 @@ int filter_test<rnumber>::initialize(void)
     this->scal_field = new field<rnumber, FFTW, ONE>(
             nx, ny, nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            FFTW_ESTIMATE);
     this->kk = new kspace<FFTW, SMOOTH>(
             this->scal_field->clayout, this->dkx, this->dky, this->dkz);
 
diff --git a/bfps/cpp/full_code/joint_acc_vel_stats.cpp b/bfps/cpp/full_code/joint_acc_vel_stats.cpp
index 1c28527e5986e12a5d66151a5623194e4ffab3aa..fff2e2f5f4e83c3e89b742a18f2e3feaeb1466d1 100644
--- a/bfps/cpp/full_code/joint_acc_vel_stats.cpp
+++ b/bfps/cpp/full_code/joint_acc_vel_stats.cpp
@@ -110,7 +110,7 @@ int joint_acc_vel_stats<rnumber>::work_on_current_iteration(void)
     vel = new field<rnumber, FFTW, THREE>(
             this->nx, this->ny, this->nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            this->vorticity->fftw_plan_rigor);
     invert_curl(kk, this->ve->cvorticity, vel);
     vel->ift();
 
diff --git a/bfps/cpp/full_code/native_binary_to_hdf5.cpp b/bfps/cpp/full_code/native_binary_to_hdf5.cpp
index fb5a39c2af8a88a158df679ad27ce0f08fab37f8..fe8e1c41a937e49db264aaca41c82df2503e4c99 100644
--- a/bfps/cpp/full_code/native_binary_to_hdf5.cpp
+++ b/bfps/cpp/full_code/native_binary_to_hdf5.cpp
@@ -12,7 +12,7 @@ int native_binary_to_hdf5<rnumber>::initialize(void)
     this->vec_field = new field<rnumber, FFTW, THREE>(
             nx, ny, nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            FFTW_ESTIMATE);
     this->vec_field->real_space_representation = false;
     this->bin_IO = new field_binary_IO<rnumber, COMPLEX, THREE>(
             this->vec_field->clayout->sizes,
diff --git a/bfps/cpp/full_code/symmetrize_test.cpp b/bfps/cpp/full_code/symmetrize_test.cpp
index 821161da846a323721c07ed47a7c66d9efea78f0..7cf96a71efe881876de1bcef2ab4d9f0482aaddf 100644
--- a/bfps/cpp/full_code/symmetrize_test.cpp
+++ b/bfps/cpp/full_code/symmetrize_test.cpp
@@ -2,6 +2,7 @@
 #include <cmath>
 #include <random>
 #include "symmetrize_test.hpp"
+#include "fftw_tools.hpp"
 #include "scope_timer.hpp"
 
 
@@ -31,6 +32,7 @@ int symmetrize_test<rnumber>::read_parameters()
             H5P_DEFAULT);
     this->random_seed = hdf5_tools::read_value<int>(
             parameter_file, "/parameters/random_seed");
+    this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor");
     H5Fclose(parameter_file);
     return EXIT_SUCCESS;
 }
@@ -44,13 +46,13 @@ int symmetrize_test<rnumber>::do_work(void)
     field<rnumber, FFTW, THREE> *test_field0 = new field<rnumber, FFTW, THREE>(
             this->nx, this->ny, this->nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
     DEBUG_MSG("finished allocating field0\n");
     DEBUG_MSG("about to allocate field1\n");
     field<rnumber, FFTW, THREE> *test_field1 = new field<rnumber, FFTW, THREE>(
             this->nx, this->ny, this->nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
     DEBUG_MSG("finished allocating field1\n");
     std::default_random_engine rgen;
     std::normal_distribution<rnumber> rdist;
diff --git a/bfps/cpp/full_code/symmetrize_test.hpp b/bfps/cpp/full_code/symmetrize_test.hpp
index d3fbbaeb0728959234ad53859d3940c8ef00ebd9..628aee6f5ba3fac23cfbe551418a6ff1213d7d5c 100644
--- a/bfps/cpp/full_code/symmetrize_test.hpp
+++ b/bfps/cpp/full_code/symmetrize_test.hpp
@@ -42,6 +42,7 @@ template <typename rnumber>
 class symmetrize_test: public test
 {
     public:
+        std::string fftw_plan_rigor;
         int random_seed;
 
         symmetrize_test(
diff --git a/bfps/cpp/full_code/test_interpolation.cpp b/bfps/cpp/full_code/test_interpolation.cpp
index 5ef11de44b6f6a36ab6827facae3c637b702bc58..2acd3c27426a4cdd2af244dfaa6b1779b2871f61 100644
--- a/bfps/cpp/full_code/test_interpolation.cpp
+++ b/bfps/cpp/full_code/test_interpolation.cpp
@@ -30,18 +30,18 @@ int test_interpolation<rnumber>::initialize(void)
     this->vorticity = new field<rnumber, FFTW, THREE>(
             this->nx, this->ny, this->nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            FFTW_ESTIMATE);
     this->vorticity->real_space_representation = false;
 
     this->velocity = new field<rnumber, FFTW, THREE>(
             this->nx, this->ny, this->nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            FFTW_ESTIMATE);
 
     this->nabla_u = new field<rnumber, FFTW, THREExTHREE>(
             this->nx, this->ny, this->nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            FFTW_ESTIMATE);
 
     this->kk = new kspace<FFTW, SMOOTH>(
             this->vorticity->clayout, this->dkx, this->dky, this->dkz);
diff --git a/bfps/cpp/hdf5_tools.cpp b/bfps/cpp/hdf5_tools.cpp
index c2ef6aaebf2538de5575627baf6403d39e749d2a..25acaf21b662501948616236ee1d441df2527ad3 100644
--- a/bfps/cpp/hdf5_tools.cpp
+++ b/bfps/cpp/hdf5_tools.cpp
@@ -208,17 +208,26 @@ std::string hdf5_tools::read_string(
         const hid_t group,
         const std::string dset_name)
 {
-    hid_t dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT);
-    hid_t space = H5Dget_space(dset);
-    hid_t memtype = H5Dget_type(dset);
-    char *string_data = (char*)malloc(256);
-    H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);
-    std::string std_string_data = std::string(string_data);
-    free(string_data);
-    H5Sclose(space);
-    H5Tclose(memtype);
-    H5Dclose(dset);
-    return std_string_data;
+    if (H5Lexists(group, dset_name.c_str(), H5P_DEFAULT))
+    {
+        hid_t dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT);
+        hid_t space = H5Dget_space(dset);
+        hid_t memtype = H5Dget_type(dset);
+        char *string_data = (char*)malloc(256);
+        H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);
+        std::string std_string_data = std::string(string_data);
+        free(string_data);
+        H5Sclose(space);
+        H5Tclose(memtype);
+        H5Dclose(dset);
+        return std_string_data;
+    }
+    else
+    {
+        DEBUG_MSG("attempted to read dataset %s which does not exist.\n",
+                dset_name.c_str());
+        return std::string("parameter does not exist");
+    }
 }
 
 template
diff --git a/bfps/cpp/interpolator.cpp b/bfps/cpp/interpolator.cpp
deleted file mode 100644
index a0b38c4059585cc7fd58ab830b792be4f8bc193d..0000000000000000000000000000000000000000
--- a/bfps/cpp/interpolator.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include "interpolator.hpp"
-
-template <class rnumber, int interp_neighbours>
-interpolator<rnumber, interp_neighbours>::interpolator(
-        fluid_solver_base<rnumber> *fs,
-        base_polynomial_values BETA_POLYS,
-        ...) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS)
-{
-    int tdims[4];
-    this->compute_beta = BETA_POLYS;
-    tdims[0] = (interp_neighbours+1)*2*this->descriptor->nprocs + this->descriptor->sizes[0];
-    tdims[1] = this->descriptor->sizes[1];
-    tdims[2] = this->descriptor->sizes[2]+2;
-    tdims[3] = this->descriptor->sizes[3];
-    this->buffered_descriptor = new field_descriptor<rnumber>(
-            4, tdims,
-            this->descriptor->mpi_dtype,
-            this->descriptor->comm);
-    this->buffer_size = (interp_neighbours+1)*this->buffered_descriptor->slice_size;
-    this->field = new rnumber[this->buffered_descriptor->local_size];
-}
-
-template <class rnumber, int interp_neighbours>
-interpolator<rnumber, interp_neighbours>::~interpolator()
-{
-    delete[] this->field;
-    delete this->buffered_descriptor;
-}
-
-template <class rnumber, int interp_neighbours>
-int interpolator<rnumber, interp_neighbours>::read_rFFTW(const void *void_src)
-{
-    rnumber *src = (rnumber*)void_src;
-    rnumber *dst = this->field;
-    /* do big copy of middle stuff */
-    std::copy(src,
-              src + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0],
-              dst + this->buffer_size);
-    MPI_Datatype MPI_RNUM = (sizeof(rnumber) == 4) ? MPI_FLOAT : MPI_DOUBLE;
-    int rsrc;
-    /* get upper slices */
-    for (int rdst = 0; rdst < this->descriptor->nprocs; rdst++)
-    {
-        rsrc = this->descriptor->rank[(this->descriptor->all_start0[rdst] +
-                                       this->descriptor->all_size0[rdst]) %
-                                       this->descriptor->sizes[0]];
-        if (this->descriptor->myrank == rsrc)
-            MPI_Send(
-                    src,
-                    this->buffer_size,
-                    MPI_RNUM,
-                    rdst,
-                    2*(rsrc*this->descriptor->nprocs + rdst),
-                    this->buffered_descriptor->comm);
-        if (this->descriptor->myrank == rdst)
-            MPI_Recv(
-                    dst + this->buffer_size + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0],
-                    this->buffer_size,
-                    MPI_RNUM,
-                    rsrc,
-                    2*(rsrc*this->descriptor->nprocs + rdst),
-                    this->buffered_descriptor->comm,
-                    MPI_STATUS_IGNORE);
-    }
-    /* get lower slices */
-    for (int rdst = 0; rdst < this->descriptor->nprocs; rdst++)
-    {
-        rsrc = this->descriptor->rank[MOD(this->descriptor->all_start0[rdst] - 1,
-                                          this->descriptor->sizes[0])];
-        if (this->descriptor->myrank == rsrc)
-            MPI_Send(
-                    src + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0] - this->buffer_size,
-                    this->buffer_size,
-                    MPI_RNUM,
-                    rdst,
-                    2*(rsrc*this->descriptor->nprocs + rdst)+1,
-                    this->descriptor->comm);
-        if (this->descriptor->myrank == rdst)
-            MPI_Recv(
-                    dst,
-                    this->buffer_size,
-                    MPI_RNUM,
-                    rsrc,
-                    2*(rsrc*this->descriptor->nprocs + rdst)+1,
-                    this->descriptor->comm,
-                    MPI_STATUS_IGNORE);
-    }
-    return EXIT_SUCCESS;
-}
-
-template <class rnumber, int interp_neighbours>
-void interpolator<rnumber, interp_neighbours>::sample(
-        const int nparticles,
-        const int pdimension,
-        const double *__restrict__ x,
-        double *__restrict__ y,
-        const int *deriv)
-{
-    /* get grid coordinates */
-    int *xg = new int[3*nparticles];
-    double *xx = new double[3*nparticles];
-    double *yy = new double[3*nparticles];
-    std::fill_n(yy, 3*nparticles, 0.0);
-    this->get_grid_coordinates(nparticles, pdimension, x, xg, xx);
-    /* perform interpolation */
-    for (int p=0; p<nparticles; p++)
-        if (this->descriptor->rank[MOD(xg[p*3+2], this->descriptor->sizes[0])] == this->descriptor->myrank)
-            this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv);
-    MPI_Allreduce(
-            yy,
-            y,
-            3*nparticles,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->descriptor->comm);
-    delete[] yy;
-    delete[] xg;
-    delete[] xx;
-}
-
-template <class rnumber, int interp_neighbours>
-void interpolator<rnumber, interp_neighbours>::operator()(
-        const int *xg,
-        const double *xx,
-        double *__restrict__ dest,
-        const int *deriv)
-{
-    double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2];
-    if (deriv == NULL)
-    {
-        this->compute_beta(0, xx[0], bx);
-        this->compute_beta(0, xx[1], by);
-        this->compute_beta(0, xx[2], bz);
-    }
-    else
-    {
-        this->compute_beta(deriv[0], xx[0], bx);
-        this->compute_beta(deriv[1], xx[1], by);
-        this->compute_beta(deriv[2], xx[2], bz);
-    }
-    std::fill_n(dest, 3, 0);
-    ptrdiff_t bigiz, bigiy, bigix;
-    for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++)
-    {
-        bigiz = ptrdiff_t(xg[2]+iz)-this->descriptor->starts[0];
-        for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++)
-        {
-            bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1]));
-            for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++)
-            {
-                bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2]));
-                ptrdiff_t tindex = ((bigiz *this->buffered_descriptor->sizes[1] +
-                                     bigiy)*this->buffered_descriptor->sizes[2] +
-                                     bigix)*3 + this->buffer_size;
-                for (int c=0; c<3; c++)
-                {
-                    dest[c] += this->field[tindex+c]*(bz[iz+interp_neighbours]*
-                                                      by[iy+interp_neighbours]*
-                                                      bx[ix+interp_neighbours]);
-                }
-            }
-        }
-    }
-}
-
-template class interpolator<float, 1>;
-template class interpolator<float, 2>;
-template class interpolator<float, 3>;
-template class interpolator<float, 4>;
-template class interpolator<float, 5>;
-template class interpolator<float, 6>;
-template class interpolator<float, 7>;
-template class interpolator<float, 8>;
-template class interpolator<float, 9>;
-template class interpolator<float, 10>;
-template class interpolator<double, 1>;
-template class interpolator<double, 2>;
-template class interpolator<double, 3>;
-template class interpolator<double, 4>;
-template class interpolator<double, 5>;
-template class interpolator<double, 6>;
-template class interpolator<double, 7>;
-template class interpolator<double, 8>;
-template class interpolator<double, 9>;
-template class interpolator<double, 10>;
-
diff --git a/bfps/cpp/interpolator.hpp b/bfps/cpp/interpolator.hpp
deleted file mode 100644
index 7e56ebe159fd24ed7cf623f0a869e1d262d4aadb..0000000000000000000000000000000000000000
--- a/bfps/cpp/interpolator.hpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include <cmath>
-#include "field_descriptor.hpp"
-#include "fftw_tools.hpp"
-#include "fluid_solver_base.hpp"
-#include "interpolator_base.hpp"
-
-#ifndef INTERPOLATOR
-
-#define INTERPOLATOR
-
-template <class rnumber, int interp_neighbours>
-class interpolator:public interpolator_base<rnumber, interp_neighbours>
-{
-    private:
-        /* pointer to buffered field */
-        rnumber *field;
-
-    public:
-        using interpolator_base<rnumber, interp_neighbours>::operator();
-        ptrdiff_t buffer_size;
-
-        /* descriptor for buffered field */
-        field_descriptor<rnumber> *buffered_descriptor;
-
-        interpolator(
-                fluid_solver_base<rnumber> *FSOLVER,
-                base_polynomial_values BETA_POLYS,
-                ...);
-        ~interpolator();
-
-        int read_rFFTW(const void *src);
-
-        inline int get_rank(double z)
-        {
-            return this->descriptor->rank[MOD(int(floor(z/this->dz)), this->descriptor->sizes[0])];
-        }
-
-        /* interpolate field at an array of locations */
-        void sample(
-                const int nparticles,
-                const int pdimension,
-                const double *__restrict__ x,
-                double *__restrict__ y,
-                const int *deriv = NULL);
-        void operator()(
-                const int *__restrict__ xg,
-                const double *__restrict__ xx,
-                double *__restrict__ dest,
-                const int *deriv = NULL);
-};
-
-#endif//INTERPOLATOR
-
diff --git a/bfps/cpp/interpolator_base.cpp b/bfps/cpp/interpolator_base.cpp
deleted file mode 100644
index 668a965c65744ac5aae31afb6bee05711a433657..0000000000000000000000000000000000000000
--- a/bfps/cpp/interpolator_base.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <cmath>
-#include "interpolator_base.hpp"
-
-template <class rnumber, int interp_neighbours>
-interpolator_base<rnumber, interp_neighbours>::interpolator_base(
-        fluid_solver_base<rnumber> *fs,
-        base_polynomial_values BETA_POLYS)
-{
-    this->descriptor = fs->rd;
-    this->compute_beta = BETA_POLYS;
-
-    // compute dx, dy, dz;
-    this->dx = 4*acos(0) / (fs->dkx*this->descriptor->sizes[2]);
-    this->dy = 4*acos(0) / (fs->dky*this->descriptor->sizes[1]);
-    this->dz = 4*acos(0) / (fs->dkz*this->descriptor->sizes[0]);
-}
-
-template <class rnumber, int interp_neighbours>
-interpolator_base<rnumber, interp_neighbours>::interpolator_base(
-        vorticity_equation<rnumber, FFTW> *fs,
-        base_polynomial_values BETA_POLYS)
-{
-//    this->descriptor = fs->rd;
-//    this->compute_beta = BETA_POLYS;
-//
-//    // compute dx, dy, dz;
-//    this->dx = 4*acos(0) / (fs->kk->dkx*this->descriptor->sizes[2]);
-//    this->dy = 4*acos(0) / (fs->kk->dky*this->descriptor->sizes[1]);
-//    this->dz = 4*acos(0) / (fs->kk->dkz*this->descriptor->sizes[0]);
-}
-
-template <class rnumber, int interp_neighbours>
-void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates(
-        const int nparticles,
-        const int pdimension,
-        const double *x,
-        int *xg,
-        double *xx)
-{
-    for (int p=0; p<nparticles; p++)
-        this->get_grid_coordinates(
-                x + p*pdimension,
-                xg + p*3,
-                xx + p*3);
-}
-
-template <class rnumber, int interp_neighbours>
-void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates(
-        const double *x,
-        int *xg,
-        double *xx)
-{
-    static double grid_size[] = {this->dx, this->dy, this->dz};
-    double tval;
-    for (int c=0; c<3; c++)
-    {
-        tval = floor(x[c]/grid_size[c]);
-        xg[c] = MOD(int(tval), this->descriptor->sizes[2-c]);
-        xx[c] = (x[c] - tval*grid_size[c]) / grid_size[c];
-    }
-}
-
-
-
-template class interpolator_base<float, 1>;
-template class interpolator_base<float, 2>;
-template class interpolator_base<float, 3>;
-template class interpolator_base<float, 4>;
-template class interpolator_base<float, 5>;
-template class interpolator_base<float, 6>;
-template class interpolator_base<float, 7>;
-template class interpolator_base<float, 8>;
-template class interpolator_base<float, 9>;
-template class interpolator_base<float, 10>;
-template class interpolator_base<double, 1>;
-template class interpolator_base<double, 2>;
-template class interpolator_base<double, 3>;
-template class interpolator_base<double, 4>;
-template class interpolator_base<double, 5>;
-template class interpolator_base<double, 6>;
-template class interpolator_base<double, 7>;
-template class interpolator_base<double, 8>;
-template class interpolator_base<double, 9>;
-template class interpolator_base<double, 10>;
-
diff --git a/bfps/cpp/interpolator_base.hpp b/bfps/cpp/interpolator_base.hpp
deleted file mode 100644
index f4c28db7b9de632e8ec4977dd67f929f06080e19..0000000000000000000000000000000000000000
--- a/bfps/cpp/interpolator_base.hpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include "fluid_solver_base.hpp"
-#include "vorticity_equation.hpp"
-#include "spline_n1.hpp"
-#include "spline_n2.hpp"
-#include "spline_n3.hpp"
-#include "spline_n4.hpp"
-#include "spline_n5.hpp"
-#include "spline_n6.hpp"
-#include "spline_n7.hpp"
-#include "spline_n8.hpp"
-#include "spline_n9.hpp"
-#include "spline_n10.hpp"
-#include "Lagrange_polys.hpp"
-
-#ifndef INTERPOLATOR_BASE
-
-#define INTERPOLATOR_BASE
-
-typedef void (*base_polynomial_values)(
-        const int derivative,
-        const double fraction,
-        double *__restrict__ destination);
-
-template <class rnumber, int interp_neighbours>
-class interpolator_base
-{
-    public:
-        /* pointer to polynomial function */
-        base_polynomial_values compute_beta;
-
-        /* descriptor of field to interpolate */
-        field_descriptor<rnumber> *descriptor;
-
-        /* physical parameters of field */
-        double dx, dy, dz;
-
-        interpolator_base(
-                fluid_solver_base<rnumber> *FSOLVER,
-                base_polynomial_values BETA_POLYS);
-
-        interpolator_base(
-                vorticity_equation<rnumber, FFTW> *FSOLVER,
-                base_polynomial_values BETA_POLYS);
-        virtual ~interpolator_base(){}
-
-        /* may not destroy input */
-        virtual int read_rFFTW(const void *src) = 0;
-
-        /* map real locations to grid coordinates */
-        void get_grid_coordinates(
-                const int nparticles,
-                const int pdimension,
-                const double *__restrict__ x,
-                int *__restrict__ xg,
-                double *__restrict__ xx);
-        void get_grid_coordinates(
-                const double *__restrict__ x,
-                int *__restrict__ xg,
-                double *__restrict__ xx);
-        /* interpolate field at an array of locations */
-        virtual void sample(
-                const int nparticles,
-                const int pdimension,
-                const double *__restrict__ x,
-                double *__restrict__ y,
-                const int *deriv = NULL) = 0;
-        /* interpolate 1 point */
-        virtual void operator()(
-                const int *__restrict__ xg,
-                const double *__restrict__ xx,
-                double *__restrict__ dest,
-                const int *deriv = NULL) = 0;
-
-        /* interpolate 1 point */
-        inline void operator()(
-                const double *__restrict__ x,
-                double *__restrict__ dest,
-                const int *deriv = NULL)
-        {
-            int xg[3];
-            double xx[3];
-            this->get_grid_coordinates(x, xg, xx);
-            (*this)(xg, xx, dest, deriv);
-        }
-};
-
-#endif//INTERPOLATOR_BASE
-
diff --git a/bfps/cpp/kspace.cpp b/bfps/cpp/kspace.cpp
index 3fb250002c33282463684c5f8da051ffe8e35b27..5ceb2a806d9c57556bfa36b98e1e5114c2e64f7a 100644
--- a/bfps/cpp/kspace.cpp
+++ b/bfps/cpp/kspace.cpp
@@ -23,6 +23,9 @@
 **********************************************************************/
 
 
+
+#define NDEBUG
+
 #include <cmath>
 #include <cstdlib>
 #include <algorithm>
@@ -31,6 +34,8 @@
 #include "scope_timer.hpp"
 #include "shared_array.hpp"
 
+
+
 template <field_backend be,
           kspace_dealias_type dt>
 template <field_components fc>
diff --git a/bfps/cpp/rFFTW_distributed_particles.cpp b/bfps/cpp/rFFTW_distributed_particles.cpp
deleted file mode 100644
index 265975f8c817a1b40942e076bd016c2921618bbc..0000000000000000000000000000000000000000
--- a/bfps/cpp/rFFTW_distributed_particles.cpp
+++ /dev/null
@@ -1,804 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <cmath>
-#include <cassert>
-#include <cstring>
-#include <string>
-#include <sstream>
-#include <set>
-#include <algorithm>
-#include <ctime>
-
-#include "base.hpp"
-#include "rFFTW_distributed_particles.hpp"
-#include "fftw_tools.hpp"
-#include "scope_timer.hpp"
-
-
-extern int myrank, nprocs;
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_distributed_particles(
-        const char *NAME,
-        const hid_t data_file_id,
-        rFFTW_interpolator<rnumber, interp_neighbours> *VEL,
-        const int TRAJ_SKIP,
-        const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
-            NAME,
-            TRAJ_SKIP,
-            data_file_id,
-            VEL->descriptor->comm)
-{
-    TIMEZONE("rFFTW_distributed_particles::rFFTW_distributed_particles");
-    /* check that integration_steps has a valid value.
-     * If NDEBUG is defined, "assert" doesn't do anything.
-     * With NDEBUG defined, and an invalid INTEGRATION_STEPS,
-     * the particles will simply sit still.
-     * */
-    assert((INTEGRATION_STEPS <= 6) &&
-           (INTEGRATION_STEPS >= 1));
-    /* check that the field layout is compatible with this class.
-     * if it's not, the code will fail in bad ways, most likely ending up
-     * with various CPUs locked in some MPI send/receive.
-     * therefore I prefer to just kill the code at this point,
-     * no matter whether or not NDEBUG is present.
-     * */
-    if (interp_neighbours*2+2 > VEL->descriptor->subsizes[0])
-    {
-        DEBUG_MSG("parameters incompatible with rFFTW_distributed_particles.\n"
-                  "interp kernel size is %d, local_z_size is %d\n",
-                  interp_neighbours*2+2, VEL->descriptor->subsizes[0]);
-        if (VEL->descriptor->myrank == 0)
-            std::cerr << "parameters incompatible with rFFTW_distributed_particles." << std::endl;
-        exit(0);
-    }
-    this->vel = VEL;
-    this->rhs.resize(INTEGRATION_STEPS);
-    this->integration_steps = INTEGRATION_STEPS;
-    /* the particles are expected to be evenly distributed among processes.
-     * therefore allocating twice that amount of memory seems enough.
-     * */
-    this->state.reserve(2*this->nparticles / this->nprocs);
-    for (unsigned int i=0; i<this->rhs.size(); i++)
-        this->rhs[i].reserve(2*this->nparticles / this->nprocs);
-
-    /* build communicators and stuff for interpolation */
-
-    /* number of processors per domain */
-    this->domain_nprocs[-1] = 2; // domain in common with lower z CPU
-    this->domain_nprocs[ 0] = 1; // local domain
-    this->domain_nprocs[ 1] = 2; // domain in common with higher z CPU
-
-    /* initialize domain bins */
-    this->domain_particles[-1] = std::unordered_set<int>();
-    this->domain_particles[ 0] = std::unordered_set<int>();
-    this->domain_particles[ 1] = std::unordered_set<int>();
-    this->domain_particles[-1].reserve(unsigned(
-                1.5*(interp_neighbours*2+2)*
-                float(this->nparticles) /
-                this->nprocs));
-    this->domain_particles[ 1].reserve(unsigned(
-                1.5*(interp_neighbours*2+2)*
-                float(this->nparticles) /
-                this->nprocs));
-    this->domain_particles[ 0].reserve(unsigned(
-                1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)*
-                float(this->nparticles) /
-                this->nprocs));
-
-    int color, key;
-    MPI_Comm tmpcomm;
-    for (int rank=0; rank<this->nprocs; rank++)
-    {
-        color = MPI_UNDEFINED;
-        key = MPI_UNDEFINED;
-        if (this->myrank == rank)
-        {
-            color = rank;
-            key = 0;
-        }
-        if (this->myrank == MOD(rank + 1, this->nprocs))
-        {
-            color = rank;
-            key = 1;
-        }
-        MPI_Comm_split(this->comm, color, key, &tmpcomm);
-        if (this->myrank == rank)
-            this->domain_comm[ 1] = tmpcomm;
-        if (this->myrank == MOD(rank+1, this->nprocs))
-            this->domain_comm[-1] = tmpcomm;
-
-    }
-
-    /* following code may be useful in the future for the general case */
-    //this->interp_comm.resize(this->vel->descriptor->sizes[0]);
-    //this->interp_nprocs.resize(this->vel->descriptor->sizes[0]);
-    //for (int zg=0; zg<this->vel->descriptor->sizes[0]; zg++)
-    //{
-    //    color = (this->vel->get_rank_info(
-    //                (zg+.5)*this->vel->dz, rminz, rmaxz) ? zg : MPI_UNDEFINED);
-    //    key = zg - this->vel->descriptor->starts[0] + interp_neighbours;
-    //    MPI_Comm_split(this->comm, color, key, &this->interp_comm[zg]);
-    //    if (this->interp_comm[zg] != MPI_COMM_NULL)
-    //        MPI_Comm_size(this->interp_comm[zg], &this->interp_nprocs[zg]);
-    //    else
-    //        this->interp_nprocs[zg] = 0;
-    //}
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::~rFFTW_distributed_particles()
-{
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
-        rFFTW_interpolator<rnumber, interp_neighbours> *field,
-        const std::unordered_map<int, single_particle_state<particle_type>> &x,
-        const std::unordered_map<int, std::unordered_set<int>> &dp,
-        std::unordered_map<int, single_particle_state<POINT3D>> &y)
-{
-    TIMEZONE("rFFTW_distributed_particles::sample");
-    double *yyy;
-    double *yy;
-    y.clear();
-    /* local z domain */
-    yy = new double[3];
-    for (auto p: dp.at(0))
-    {
-        (*field)(x.find(p)->second.data, yy);
-        y[p] = yy;
-    }
-    delete[] yy;
-    /* boundary z domains */
-    int domain_index;
-    for (int rankpair = 0; rankpair < this->nprocs; rankpair++)
-    {
-        if (this->myrank == rankpair)
-            domain_index = 1;
-        if (this->myrank == MOD(rankpair+1, this->nprocs))
-            domain_index = -1;
-        if (this->myrank == rankpair ||
-            this->myrank == MOD(rankpair+1, this->nprocs))
-        {
-            yy = new double[3*dp.at(domain_index).size()];
-            yyy = new double[3*dp.at(domain_index).size()];
-            int tindex;
-            tindex = 0;
-            // can this sorting be done more efficiently?
-            std::vector<int> ordered_dp;
-            {
-                TIMEZONE("rFFTW_distributed_particles::sample::ordered_dp");
-            ordered_dp.reserve(dp.at(domain_index).size());
-            for (auto p: dp.at(domain_index))
-                ordered_dp.push_back(p);
-            //std::set<int> ordered_dp(dp.at(domain_index));
-            std::sort(ordered_dp.begin(), ordered_dp.end());
-            }
-
-            for (auto p: ordered_dp)
-            //for (auto p: dp.at(domain_index))
-            {
-                (*field)(x.at(p).data, yy + tindex*3);
-                tindex++;
-            }
-            {
-                TIMEZONE("rFFTW_distributed_particles::sample::MPI_Allreduce");
-                MPI_Allreduce(
-                    yy,
-                    yyy,
-                    3*dp.at(domain_index).size(),
-                    MPI_DOUBLE,
-                    MPI_SUM,
-                    this->domain_comm[domain_index]);
-            }
-            tindex = 0;
-            for (auto p: ordered_dp)
-            //for (auto p: dp.at(domain_index))
-            {
-                y[p] = yyy + tindex*3;
-                tindex++;
-            }
-            delete[] yy;
-            delete[] yyy;
-        }
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs(
-        const std::unordered_map<int, single_particle_state<particle_type>> &x,
-        const std::unordered_map<int, std::unordered_set<int>> &dp,
-        std::unordered_map<int, single_particle_state<particle_type>> &y)
-{
-    std::unordered_map<int, single_particle_state<POINT3D>> yy;
-    switch(particle_type)
-    {
-        case VELOCITY_TRACER:
-            this->sample(this->vel, x, dp, yy);
-            y.clear();
-            y.reserve(yy.size());
-            y.rehash(this->nparticles);
-            for (auto &pp: yy)
-                y[pp.first] = pp.second.data;
-            break;
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
-        rFFTW_interpolator<rnumber, interp_neighbours> *field,
-        const char *dset_name)
-{
-    std::unordered_map<int, single_particle_state<POINT3D>> y;
-    this->sample(field, this->state, this->domain_particles, y);
-    this->write(dset_name, y);
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
-{
-    for (int i=this->integration_steps-2; i>=0; i--)
-        rhs[i+1] = rhs[i];
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute(
-        std::unordered_map<int, single_particle_state<particle_type>> &x,
-        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals,
-        std::unordered_map<int, std::unordered_set<int>> &dp)
-{
-    TIMEZONE("rFFTW_distributed_particles::redistribute");
-    //DEBUG_MSG("entered redistribute\n");
-    /* get new distribution of particles */
-    std::unordered_map<int, std::unordered_set<int>> newdp;
-    {
-        TIMEZONE("sort_into_domains");
-        this->sort_into_domains(x, newdp);
-    }
-    /* take care of particles that are leaving the shared domains */
-    int dindex[2] = {-1, 1};
-    // for each D of the 2 shared domains
-    {
-        TIMEZONE("Loop1");
-        for (int di=0; di<2; di++)
-            // for all particles previously in D
-            for (auto p: dp[dindex[di]])
-            {
-                // if the particle is no longer in D
-                if (newdp[dindex[di]].find(p) == newdp[dindex[di]].end())
-                {
-                    // and the particle is not in the local domain
-                    if (newdp[0].find(p) == newdp[0].end())
-                    {
-                        // remove the particle from the local list
-                        x.erase(p);
-                        for (unsigned int i=0; i<vals.size(); i++)
-                            vals[i].erase(p);
-                    }
-                    // if the particle is in the local domain, do nothing
-                }
-            }
-    }
-    /* take care of particles that are entering the shared domains */
-    /* neighbouring rank offsets */
-    int ro[2];
-    ro[0] = -1;
-    ro[1] = 1;
-    /* particles to send, particles to receive */
-    std::vector<int> ps[2], pr[2];
-    for (int tcounter = 0; tcounter < 2; tcounter++)
-    {
-        ps[tcounter].reserve(newdp[dindex[tcounter]].size());
-    }
-    /* number of particles to send, number of particles to receive */
-    int nps[2], npr[2];
-    int rsrc, rdst;
-    /* get list of id-s to send */
-    {
-        TIMEZONE("Loop2");
-        for (auto &p: dp[0])
-        {
-            for (int di=0; di<2; di++)
-            {
-                if (newdp[dindex[di]].find(p) != newdp[dindex[di]].end())
-                    ps[di].push_back(p);
-            }
-        }
-    }
-    /* prepare data for send recv */
-    for (int i=0; i<2; i++)
-        nps[i] = ps[i].size();
-    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
-        for (int i=0; i<2; i++)
-        {
-            rdst = MOD(rsrc+ro[i], this->nprocs);
-            if (this->myrank == rsrc){
-                TIMEZONE("MPI_Send");
-                MPI_Send(
-                        nps+i,
-                        1,
-                        MPI_INTEGER,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst)+i,
-                        this->comm);
-            }
-            if (this->myrank == rdst){
-                TIMEZONE("MPI_Recv");
-                MPI_Recv(
-                        npr+1-i,
-                        1,
-                        MPI_INTEGER,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst)+i,
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-            }
-        }
-    //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]);
-    //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]);
-    for (int i=0; i<2; i++)
-        pr[i].resize(npr[i]);
-
-    int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1];
-    buffer_size = (buffer_size > npr[0])? buffer_size : npr[0];
-    buffer_size = (buffer_size > npr[1])? buffer_size : npr[1];
-    //DEBUG_MSG("buffer size is %d\n", buffer_size);
-    double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())];
-    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
-        for (int i=0; i<2; i++)
-        {
-            rdst = MOD(rsrc+ro[i], this->nprocs);
-            if (this->myrank == rsrc && nps[i] > 0)
-            {
-                TIMEZONE("this->myrank == rsrc && nps[i] > 0");
-                MPI_Send(
-                        &ps[i].front(),
-                        nps[i],
-                        MPI_INTEGER,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst),
-                        this->comm);
-                int pcounter = 0;
-                for (int p: ps[i])
-                {
-                    std::copy(x[p].data,
-                              x[p].data + state_dimension(particle_type),
-                              buffer + pcounter*(1+vals.size())*state_dimension(particle_type));
-                    for (unsigned int tindex=0; tindex<vals.size(); tindex++)
-                    {
-                        std::copy(vals[tindex][p].data,
-                                  vals[tindex][p].data + state_dimension(particle_type),
-                                  buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type));
-                    }
-                    pcounter++;
-                }
-                MPI_Send(
-                        buffer,
-                        nps[i]*(1+vals.size())*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst)+1,
-                        this->comm);
-            }
-            if (this->myrank == rdst && npr[1-i] > 0)
-            {
-                TIMEZONE("this->myrank == rdst && npr[1-i] > 0");
-                MPI_Recv(
-                        &pr[1-i].front(),
-                        npr[1-i],
-                        MPI_INTEGER,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst),
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-                MPI_Recv(
-                        buffer,
-                        npr[1-i]*(1+vals.size())*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst)+1,
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-                int pcounter = 0;
-                for (int p: pr[1-i])
-                {
-                    x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type);
-                    newdp[1-i].insert(p);
-                    for (unsigned int tindex=0; tindex<vals.size(); tindex++)
-                    {
-                        vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type);
-                    }
-                    pcounter++;
-                }
-            }
-        }
-    delete[] buffer;
-    // x has been changed, so newdp is obsolete
-    // we need to sort into domains again
-    {
-        TIMEZONE("sort_into_domains2");
-        this->sort_into_domains(x, dp);
-    }
-
-#ifndef NDEBUG
-    /* check that all particles at x are local */
-    //for (auto &pp: x)
-    //    if (this->vel->get_rank(pp.second.data[2]) != this->myrank)
-    //    {
-    //        DEBUG_MSG("found particle %d with rank %d\n",
-    //                pp.first,
-    //                this->vel->get_rank(pp.second.data[2]));
-    //        assert(false);
-    //    }
-#endif
-    //DEBUG_MSG("exiting redistribute\n");
-}
-
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
-        const int nsteps)
-{
-    this->get_rhs(this->state, this->domain_particles, this->rhs[0]);
-#define AdamsBashforth_LOOP_PREAMBLE \
-    for (auto &pp: this->state) \
-        for (unsigned int i=0; i<state_dimension(particle_type); i++)
-    switch(nsteps)
-    {
-        case 1:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*this->rhs[0][pp.first][i];
-            break;
-        case 2:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i]
-                                    -   this->rhs[1][pp.first][i])/2;
-            break;
-        case 3:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i]
-                                    - 16*this->rhs[1][pp.first][i]
-                                    +  5*this->rhs[2][pp.first][i])/12;
-            break;
-        case 4:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i]
-                                    - 59*this->rhs[1][pp.first][i]
-                                    + 37*this->rhs[2][pp.first][i]
-                                    -  9*this->rhs[3][pp.first][i])/24;
-            break;
-        case 5:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i]
-                                    - 2774*this->rhs[1][pp.first][i]
-                                    + 2616*this->rhs[2][pp.first][i]
-                                    - 1274*this->rhs[3][pp.first][i]
-                                    +  251*this->rhs[4][pp.first][i])/720;
-            break;
-        case 6:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i]
-                                    - 7923*this->rhs[1][pp.first][i]
-                                    + 9982*this->rhs[2][pp.first][i]
-                                    - 7298*this->rhs[3][pp.first][i]
-                                    + 2877*this->rhs[4][pp.first][i]
-                                    -  475*this->rhs[5][pp.first][i])/1440;
-            break;
-    }
-    this->redistribute(this->state, this->rhs, this->domain_particles);
-    this->roll_rhs();
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::step()
-{
-    TIMEZONE("rFFTW_distributed_particles::step");
-    this->AdamsBashforth((this->iteration < this->integration_steps) ?
-                          this->iteration+1 :
-                          this->integration_steps);
-    this->iteration++;
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sort_into_domains(
-        const std::unordered_map<int, single_particle_state<particle_type>> &x,
-        std::unordered_map<int, std::unordered_set<int>> &dp)
-{
-    TIMEZONE("rFFTW_distributed_particles::sort_into_domains");
-    int tmpint1, tmpint2;
-    dp.clear();
-    dp[-1] = std::unordered_set<int>();
-    dp[ 0] = std::unordered_set<int>();
-    dp[ 1] = std::unordered_set<int>();
-    dp[-1].reserve(unsigned(
-                1.5*(interp_neighbours*2+2)*
-                float(this->nparticles) /
-                this->nprocs));
-    dp[ 1].reserve(unsigned(
-                1.5*(interp_neighbours*2+2)*
-                float(this->nparticles) /
-                this->nprocs));
-    dp[ 0].reserve(unsigned(
-                1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)*
-                float(this->nparticles) /
-                this->nprocs));
-    for (auto &xx: x)
-    {
-        if (this->vel->get_rank_info(xx.second.data[2], tmpint1, tmpint2))
-        {
-            if (tmpint1 == tmpint2)
-                dp[0].insert(xx.first);
-            else
-            {
-                if (this->myrank == tmpint1)
-                    dp[-1].insert(xx.first);
-                else
-                    dp[ 1].insert(xx.first);
-            }
-        }
-    }
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::read()
-{
-    TIMEZONE("rFFTW_distributed_particles::read");
-    double *temp = new double[this->chunk_size*state_dimension(particle_type)];
-    int tmpint1, tmpint2;
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        //read state
-        if (this->myrank == 0){
-            TIMEZONE("read_state_chunk");
-            this->read_state_chunk(cindex, temp);
-        }
-        {
-            TIMEZONE("MPI_Bcast");
-            MPI_Bcast(
-                temp,
-                this->chunk_size*state_dimension(particle_type),
-                MPI_DOUBLE,
-                0,
-                this->comm);
-        }
-        for (unsigned int p=0; p<this->chunk_size; p++)
-        {
-            if (this->vel->get_rank_info(temp[state_dimension(particle_type)*p+2], tmpint1, tmpint2))
-            {
-                this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
-            }
-        }
-        //read rhs
-        if (this->iteration > 0){
-            TIMEZONE("this->iteration > 0");
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                if (this->myrank == 0){
-                    TIMEZONE("read_rhs_chunk");
-                    this->read_rhs_chunk(cindex, i, temp);
-                }
-                {
-                    TIMEZONE("MPI_Bcast");
-                    MPI_Bcast(
-                        temp,
-                        this->chunk_size*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        0,
-                        this->comm);
-                }
-                for (unsigned int p=0; p<this->chunk_size; p++)
-                {
-                    auto pp = this->state.find(p+cindex*this->chunk_size);
-                    if (pp != this->state.end())
-                        this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
-                }
-            }
-        }
-    }
-    this->sort_into_domains(this->state, this->domain_particles);
-    DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size());
-    for (int domain=-1; domain<=1; domain++)
-    {
-        DEBUG_MSG("domain %d nparticles = %ld\n", domain, this->domain_particles[domain].size());
-    }
-    delete[] temp;
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write(
-        const char *dset_name,
-        std::unordered_map<int, single_particle_state<POINT3D>> &y)
-{
-    TIMEZONE("rFFTW_distributed_particles::write");
-    double *data = new double[this->chunk_size*3];
-    double *yy = new double[this->chunk_size*3];
-    //int pindex = 0;
-   for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        std::fill_n(yy, this->chunk_size*3, 0);
-        //for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
-        //{
-        //    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
-        //        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
-        //    {
-        //        std::copy(y[pindex].data,
-        //                  y[pindex].data + 3,
-        //                  yy + p*3);
-        //    }
-        //}
-        for (int s = -1; s <= 0; s++)
-             for (auto &pp: this->domain_particles[s])
-             {
-                 if (pp >= int(cindex*this->chunk_size) &&
-                     pp < int((cindex+1)*this->chunk_size))
-                {
-                    std::copy(y[pp].data,
-                              y[pp].data + 3,
-                              yy + (pp-cindex*this->chunk_size)*3);
-                }
-             }
-        {
-            TIMEZONE("MPI_Allreduce");
-            MPI_Allreduce(
-                yy,
-                data,
-                3*this->chunk_size,
-                MPI_DOUBLE,
-                MPI_SUM,
-                this->comm);
-        }
-        if (this->myrank == 0){
-            TIMEZONE("write_point3D_chunk");
-            this->write_point3D_chunk(dset_name, cindex, data);
-        }
-    }
-    delete[] yy;
-    delete[] data;
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write(
-        const bool write_rhs)
-{
-    TIMEZONE("rFFTW_distributed_particles::write2");
-    double *temp0 = new double[this->chunk_size*state_dimension(particle_type)];
-    double *temp1 = new double[this->chunk_size*state_dimension(particle_type)];
-    //int pindex = 0;
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        //write state
-        std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
-        //pindex = cindex*this->chunk_size;
-        //for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
-        //{
-        //    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
-        //        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
-        //    {
-        //        TIMEZONE("std::copy");
-        //        std::copy(this->state[pindex].data,
-        //                  this->state[pindex].data + state_dimension(particle_type),
-        //                  temp0 + p*state_dimension(particle_type));
-        //    }
-        //}
-        for (int s = -1; s <= 0; s++)
-             for (auto &pp: this->domain_particles[s])
-             {
-                 if (pp >= int(cindex*this->chunk_size) &&
-                     pp < int((cindex+1)*this->chunk_size))
-                {
-                    std::copy(this->state[pp].data,
-                              this->state[pp].data + state_dimension(particle_type),
-                              temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type));
-                }
-             }
-        {
-            TIMEZONE("MPI_Allreduce");
-            MPI_Allreduce(
-                    temp0,
-                    temp1,
-                    state_dimension(particle_type)*this->chunk_size,
-                    MPI_DOUBLE,
-                    MPI_SUM,
-                    this->comm);
-        }
-        if (this->myrank == 0){
-            TIMEZONE("write_state_chunk");
-            this->write_state_chunk(cindex, temp1);
-        }
-        //write rhs
-        if (write_rhs){
-            TIMEZONE("write_rhs");
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
-                //pindex = cindex*this->chunk_size;
-                //for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
-                //{
-                //    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
-                //        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
-                //    {
-                //        TIMEZONE("std::copy");
-                //        std::copy(this->rhs[i][pindex].data,
-                //                  this->rhs[i][pindex].data + state_dimension(particle_type),
-                //                  temp0 + p*state_dimension(particle_type));
-                //    }
-                //}
-                for (int s = -1; s <= 0; s++)
-                     for (auto &pp: this->domain_particles[s])
-                     {
-                         if (pp >= int(cindex*this->chunk_size) &&
-                             pp < int((cindex+1)*this->chunk_size))
-                        {
-                            std::copy(this->rhs[i][pp].data,
-                                      this->rhs[i][pp].data + state_dimension(particle_type),
-                                      temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type));
-                        }
-                     }
-                {
-                    TIMEZONE("MPI_Allreduce");
-                    MPI_Allreduce(
-                        temp0,
-                        temp1,
-                        state_dimension(particle_type)*this->chunk_size,
-                        MPI_DOUBLE,
-                        MPI_SUM,
-                        this->comm);
-                }
-                if (this->myrank == 0){
-                    TIMEZONE("write_rhs_chunk");
-                    this->write_rhs_chunk(cindex, i, temp1);
-                }
-            }
-        }
-    }
-    delete[] temp0;
-    delete[] temp1;
-}
-
-
-/*****************************************************************************/
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 1>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 2>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 3>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 4>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 5>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 6>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 1>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 2>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 3>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 4>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 5>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 6>;
-/*****************************************************************************/
-
diff --git a/bfps/cpp/rFFTW_distributed_particles.hpp b/bfps/cpp/rFFTW_distributed_particles.hpp
deleted file mode 100644
index 400411d5f1fd6e597714be494a72272a76e01206..0000000000000000000000000000000000000000
--- a/bfps/cpp/rFFTW_distributed_particles.hpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <iostream>
-#include <unordered_map>
-#include <unordered_set>
-#include <vector>
-#include <hdf5.h>
-#include "base.hpp"
-#include "particles_base.hpp"
-#include "fluid_solver_base.hpp"
-#include "rFFTW_interpolator.hpp"
-
-#ifndef RFFTW_DISTRIBUTED_PARTICLES
-
-#define RFFTW_DISTRIBUTED_PARTICLES
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-class rFFTW_distributed_particles: public particles_io_base<particle_type>
-{
-    private:
-        // a "domain" corresponds to a region in 3D real space where a fixed set
-        // of MPI processes are required to participate in the interpolation
-        // formula (i.e. they all contain required information).
-        // we need to know how many processes there are for each of the domains
-        // to which the local process belongs.
-        std::unordered_map<int, int> domain_nprocs;
-        // each domain has an associated communicator, and we keep a list of the
-        // communicators to which the local process belongs
-        std::unordered_map<int, MPI_Comm> domain_comm;
-        // for each domain, we need a list of the IDs of the particles located
-        // in that domain
-        std::unordered_map<int, std::unordered_set<int>> domain_particles;
-
-        // for each domain, we need the state of each particle
-        std::unordered_map<int, single_particle_state<particle_type>> state;
-        // for each domain, we also need the last few values of the right hand
-        // side of the ODE, since we use Adams-Bashforth integration
-        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs;
-
-    public:
-        int integration_steps;
-        // this class only works with rFFTW interpolator
-        rFFTW_interpolator<rnumber, interp_neighbours> *vel;
-
-        /* simulation parameters */
-        double dt;
-
-        /* methods */
-
-        /* constructor and destructor.
-         * allocate and deallocate:
-         *  this->state
-         *  this->rhs
-         * */
-        rFFTW_distributed_particles(
-                const char *NAME,
-                const hid_t data_file_id,
-                rFFTW_interpolator<rnumber, interp_neighbours> *FIELD,
-                const int TRAJ_SKIP,
-                const int INTEGRATION_STEPS = 2);
-        ~rFFTW_distributed_particles();
-
-        void sample(
-                rFFTW_interpolator<rnumber, interp_neighbours> *field,
-                const char *dset_name);
-        void sample(
-                rFFTW_interpolator<rnumber, interp_neighbours> *field,
-                const std::unordered_map<int, single_particle_state<particle_type>> &x,
-                const std::unordered_map<int, std::unordered_set<int>> &dp,
-                std::unordered_map<int, single_particle_state<POINT3D>> &y);
-        void get_rhs(
-                const std::unordered_map<int, single_particle_state<particle_type>> &x,
-                const std::unordered_map<int, std::unordered_set<int>> &dp,
-                std::unordered_map<int, single_particle_state<particle_type>> &y);
-
-
-        /* given a list of particle positions,
-         * figure out which go into what local domain, and construct the relevant
-         * map of ID lists "dp" (for domain particles).
-         * */
-        void sort_into_domains(
-                const std::unordered_map<int, single_particle_state<particle_type>> &x,
-                std::unordered_map<int, std::unordered_set<int>> &dp);
-        /* suppose the particles are currently badly distributed, and some
-         * arbitrary quantities (stored in "vals") are associated to the particles,
-         * and we need to properly distribute them among processes.
-         * that's what this function does.
-         * In practice it's only used to redistribute the rhs values (and it
-         * automatically redistributes the state x being passed).
-         * Some more comments are present in the .cpp file, but, in brief: the
-         * particles are simply moved from one domain to another.
-         * If it turns out that the new domain contains a process which does not
-         * know about a particle, that information is sent from the closest process.
-         * */
-        void redistribute(
-                std::unordered_map<int, single_particle_state<particle_type>> &x,
-                std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals,
-                std::unordered_map<int, std::unordered_set<int>> &dp);
-
-
-        /* input/output */
-        void read();
-        void write(
-                const char *dset_name,
-                std::unordered_map<int, single_particle_state<POINT3D>> &y);
-        void write(
-                const char *dset_name,
-                std::unordered_map<int, single_particle_state<particle_type>> &y);
-        void write(const bool write_rhs = true);
-
-        /* solvers */
-        void step();
-        void roll_rhs();
-        void AdamsBashforth(const int nsteps);
-};
-
-#endif//RFFTW_DISTRIBUTED_PARTICLES
-
diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp
deleted file mode 100644
index b8b21e8811d7f5286dc4edd00833c205539ea89c..0000000000000000000000000000000000000000
--- a/bfps/cpp/rFFTW_interpolator.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <cmath>
-#include "rFFTW_interpolator.hpp"
-#include "scope_timer.hpp"
-
-template <class rnumber, int interp_neighbours>
-rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator(
-        fluid_solver_base<rnumber> *fs,
-        base_polynomial_values BETA_POLYS,
-        rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS)
-{
-    this->field = FIELD_POINTER;
-
-
-    // generate compute array
-    this->compute = new bool[this->descriptor->sizes[0]];
-    std::fill_n(this->compute, this->descriptor->sizes[0], false);
-    for (int iz = this->descriptor->starts[0]-interp_neighbours-1;
-            iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours;
-            iz++)
-        this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true;
-}
-
-template <class rnumber, int interp_neighbours>
-rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator(
-        vorticity_equation<rnumber, FFTW> *fs,
-        base_polynomial_values BETA_POLYS,
-        rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS)
-{
-//    this->field = FIELD_POINTER;
-//
-//
-//    // generate compute array
-//    this->compute = new bool[this->descriptor->sizes[0]];
-//    std::fill_n(this->compute, this->descriptor->sizes[0], false);
-//    for (int iz = this->descriptor->starts[0]-interp_neighbours-1;
-//            iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours;
-//            iz++)
-//        this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true;
-}
-
-template <class rnumber, int interp_neighbours>
-rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator()
-{
-    delete[] this->compute;
-}
-
-template <class rnumber, int interp_neighbours>
-bool rFFTW_interpolator<rnumber, interp_neighbours>::get_rank_info(double z, int &maxz_rank, int &minz_rank)
-{
-    int zg = int(floor(z/this->dz));
-    minz_rank = this->descriptor->rank[MOD(
-             zg - interp_neighbours,
-            this->descriptor->sizes[0])];
-    maxz_rank = this->descriptor->rank[MOD(
-            zg + 1 + interp_neighbours,
-            this->descriptor->sizes[0])];
-    bool is_here = false;
-    for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++)
-        is_here = (is_here ||
-                   (this->descriptor->myrank ==
-                    this->descriptor->rank[MOD(zg+iz, this->descriptor->sizes[0])]));
-    return is_here;
-}
-
-template <class rnumber, int interp_neighbours>
-void rFFTW_interpolator<rnumber, interp_neighbours>::sample(
-        const int nparticles,
-        const int pdimension,
-        const double *__restrict__ x,
-        double *__restrict__ y,
-        const int *deriv)
-{
-    TIMEZONE("rFFTW_interpolator::sample");
-    /* get grid coordinates */
-    int *xg = new int[3*nparticles];
-    double *xx = new double[3*nparticles];
-    double *yy =  new double[3*nparticles];
-    std::fill_n(yy, 3*nparticles, 0.0);
-    this->get_grid_coordinates(nparticles, pdimension, x, xg, xx);
-    /* perform interpolation */
-    for (int p=0; p<nparticles; p++)
-        if (this->compute[xg[p*3+2]])
-            this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv);
-    MPI_Allreduce(
-            yy,
-            y,
-            3*nparticles,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->descriptor->comm);
-    delete[] yy;
-    delete[] xg;
-    delete[] xx;
-}
-
-template <class rnumber, int interp_neighbours>
-void rFFTW_interpolator<rnumber, interp_neighbours>::operator()(
-        const int *xg,
-        const double *xx,
-        double *dest,
-        const int *deriv)
-{
-    TIMEZONE("rFFTW_interpolator::operator()");
-    double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2];
-    /* please note that the polynomials in z are computed for all the different
-     * iz values, independently of whether or not "myrank" will perform the
-     * computation for all the different iz slices.
-     * I don't know how big a deal this really is, but it is something that we can
-     * optimize.
-     * */
-    if (deriv == NULL)
-    {
-        this->compute_beta(0, xx[0], bx);
-        this->compute_beta(0, xx[1], by);
-        this->compute_beta(0, xx[2], bz);
-    }
-    else
-    {
-        this->compute_beta(deriv[0], xx[0], bx);
-        this->compute_beta(deriv[1], xx[1], by);
-        this->compute_beta(deriv[2], xx[2], bz);
-    }
-    std::fill_n(dest, 3, 0);
-    ptrdiff_t bigiz, bigiy, bigix;
-    // loop over the 2*interp_neighbours + 2 z slices
-    for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++)
-    {
-        // bigiz is the z index of the cell containing the particles
-        // this->descriptor->sizes[0] is added before taking the modulo
-        // because we want to be sure that "bigiz" is a positive number.
-        // I'm no longer sure why I don't use the MOD function here.
-        bigiz = ptrdiff_t(((xg[2]+iz) + this->descriptor->sizes[0]) % this->descriptor->sizes[0]);
-        // once we know bigiz, we know whether "myrank" has the relevant slice.
-        // if not, go to next value of bigiz
-        if (this->descriptor->myrank == this->descriptor->rank[bigiz])
-        {
-            for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++)
-            {
-                // bigiy is the y index of the cell
-                // since we have all the y indices in myrank, we can safely use the
-                // modulo value
-                bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1]));
-                for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++)
-                {
-                    // bigix is the x index of the cell
-                    bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2]));
-                    // here we create the index to the current grid node
-                    // note the removal of local_z_start from bigiz.
-                    ptrdiff_t tindex = (((bigiz-this->descriptor->starts[0])*this->descriptor->sizes[1] +
-                                         bigiy)*(this->descriptor->sizes[2]+2) +
-                                         bigix)*3;
-                    for (int c=0; c<3; c++)
-                        dest[c] += this->field[tindex+c]*(bz[iz+interp_neighbours]*
-                                                          by[iy+interp_neighbours]*
-                                                          bx[ix+interp_neighbours]);
-                }
-            }
-        }
-    }
-}
-
-template class rFFTW_interpolator<float, 1>;
-template class rFFTW_interpolator<float, 2>;
-template class rFFTW_interpolator<float, 3>;
-template class rFFTW_interpolator<float, 4>;
-template class rFFTW_interpolator<float, 5>;
-template class rFFTW_interpolator<float, 6>;
-template class rFFTW_interpolator<float, 7>;
-template class rFFTW_interpolator<float, 8>;
-template class rFFTW_interpolator<float, 9>;
-template class rFFTW_interpolator<float, 10>;
-template class rFFTW_interpolator<double, 1>;
-template class rFFTW_interpolator<double, 2>;
-template class rFFTW_interpolator<double, 3>;
-template class rFFTW_interpolator<double, 4>;
-template class rFFTW_interpolator<double, 5>;
-template class rFFTW_interpolator<double, 6>;
-template class rFFTW_interpolator<double, 7>;
-template class rFFTW_interpolator<double, 8>;
-template class rFFTW_interpolator<double, 9>;
-template class rFFTW_interpolator<double, 10>;
-
diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp
deleted file mode 100644
index 5088be8b2f3094fd96332af0c923d7cc905e4f3f..0000000000000000000000000000000000000000
--- a/bfps/cpp/rFFTW_interpolator.hpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include "field_descriptor.hpp"
-#include "fftw_tools.hpp"
-#include "fluid_solver_base.hpp"
-#include "vorticity_equation.hpp"
-#include "interpolator_base.hpp"
-
-#ifndef RFFTW_INTERPOLATOR
-
-#define RFFTW_INTERPOLATOR
-
-template <class rnumber, int interp_neighbours>
-class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours>
-{
-    public:
-        using interpolator_base<rnumber, interp_neighbours>::operator();
-
-        /* pointer to field that has to be interpolated
-         * The reason this is a member variable is because I want this class
-         * to be consistent with the "interpolator" class, where a member
-         * variable is absolutely required (since that class uses padding).
-         * */
-        rnumber *field;
-
-        /* compute[iz] is an array that says whether or not the current MPI
-         * process is involved in the interpolation formula for a particle
-         * located in cell "iz".
-         * It is mostly used in the formula itself.
-         * This translates as the following condition:
-         * local_zstart - neighbours <= iz <= local_zend + 1 + neighbours
-         * I think it's cleaner to keep things in an array, especially since
-         * "local_zend" is shorthand for another arithmetic operation anyway.
-         * */
-        bool *compute;
-
-
-        /* Constructors */
-        rFFTW_interpolator(
-                fluid_solver_base<rnumber> *FSOLVER,
-                base_polynomial_values BETA_POLYS,
-                rnumber *FIELD_DATA);
-
-        /* this constructor is empty, I just needed for a quick hack of the
-         * "vorticity_equation" class.
-         * It should be removed soon.
-         * */
-        rFFTW_interpolator(
-                vorticity_equation<rnumber, FFTW> *FSOLVER,
-                base_polynomial_values BETA_POLYS,
-                rnumber *FIELD_DATA);
-        ~rFFTW_interpolator();
-
-        /* This method is provided for consistency with "interpolator", and it
-         * does not destroy input */
-        inline int read_rFFTW(const void *src)
-        {
-            this->field = (rnumber*)src;
-            return EXIT_SUCCESS;
-        }
-
-        /* This is used when "compute" is not enough.
-         * For a given z location, it gives the outermost ranks that are relevant
-         * for the interpolation formula.
-         * */
-        bool get_rank_info(double z, int &maxz_rank, int &minz_rank);
-
-        /* interpolate field at an array of locations.
-         * After interpolation is performed, call Allreduce for "y", over
-         * this->descriptor->comm --- generally MPI_COMM_WORLD.
-         * This is useful for the simple "particles" class, where particle
-         * information is synchronized across all processes.
-         * */
-        void sample(
-                const int nparticles,
-                const int pdimension,
-                const double *__restrict__ x,
-                double *__restrict__ y,
-                const int *deriv = NULL);
-        /* interpolate 1 point.
-         * Result is kept local.
-         * This is used in the "rFFTW_distributed_particles" class, with the
-         * result being synchronized across the relevant "local particle
-         * communicator".
-         * */
-        void operator()(
-                const int *__restrict__ xg,
-                const double *__restrict__ xx,
-                double *__restrict__ dest,
-                const int *deriv = NULL);
-};
-
-#endif//RFFTW_INTERPOLATOR
-
diff --git a/bfps/cpp/slab_field_particles.cpp b/bfps/cpp/slab_field_particles.cpp
deleted file mode 100644
index e3c84574062a4eabd5bf52d14a2b0d727c67b68e..0000000000000000000000000000000000000000
--- a/bfps/cpp/slab_field_particles.cpp
+++ /dev/null
@@ -1,799 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-
-#include <cmath>
-#include <cassert>
-#include <cstring>
-#include <string>
-#include <sstream>
-
-#include "base.hpp"
-#include "slab_field_particles.hpp"
-#include "fftw_tools.hpp"
-
-
-extern int myrank, nprocs;
-
-template <class rnumber>
-slab_field_particles<rnumber>::slab_field_particles(
-        const char *NAME,
-        fluid_solver_base<rnumber> *FSOLVER,
-        const int NPARTICLES,
-        const int NCOMPONENTS,
-        base_polynomial_values BETA_POLYS,
-        const int INTERP_NEIGHBOURS,
-        const int TRAJ_SKIP,
-        const int INTEGRATION_STEPS)
-{
-    assert((NCOMPONENTS % 3) == 0);
-    assert((INTERP_NEIGHBOURS >= 1) ||
-           (INTERP_NEIGHBOURS <= 8));
-    assert((INTEGRATION_STEPS <= 6) &&
-           (INTEGRATION_STEPS >= 1));
-    strncpy(this->name, NAME, 256);
-    this->fs = FSOLVER;
-    this->nparticles = NPARTICLES;
-    this->ncomponents = NCOMPONENTS;
-    this->integration_steps = INTEGRATION_STEPS;
-    this->interp_neighbours = INTERP_NEIGHBOURS;
-    this->traj_skip = TRAJ_SKIP;
-    this->compute_beta = BETA_POLYS;
-    // in principle only the buffer width at the top needs the +1,
-    // but things are simpler if buffer_width is the same
-    this->buffer_width = this->interp_neighbours+1;
-    this->buffer_size = this->buffer_width*this->fs->rd->slice_size;
-    this->array_size = this->nparticles * this->ncomponents;
-    this->state = fftw_interface<rnumber>::alloc_real(this->array_size);
-    std::fill_n(this->state, this->array_size, 0.0);
-    for (int i=0; i < this->integration_steps; i++)
-    {
-        this->rhs[i] = fftw_interface<rnumber>::alloc_real(this->array_size);
-        std::fill_n(this->rhs[i], this->array_size, 0.0);
-    }
-    this->watching = new bool[this->fs->rd->nprocs*nparticles];
-    std::fill_n(this->watching, this->fs->rd->nprocs*this->nparticles, false);
-    this->computing = new int[nparticles];
-
-    int tdims[4];
-    tdims[0] = this->buffer_width*2*this->fs->rd->nprocs + this->fs->rd->sizes[0];
-    tdims[1] = this->fs->rd->sizes[1];
-    tdims[2] = this->fs->rd->sizes[2];
-    tdims[3] = this->fs->rd->sizes[3];
-    this->buffered_field_descriptor = new field_descriptor<rnumber>(
-            4, tdims,
-            this->fs->rd->mpi_dtype,
-            this->fs->rd->comm);
-
-    // compute dx, dy, dz;
-    this->dx = 4*acos(0) / (this->fs->dkx*this->fs->rd->sizes[2]);
-    this->dy = 4*acos(0) / (this->fs->dky*this->fs->rd->sizes[1]);
-    this->dz = 4*acos(0) / (this->fs->dkz*this->fs->rd->sizes[0]);
-
-    // compute lower and upper bounds
-    this->lbound = new double[nprocs];
-    this->ubound = new double[nprocs];
-    double *tbound = new double[nprocs];
-    std::fill_n(tbound, nprocs, 0.0);
-    tbound[this->fs->rd->myrank] = this->fs->rd->starts[0]*this->dz;
-    MPI_Allreduce(
-            tbound,
-            this->lbound,
-            nprocs,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->fs->rd->comm);
-    std::fill_n(tbound, nprocs, 0.0);
-    tbound[this->fs->rd->myrank] = (this->fs->rd->starts[0] + this->fs->rd->subsizes[0])*this->dz;
-    MPI_Allreduce(
-            tbound,
-            this->ubound,
-            nprocs,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->fs->rd->comm);
-    delete[] tbound;
-    //for (int r = 0; r<nprocs; r++)
-    //    DEBUG_MSG(
-    //            "lbound[%d] = %lg, ubound[%d] = %lg\n",
-    //            r, this->lbound[r],
-    //            r, this->ubound[r]
-    //            );
-}
-
-template <class rnumber>
-slab_field_particles<rnumber>::~slab_field_particles()
-{
-    delete[] this->computing;
-    delete[] this->watching;
-    fftw_interface<rnumber>::free(this->state);
-    for (int i=0; i < this->integration_steps; i++)
-    {
-        fftw_interface<rnumber>::free(this->rhs[i]);
-    }
-    delete[] this->lbound;
-    delete[] this->ubound;
-    delete this->buffered_field_descriptor;
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::get_rhs(double *x, double *y)
-{
-    std::fill_n(y, this->array_size, 0.0);
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::jump_estimate(double *dest)
-{
-    std::fill_n(dest, this->nparticles, 0.0);
-}
-
-template <class rnumber>
-int slab_field_particles<rnumber>::get_rank(double z)
-{
-    int tmp = this->fs->rd->rank[MOD(int(floor(z/this->dz)), this->fs->rd->sizes[0])];
-    assert(tmp >= 0 && tmp < this->fs->rd->nprocs);
-    return tmp;
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::synchronize_single_particle_state(int p, double *x, int source)
-{
-    if (source == -1) source = this->computing[p];
-    if (this->watching[this->fs->rd->myrank*this->nparticles+p]) for (int r=0; r<this->fs->rd->nprocs; r++)
-        if (r != source &&
-            this->watching[r*this->nparticles+p])
-        {
-            //DEBUG_MSG("synchronizing state %d from %d to %d\n", p, this->computing[p], r);
-            if (this->fs->rd->myrank == source)
-                MPI_Send(
-                        x+p*this->ncomponents,
-                        this->ncomponents,
-                        MPI_DOUBLE,
-                        r,
-                        p+this->computing[p]*this->nparticles,
-                        this->fs->rd->comm);
-            if (this->fs->rd->myrank == r)
-                MPI_Recv(
-                        x+p*this->ncomponents,
-                        this->ncomponents,
-                        MPI_DOUBLE,
-                        source,
-                        p+this->computing[p]*this->nparticles,
-                        this->fs->rd->comm,
-                        MPI_STATUS_IGNORE);
-        }
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::synchronize()
-{
-    double *tstate = fftw_interface<double>::alloc_real(this->array_size);
-    // first, synchronize state and jump across CPUs
-    std::fill_n(tstate, this->array_size, 0.0);
-    for (int p=0; p<this->nparticles; p++)
-    {
-        //if (this->watching[this->fs->rd->myrank*this->nparticles + p])
-        //DEBUG_MSG(
-        //        "in synchronize, position for particle %d is %g %g %g\n",
-        //        p,
-        //        this->state[p*this->ncomponents],
-        //        this->state[p*this->ncomponents+1],
-        //        this->state[p*this->ncomponents+2]);
-        if (this->fs->rd->myrank == this->computing[p])
-            std::copy(this->state + p*this->ncomponents,
-                      this->state + (p+1)*this->ncomponents,
-                      tstate + p*this->ncomponents);
-    }
-    MPI_Allreduce(
-            tstate,
-            this->state,
-            this->array_size,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->fs->rd->comm);
-    if (this->integration_steps >= 1)
-    {
-        for (int i=0; i<this->integration_steps; i++)
-        {
-            std::fill_n(tstate, this->array_size, 0.0);
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                std::copy(this->rhs[i] + p*this->ncomponents,
-                          this->rhs[i] + (p+1)*this->ncomponents,
-                          tstate + p*this->ncomponents);
-            std::fill_n(this->rhs[i], this->array_size, 0.0);
-            MPI_Allreduce(
-                    tstate,
-                    this->rhs[i],
-                    this->array_size,
-                    MPI_DOUBLE,
-                    MPI_SUM,
-                    this->fs->rd->comm);
-        }
-    }
-    fftw_interface<double>::free(tstate);
-    // assignment of particles
-    for (int p=0; p<this->nparticles; p++)
-    {
-        this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]);
-        //DEBUG_MSG("synchronizing particles, particle %d computing is %d\n", p, this->computing[p]);
-    }
-    double *jump = fftw_interface<double>::alloc_real(this->nparticles);
-    this->jump_estimate(jump);
-    // now, see who needs to watch
-    bool *local_watching = new bool[this->fs->rd->nprocs*this->nparticles];
-    std::fill_n(local_watching, this->fs->rd->nprocs*this->nparticles, false);
-    for (int p=0; p<this->nparticles; p++)
-        if (this->fs->rd->myrank == this->computing[p])
-        {
-            local_watching[this->get_rank(this->state[this->ncomponents*p+2]        )*this->nparticles+p] = true;
-            local_watching[this->get_rank(this->state[this->ncomponents*p+2]-jump[p])*this->nparticles+p] = true;
-            local_watching[this->get_rank(this->state[this->ncomponents*p+2]+jump[p])*this->nparticles+p] = true;
-        }
-    fftw_interface<double>::free(jump);
-    MPI_Allreduce(
-            local_watching,
-            this->watching,
-            this->nparticles*this->fs->rd->nprocs,
-            MPI_C_BOOL,
-            MPI_LOR,
-            this->fs->rd->comm);
-    delete[] local_watching;
-    for (int p=0; p<this->nparticles; p++)
-        DEBUG_MSG("watching = %d for particle %d\n", this->watching[this->fs->rd->myrank*nparticles+p], p);
-}
-
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::roll_rhs()
-{
-    for (int i=this->integration_steps-2; i>=0; i--)
-        std::copy(this->rhs[i],
-                  this->rhs[i] + this->array_size,
-                  this->rhs[i+1]);
-}
-
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::AdamsBashforth(int nsteps)
-{
-    ptrdiff_t ii;
-    this->get_rhs(this->state, this->rhs[0]);
-    //if (myrank == 0)
-    //{
-    //    DEBUG_MSG(
-    //            "in AdamsBashforth for particles %s, integration_steps = %d, nsteps = %d, iteration = %d\n",
-    //            this->name,
-    //            this->integration_steps,
-    //            nsteps,
-    //            this->iteration);
-    //    std::stringstream tstring;
-    //    for (int p=0; p<this->nparticles; p++)
-    //        tstring << " " << this->computing[p];
-    //    DEBUG_MSG("%s\n", tstring.str().c_str());
-    //    for (int i=0; i<this->integration_steps; i++)
-    //    {
-    //        std::stringstream tstring;
-    //        for (int p=0; p<this->nparticles; p++)
-    //            tstring << " " << this->rhs[i][p*3];
-    //        DEBUG_MSG("%s\n", tstring.str().c_str());
-    //    }
-    //}
-    switch(nsteps)
-    {
-        case 1:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*this->rhs[0][ii];
-                }
-            break;
-        case 2:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*(3*this->rhs[0][ii]
-                                               -   this->rhs[1][ii])/2;
-                }
-            break;
-        case 3:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*(23*this->rhs[0][ii]
-                                               - 16*this->rhs[1][ii]
-                                               +  5*this->rhs[2][ii])/12;
-                }
-            break;
-        case 4:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*(55*this->rhs[0][ii]
-                                               - 59*this->rhs[1][ii]
-                                               + 37*this->rhs[2][ii]
-                                               -  9*this->rhs[3][ii])/24;
-                }
-            break;
-        case 5:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*(1901*this->rhs[0][ii]
-                                               - 2774*this->rhs[1][ii]
-                                               + 2616*this->rhs[2][ii]
-                                               - 1274*this->rhs[3][ii]
-                                               +  251*this->rhs[4][ii])/720;
-                }
-            break;
-        case 6:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*(4277*this->rhs[0][ii]
-                                               - 7923*this->rhs[1][ii]
-                                               + 9982*this->rhs[2][ii]
-                                               - 7298*this->rhs[3][ii]
-                                               + 2877*this->rhs[4][ii]
-                                               -  475*this->rhs[5][ii])/1440;
-                }
-            break;
-    }
-    this->roll_rhs();
-}
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::step()
-{
-    this->AdamsBashforth((this->iteration < this->integration_steps) ? this->iteration+1 : this->integration_steps);
-    //this->cRK4();
-    this->iteration++;
-    this->synchronize();
-}
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::Euler()
-{
-    double *y = fftw_interface<double>::alloc_real(this->array_size);
-    this->get_rhs(this->state, y);
-    for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-    {
-        for (int i=0; i<this->ncomponents; i++)
-            this->state[p*this->ncomponents+i] += this->dt*y[p*this->ncomponents+i];
-        //DEBUG_MSG(
-        //        "particle %d state is %lg %lg %lg\n",
-        //        p, this->state[p*this->ncomponents], this->state[p*this->ncomponents+1], this->state[p*this->ncomponents+2]);
-    }
-    fftw_interface<double>::free(y);
-}
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::Heun()
-{
-    double *y = new double[this->array_size];
-    double dtfactor[] = {0.0, this->dt};
-    this->get_rhs(this->state, this->rhs[0]);
-    for (int p=0; p<this->nparticles; p++)
-    {
-        this->synchronize_single_particle_state(p, this->rhs[0]);
-        //int crank = this->get_rank(this->state[p*3 + 2]);
-        //DEBUG_MSG(
-        //        "k 0 iteration %d particle is %d, crank is %d, computing rank is %d, position is %g %g %g, rhs is %g %g %g\n",
-        //        this->iteration, p,
-        //        crank, this->computing[p],
-        //        this->state[p*3], this->state[p*3+1], this->state[p*3+2],
-        //        this->rhs[0][p*3], this->rhs[0][p*3+1], this->rhs[0][p*3+2]);
-    }
-    for (int kindex = 1; kindex < 2; kindex++)
-    {
-        for (int p=0; p<this->nparticles; p++)
-        {
-            if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i;
-                    y[tindex] = this->state[tindex] + dtfactor[kindex]*this->rhs[kindex-1][tindex];
-                }
-        }
-        for (int p=0; p<this->nparticles; p++)
-            this->synchronize_single_particle_state(p, y);
-        this->get_rhs(y, this->rhs[kindex]);
-        for (int p=0; p<this->nparticles; p++)
-        {
-            this->synchronize_single_particle_state(p, this->rhs[kindex]);
-        DEBUG_MSG(
-                "k %d iteration %d particle is %d, position is %g %g %g, rhs is %g %g %g\n",
-                kindex, this->iteration, p,
-                y[p*3], y[p*3+1], y[p*3+2],
-                this->rhs[kindex][p*3], this->rhs[kindex][p*3+1], this->rhs[kindex][p*3+2]);
-        }
-    }
-    for (int p=0; p<this->nparticles; p++)
-    {
-        if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-        {
-            for (int i=0; i<this->ncomponents; i++)
-            {
-                ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i;
-                this->state[tindex] += this->dt*(this->rhs[0][tindex] + this->rhs[1][tindex])/2;
-            }
-            //int crank = this->get_rank(this->state[p*3 + 2]);
-            //if (crank != this->computing[p])
-            //    DEBUG_MSG(
-            //            "k _ iteration %d particle is %d, crank is %d, computing rank is %d, position is %g %g %g\n",
-            //            this->iteration, p,
-            //            crank, this->computing[p],
-            //            this->state[p*3], this->state[p*3+1], this->state[p*3+2]);
-        }
-    }
-    delete[] y;
-    DEBUG_MSG("exiting Heun\n");
-}
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::cRK4()
-{
-    double *y = new double[this->array_size];
-    double dtfactor[] = {0.0, this->dt/2, this->dt/2, this->dt};
-    this->get_rhs(this->state, this->rhs[0]);
-    for (int p=0; p<this->nparticles; p++)
-        this->synchronize_single_particle_state(p, this->rhs[0]);
-    for (int kindex = 1; kindex < 4; kindex++)
-    {
-        for (int p=0; p<this->nparticles; p++)
-        {
-            if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i;
-                    y[tindex] = this->state[tindex] + dtfactor[kindex]*this->rhs[kindex-1][tindex];
-                }
-        }
-        for (int p=0; p<this->nparticles; p++)
-            this->synchronize_single_particle_state(p, y);
-        this->get_rhs(y, this->rhs[kindex]);
-        for (int p=0; p<this->nparticles; p++)
-            this->synchronize_single_particle_state(p, this->rhs[kindex]);
-    }
-    for (int p=0; p<this->nparticles; p++)
-    {
-        if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-            for (int i=0; i<this->ncomponents; i++)
-            {
-                ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i;
-                this->state[tindex] += this->dt*(this->rhs[0][tindex] +
-                                              2*(this->rhs[1][tindex] + this->rhs[2][tindex]) +
-                                                 this->rhs[3][tindex])/6;
-            }
-    }
-    delete[] y;
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::get_grid_coordinates(double *x, int *xg, double *xx)
-{
-    static double grid_size[] = {this->dx, this->dy, this->dz};
-    double tval;
-    std::fill_n(xg, this->nparticles*3, 0);
-    std::fill_n(xx, this->nparticles*3, 0.0);
-    for (int p=0; p<this->nparticles; p++) if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-    {
-        for (int c=0; c<3; c++)
-        {
-            tval = floor(x[p*this->ncomponents+c]/grid_size[c]);
-            xg[p*3+c] = MOD(int(tval), this->fs->rd->sizes[2-c]);
-            xx[p*3+c] = (x[p*this->ncomponents+c] - tval*grid_size[c]) / grid_size[c];
-        }
-        xg[p*3+2] -= this->fs->rd->starts[0];
-        if (this->fs->rd->myrank == this->fs->rd->rank[0] &&
-            xg[p*3+2] > this->fs->rd->subsizes[0])
-            xg[p*3+2] -= this->fs->rd->sizes[0];
-        //DEBUG_MSG(
-        //        "particle %d x is %lg %lg %lg xx is %lg %lg %lg xg is %d %d %d\n",
-        //        p,
-        //         x[p*3],  x[p*3+1],  x[p*3+2],
-        //        xx[p*3], xx[p*3+1], xx[p*3+2],
-        //        xg[p*3], xg[p*3+1], xg[p*3+2]);
-    }
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::interpolation_formula(rnumber *field, int *xg, double *xx, double *dest, int *deriv)
-{
-    double bx[this->interp_neighbours*2+2], by[this->interp_neighbours*2+2], bz[this->interp_neighbours*2+2];
-    this->compute_beta(deriv[0], xx[0], bx);
-    this->compute_beta(deriv[1], xx[1], by);
-    this->compute_beta(deriv[2], xx[2], bz);
-    //DEBUG_MSG("computed beta polynomials\n");
-    std::fill_n(dest, 3, 0);
-    for (int iz = -this->interp_neighbours; iz <= this->interp_neighbours+1; iz++)
-    for (int iy = -this->interp_neighbours; iy <= this->interp_neighbours+1; iy++)
-    for (int ix = -this->interp_neighbours; ix <= this->interp_neighbours+1; ix++)
-        for (int c=0; c<3; c++)
-        {
-            //DEBUG_MSG(
-            //        "%d %d %d %d %d %d %d %ld %ld\n",
-            //        xg[2], xg[1], xg[0], iz, iy, ix, c,
-            //        ((ptrdiff_t(xg[2]+iz) *this->fs->rd->subsizes[1] +
-            //          ptrdiff_t(xg[1]+iy))*this->fs->rd->subsizes[2] +
-            //          ptrdiff_t(xg[0]+ix))*3+c,
-            //        this->buffered_field_descriptor->local_size
-            //        );
-            dest[c] += field[((ptrdiff_t(    xg[2]+iz                         ) *this->fs->rd->subsizes[1] +
-                               ptrdiff_t(MOD(xg[1]+iy, this->fs->rd->sizes[1])))*this->fs->rd->subsizes[2] +
-                               ptrdiff_t(MOD(xg[0]+ix, this->fs->rd->sizes[2])))*3+c]*(bz[iz+this->interp_neighbours]*
-                                                                                       by[iy+this->interp_neighbours]*
-                                                                                       bx[ix+this->interp_neighbours]);
-        }
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::linear_interpolation(rnumber *field, int *xg, double *xx, double *dest, int *deriv)
-{
-    //ptrdiff_t tindex, tmp;
-    //tindex = ((ptrdiff_t(xg[2]  )*this->fs->rd->subsizes[1]+xg[1]  )*this->fs->rd->subsizes[2]+xg[0]  )*3;
-    //tmp = ptrdiff_t(xg[2]);
-    //DEBUG_MSG(
-    //        "linear interpolation xx is %lg %lg %lg xg is %d %d %d,"
-    //        " corner index is ((%ld*%d+%d)*%d+%d)*3 = %ld\n",
-    //        xx[0], xx[1], xx[2],
-    //        xg[0], xg[1], xg[2],
-    //        tmp, this->fs->rd->subsizes[1], xg[1], this->fs->rd->subsizes[2], xg[0],
-    //        tindex);
-    for (int c=0; c<3; c++)
-        dest[c] = (field[((ptrdiff_t(xg[2]  )*this->fs->rd->subsizes[1]+xg[1]  )*this->fs->rd->subsizes[2]+xg[0]  )*3+c]*((1-xx[0])*(1-xx[1])*(1-xx[2])) +
-                   field[((ptrdiff_t(xg[2]  )*this->fs->rd->subsizes[1]+xg[1]  )*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*((  xx[0])*(1-xx[1])*(1-xx[2])) +
-                   field[((ptrdiff_t(xg[2]  )*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]  )*3+c]*((1-xx[0])*(  xx[1])*(1-xx[2])) +
-                   field[((ptrdiff_t(xg[2]  )*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*((  xx[0])*(  xx[1])*(1-xx[2])) +
-                   field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]  )*this->fs->rd->subsizes[2]+xg[0]  )*3+c]*((1-xx[0])*(1-xx[1])*(  xx[2])) +
-                   field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]  )*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*((  xx[0])*(1-xx[1])*(  xx[2])) +
-                   field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]  )*3+c]*((1-xx[0])*(  xx[1])*(  xx[2])) +
-                   field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*((  xx[0])*(  xx[1])*(  xx[2])));
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::read(hid_t data_file_id)
-{
-    //DEBUG_MSG("aloha\n");
-    if (this->fs->rd->myrank == 0)
-    {
-        std::string temp_string = (std::string("/particles/") +
-                                   std::string(this->name) +
-                                   std::string("/state"));
-        hid_t Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
-        hid_t mspace, rspace;
-        hsize_t count[4], offset[4];
-        rspace = H5Dget_space(Cdset);
-        H5Sget_simple_extent_dims(rspace, count, NULL);
-        count[0] = 1;
-        offset[0] = this->iteration / this->traj_skip;
-        offset[1] = 0;
-        offset[2] = 0;
-        mspace = H5Screate_simple(3, count, NULL);
-        H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-        H5Dread(Cdset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->state);
-        H5Sclose(mspace);
-        H5Sclose(rspace);
-        H5Dclose(Cdset);
-        if (this->iteration > 0)
-        {
-            temp_string = (std::string("/particles/") +
-                           std::string(this->name) +
-                           std::string("/rhs"));
-            Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
-            rspace = H5Dget_space(Cdset);
-            H5Sget_simple_extent_dims(rspace, count, NULL);
-            //reading from last available position
-            offset[0] = count[0] - 1;
-            offset[3] = 0;
-            count[0] = 1;
-            count[1] = 1;
-            mspace = H5Screate_simple(4, count, NULL);
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                offset[1] = i;
-                H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-                H5Dread(Cdset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->rhs[i]);
-            }
-            H5Sclose(mspace);
-            H5Sclose(rspace);
-            H5Dclose(Cdset);
-        }
-    }
-    MPI_Bcast(
-            this->state,
-            this->array_size,
-            MPI_DOUBLE,
-            0,
-            this->fs->rd->comm);
-    for (int i = 0; i<this->integration_steps; i++)
-    {
-        MPI_Bcast(
-                this->rhs[i],
-                this->array_size,
-                MPI_DOUBLE,
-                0,
-                this->fs->rd->comm);
-    }
-    // initial assignment of particles
-    for (int p=0; p<this->nparticles; p++)
-    {
-        this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]);
-        //DEBUG_MSG("reading particles, particle %d computing is %d\n", p, this->computing[p]);
-    }
-    // now actual synchronization
-    this->synchronize();
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::write(hid_t data_file_id, bool write_rhs)
-{
-    if (this->fs->rd->myrank == 0)
-    {
-        std::string temp_string = (std::string("/particles/") +
-                                   std::string(this->name) +
-                                   std::string("/state"));
-        hid_t Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
-        hid_t mspace, wspace;
-        hsize_t count[4], offset[4];
-        wspace = H5Dget_space(Cdset);
-        H5Sget_simple_extent_dims(wspace, count, NULL);
-        count[0] = 1;
-        offset[0] = this->iteration / this->traj_skip;
-        offset[1] = 0;
-        offset[2] = 0;
-        mspace = H5Screate_simple(3, count, NULL);
-        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-        H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->state);
-        H5Sclose(mspace);
-        H5Sclose(wspace);
-        H5Dclose(Cdset);
-        if (write_rhs)
-        {
-            temp_string = (std::string("/particles/") +
-                           std::string(this->name) +
-                           std::string("/rhs"));
-            Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
-            wspace = H5Dget_space(Cdset);
-            H5Sget_simple_extent_dims(wspace, count, NULL);
-            //writing to last available position
-            offset[0] = count[0] - 1;
-            count[0] = 1;
-            count[1] = 1;
-            offset[3] = 0;
-            mspace = H5Screate_simple(4, count, NULL);
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                offset[1] = i;
-                H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-                H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->rhs[i]);
-            }
-            H5Sclose(mspace);
-            H5Sclose(wspace);
-            H5Dclose(Cdset);
-        }
-    }
-}
-
-
-
-/*****************************************************************************/
-/* macro for specializations to numeric types compatible with FFTW           */
-#define SLAB_FIELD_PARTICLES_DEFINITIONS(FFTW, R, MPI_RNUM) \
- \
-template <> \
-void slab_field_particles<R>::rFFTW_to_buffered(R *src, R *dst) \
-{ \
-    /* do big copy of middle stuff */ \
-    std::copy(src, \
-              src + this->fs->rd->local_size, \
-              dst + this->buffer_size); \
-    int rsrc; \
-    /* get upper slices */ \
-    for (int rdst = 0; rdst < this->fs->rd->nprocs; rdst++) \
-    { \
-        rsrc = this->fs->rd->rank[(this->fs->rd->all_start0[rdst] + \
-                                   this->fs->rd->all_size0[rdst]) % \
-                                   this->fs->rd->sizes[0]]; \
-        if (this->fs->rd->myrank == rsrc) \
-            MPI_Send( \
-                    (void*)(src), \
-                    this->buffer_size, \
-                    MPI_RNUM, \
-                    rdst, \
-                    2*(rsrc*this->fs->rd->nprocs + rdst), \
-                    this->fs->rd->comm); \
-        if (this->fs->rd->myrank == rdst) \
-            MPI_Recv( \
-                    (void*)(dst + this->buffer_size + this->fs->rd->local_size), \
-                    this->buffer_size, \
-                    MPI_RNUM, \
-                    rsrc, \
-                    2*(rsrc*this->fs->rd->nprocs + rdst), \
-                    this->fs->rd->comm, \
-                    MPI_STATUS_IGNORE); \
-    } \
-    /* get lower slices */ \
-    for (int rdst = 0; rdst < this->fs->rd->nprocs; rdst++) \
-    { \
-        rsrc = this->fs->rd->rank[MOD(this->fs->rd->all_start0[rdst] - 1, \
-                                      this->fs->rd->sizes[0])]; \
-        if (this->fs->rd->myrank == rsrc) \
-            MPI_Send( \
-                    (void*)(src + this->fs->rd->local_size - this->buffer_size), \
-                    this->buffer_size, \
-                    MPI_RNUM, \
-                    rdst, \
-                    2*(rsrc*this->fs->rd->nprocs + rdst)+1, \
-                    this->fs->rd->comm); \
-        if (this->fs->rd->myrank == rdst) \
-            MPI_Recv( \
-                    (void*)(dst), \
-                    this->buffer_size, \
-                    MPI_RNUM, \
-                    rsrc, \
-                    2*(rsrc*this->fs->rd->nprocs + rdst)+1, \
-                    this->fs->rd->comm, \
-                    MPI_STATUS_IGNORE); \
-    } \
-} \
-/*****************************************************************************/
-
-
-
-/*****************************************************************************/
-/* now actually use the macro defined above                                  */
-SLAB_FIELD_PARTICLES_DEFINITIONS(
-        FFTW_MANGLE_FLOAT,
-        float,
-        MPI_FLOAT)
-SLAB_FIELD_PARTICLES_DEFINITIONS(
-        FFTW_MANGLE_DOUBLE,
-        double,
-        MPI_DOUBLE)
-/*****************************************************************************/
-
-
-
-/*****************************************************************************/
-/* finally, force generation of code for single precision                    */
-template class slab_field_particles<float>;
-template class slab_field_particles<double>;
-/*****************************************************************************/
diff --git a/bfps/cpp/slab_field_particles.hpp b/bfps/cpp/slab_field_particles.hpp
deleted file mode 100644
index 15f9477bbfb680be17390447ce88bc40cd7471e2..0000000000000000000000000000000000000000
--- a/bfps/cpp/slab_field_particles.hpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <iostream>
-#include <hdf5.h>
-#include "base.hpp"
-#include "fluid_solver_base.hpp"
-#include "interpolator.hpp"
-
-#ifndef SLAB_FIELD_PARTICLES
-
-#define SLAB_FIELD_PARTICLES
-
-extern int myrank, nprocs;
-
-template <class rnumber>
-class slab_field_particles
-{
-    protected:
-        //typedef void (slab_field_particles<rnumber>::*tensor_product_interpolation_formula)(
-        //        rnumber *field,
-        //        int *xg,
-        //        double *xx,
-        //        double *dest,
-        //        int *deriv);
-    public:
-        fluid_solver_base<rnumber> *fs;
-        field_descriptor<rnumber> *buffered_field_descriptor;
-
-        /* watching is an array of shape [nparticles], with
-         * watching[p] being true if particle p is in the domain of myrank
-         * or in the buffer regions.
-         * watching is not really being used right now, since I don't do partial
-         * synchronizations of particles.
-         * we may do this at some point in the future, if it seems needed...
-         * */
-        bool *watching;
-        /* computing is an array of shape [nparticles], with
-         * computing[p] being the rank that is currently working on particle p
-         * */
-        int *computing;
-
-        /* state will generally hold all the information about the particles.
-         * in the beginning, we will only need to solve 3D ODEs, but I figured
-         * a general ncomponents is better, since we may change our minds.
-         * */
-        double *state;
-        double *rhs[6];
-        int nparticles;
-        int ncomponents;
-        int array_size;
-        int interp_neighbours;
-        int buffer_width;
-        int integration_steps;
-        int traj_skip;
-        ptrdiff_t buffer_size;
-        double *lbound;
-        double *ubound;
-        //tensor_product_interpolation_formula spline_formula;
-        base_polynomial_values compute_beta;
-
-        /* simulation parameters */
-        char name[256];
-        int iteration;
-        double dt;
-
-        /* physical parameters of field */
-        rnumber dx, dy, dz;
-
-        /* methods */
-
-        /* constructor and destructor.
-         * allocate and deallocate:
-         *  this->state
-         *  this->lbound
-         *  this->ubound
-         *  this->watching
-         * */
-        slab_field_particles(
-                const char *NAME,
-                fluid_solver_base<rnumber> *FSOLVER,
-                const int NPARTICLES,
-                const int NCOMPONENTS,
-                base_polynomial_values BETA_POLYS,
-                const int INTERP_NEIGHBOURS,
-                const int TRAJ_SKIP,
-                const int INTEGRATION_STEPS = 2);
-        ~slab_field_particles();
-
-        /* an Euler step is needed to compute an estimate of future positions,
-         * which is needed for synchronization.
-         * */
-        virtual void jump_estimate(double *jump_length);
-        /* function get_rhs is virtual since we want children to do different things,
-         * depending on the type of particle.
-         * */
-        virtual void get_rhs(double *x, double *rhs);
-
-        /* generic methods, should work for all children of this class */
-        int get_rank(double z); // get rank for given value of z
-        void synchronize();
-        void synchronize_single_particle_state(int p, double *x, int source_id = -1);
-        void get_grid_coordinates(double *x, int *xg, double *xx);
-        void linear_interpolation(rnumber *field, int *xg, double *xx, double *dest, int *deriv);
-        void interpolation_formula(rnumber *field, int *xg, double *xx, double *dest, int *deriv);
-
-        void rFFTW_to_buffered(rnumber *src, rnumber *dst);
-
-        /* generic methods, should work for all children of this class */
-        void read(hid_t data_file_id);
-        void write(hid_t data_file_id, bool write_rhs = true);
-
-        /* solver stuff */
-        void step();
-        void roll_rhs();
-        void AdamsBashforth(int nsteps);
-        void Euler();
-        void Heun();
-        void cRK4();
-};
-
-
-#endif//SLAB_FIELD_PARTICLES
-
diff --git a/bfps/cpp/tracers.cpp b/bfps/cpp/tracers.cpp
deleted file mode 100644
index 3d9fbfb6a1e357d70452466b6cc901659444539d..0000000000000000000000000000000000000000
--- a/bfps/cpp/tracers.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-
-#include <cmath>
-#include "base.hpp"
-#include "fftw_tools.hpp"
-#include "tracers.hpp"
-
-template <class rnumber>
-void tracers<rnumber>::jump_estimate(double *jump)
-{
-    int deriv[] = {0, 0, 0};
-    int *xg = new int[this->array_size];
-    double *xx = new double[this->array_size];
-    rnumber *vel = this->data + this->buffer_size;
-    double tmp[3];
-    /* get grid coordinates */
-    this->get_grid_coordinates(this->state, xg, xx);
-
-    /* perform interpolation */
-    for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-    {
-        this->interpolation_formula(vel, xg + p*3, xx + p*3, tmp, deriv);
-        jump[p] = fabs(3*this->dt * tmp[2]);
-        if (jump[p] < this->dz*1.01)
-            jump[p] = this->dz*1.01;
-    }
-    delete[] xg;
-    delete[] xx;
-}
-
-template <class rnumber>
-void tracers<rnumber>::get_rhs(double *x, double *y)
-{
-    std::fill_n(y, this->array_size, 0.0);
-    int deriv[] = {0, 0, 0};
-    /* get grid coordinates */
-    int *xg = new int[this->array_size];
-    double *xx = new double[this->array_size];
-    rnumber *vel = this->data + this->buffer_size;
-    this->get_grid_coordinates(x, xg, xx);
-    //DEBUG_MSG(
-    //        "position is %g %g %g, grid_coords are %d %d %d %g %g %g\n",
-    //        x[0], x[1], x[2],
-    //        xg[0], xg[1], xg[2],
-    //        xx[0], xx[1], xx[2]);
-    /* perform interpolation */
-    for (int p=0; p<this->nparticles; p++)
-    {
-        if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-        {
-            int crank = this->get_rank(x[p*3 + 2]);
-            if (this->fs->rd->myrank == crank)
-            {
-                this->interpolation_formula(vel, xg + p*3, xx + p*3, y + p*3, deriv);
-            DEBUG_MSG(
-                    "position is %g %g %g %d %d %d %g %g %g, result is %g %g %g\n",
-                    x[p*3], x[p*3+1], x[p*3+2],
-                    xg[p*3], xg[p*3+1], xg[p*3+2],
-                    xx[p*3], xx[p*3+1], xx[p*3+2],
-                    y[p*3], y[p*3+1], y[p*3+2]);
-            }
-            if (crank != this->computing[p])
-            {
-                this->synchronize_single_particle_state(p, y, crank);
-            }
-            //DEBUG_MSG(
-            //        "after synch crank is %d, computing rank is %d, position is %g %g %g, result is %g %g %g\n",
-            //        this->iteration, p,
-            //        crank, this->computing[p],
-            //        x[p*3], x[p*3+1], x[p*3+2],
-            //        y[p*3], y[p*3+1], y[p*3+2]);
-        }
-    }
-    delete[] xg;
-    delete[] xx;
-}
-
-template<class rnumber>
-void tracers<rnumber>::update_field(bool clip_on)
-{
-    if (clip_on)
-        clip_zero_padding<rnumber>(this->fs->rd, this->source_data, 3);
-    this->rFFTW_to_buffered(this->source_data, this->data);
-}
-
-/*****************************************************************************/
-/* macro for specializations to numeric types compatible with FFTW           */
-
-#define TRACERS_DEFINITIONS(FFTW, R, MPI_RNUM, MPI_CNUM) \
- \
-template <> \
-tracers<R>::tracers( \
-                const char *NAME, \
-                fluid_solver_base<R> *FSOLVER, \
-                const int NPARTICLES, \
-                base_polynomial_values BETA_POLYS, \
-                const int NEIGHBOURS, \
-                const int TRAJ_SKIP, \
-                const int INTEGRATION_STEPS, \
-                R *SOURCE_DATA) : slab_field_particles<R>( \
-                    NAME, \
-                    FSOLVER, \
-                    NPARTICLES, \
-                    3, \
-                    BETA_POLYS, \
-                    NEIGHBOURS, \
-                    TRAJ_SKIP, \
-                    INTEGRATION_STEPS) \
-{ \
-    this->source_data = SOURCE_DATA; \
-    this->data = FFTW(alloc_real)(this->buffered_field_descriptor->local_size); \
-} \
- \
-template<> \
-tracers<R>::~tracers() \
-{ \
-    FFTW(free)(this->data); \
-} \
- \
-template <> \
-void tracers<R>::sample_vec_field(R *vec_field, double *vec_values) \
-{ \
-    vec_field += this->buffer_size; \
-    double *vec_local =  new double[this->array_size]; \
-    std::fill_n(vec_local, this->array_size, 0.0); \
-    int deriv[] = {0, 0, 0}; \
-    /* get grid coordinates */ \
-    int *xg = new int[this->array_size]; \
-    double *xx = new double[this->array_size]; \
-    this->get_grid_coordinates(this->state, xg, xx); \
-    /* perform interpolation */ \
-    for (int p=0; p<this->nparticles; p++) \
-        if (this->fs->rd->myrank == this->computing[p]) \
-            this->interpolation_formula( \
-                    vec_field, \
-                    xg + p*3, \
-                    xx + p*3, \
-                    vec_local + p*3, \
-                    deriv); \
-    MPI_Allreduce( \
-            vec_local, \
-            vec_values, \
-            this->array_size, \
-            MPI_DOUBLE, \
-            MPI_SUM, \
-            this->fs->rd->comm); \
-    delete[] xg; \
-    delete[] xx; \
-    delete[] vec_local; \
-} \
-
-/*****************************************************************************/
-
-
-
-/*****************************************************************************/
-/* now actually use the macro defined above                                  */
-TRACERS_DEFINITIONS(
-        FFTW_MANGLE_FLOAT,
-        float,
-        MPI_FLOAT,
-        MPI_COMPLEX)
-TRACERS_DEFINITIONS(
-        FFTW_MANGLE_DOUBLE,
-        double,
-        MPI_DOUBLE,
-        BFPS_MPICXX_DOUBLE_COMPLEX)
-/*****************************************************************************/
-
-
-
-/*****************************************************************************/
-/* finally, force generation of code                                         */
-template class tracers<float>;
-template class tracers<double>;
-/*****************************************************************************/
-
diff --git a/bfps/cpp/tracers.hpp b/bfps/cpp/tracers.hpp
deleted file mode 100644
index 1a063e026578dd71b9a223ee46b55d2c86d4399f..0000000000000000000000000000000000000000
--- a/bfps/cpp/tracers.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include "slab_field_particles.hpp"
-
-#ifndef TRACERS
-
-#define TRACERS
-
-extern int myrank, nprocs;
-
-template <class rnumber>
-class tracers final:public slab_field_particles<rnumber>
-{
-    public:
-        rnumber *source_data;
-        rnumber *data;
-
-        /* methods */
-        tracers(
-                const char *NAME,
-                fluid_solver_base<rnumber> *FSOLVER,
-                const int NPARTICLES,
-                base_polynomial_values BETA_POLYS,
-                const int NEIGHBOURS,
-                const int TRAJ_SKIP,
-                const int INTEGRATION_STEPS,
-                rnumber *SOURCE_DATA);
-        ~tracers();
-
-        void update_field(bool clip_on = true);
-        virtual void get_rhs(double *x, double *rhs);
-        virtual void jump_estimate(double *jump_length);
-
-        void sample_vec_field(rnumber *vec_field, double *vec_values);
-};
-
-
-#endif//TRACERS
-
diff --git a/cpp_build.py b/cpp_build.py
index a312191aadd3c54d1f5461823e4d39fe54355e79..39371214bab4e2ac3ee9f5b064f5532811448765 100644
--- a/cpp_build.py
+++ b/cpp_build.py
@@ -65,6 +65,15 @@ src_file_list = ['hdf5_tools',
                  'Lagrange_polys',
                  'scope_timer']
 
+def get_file_dependency_list(src_file):
+    """Return the `g++ -MM` output for bfps/cpp/<src_file>.cpp, unwrapped."""
+    cmd = ['g++', '-Ibfps/cpp', '-MM', 'bfps/cpp/' + src_file + '.cpp']
+    # communicate() waits for g++ to exit, so no explicit terminate() is needed;
+    # stderr is not piped, so compiler diagnostics are not captured here.
+    out = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0]
+    # remove backslash-newline continuations so wrapped output is joined up
+    return str(out, 'ASCII').replace('\\\n', '')
+
 def get_dependency_list():
     ofile = open('dependencies.txt', 'w')
     for src_file in src_file_list:
diff --git a/setup.py b/setup.py
index b427ebe77ab86ec8be96b3a751a97600c629df53..3094c692942de2191e3a924af12aad0e48f584f1 100644
--- a/setup.py
+++ b/setup.py
@@ -123,8 +123,6 @@ src_file_list = [
                  'spline_n10',
                  'Lagrange_polys',
                  'scope_timer',
-                 'interpolator',
-                 'interpolator_base',
                  'full_code/test_interpolation',
                  'full_code/NSVEparticles',
                  'full_code/NSVEcomplex_particles',
@@ -215,13 +213,10 @@ class CompileLibCommand(distutils.cmd.Command):
         if not os.path.isfile('bfps/libbfps.a'):
             need_to_compile = True
         else:
+            need_to_compile = False
             ofile = 'bfps/libbfps.a'
             libtime = datetime.datetime.fromtimestamp(os.path.getctime(ofile))
             latest = libtime
-            for fname in header_list:
-                latest = max(latest,
-                             datetime.datetime.fromtimestamp(os.path.getctime('bfps/' + fname)))
-            need_to_compile = (latest > libtime)
         eca = extra_compile_args
         eca += ['-fPIC']
         if self.timing_output:
@@ -238,9 +233,14 @@ class CompileLibCommand(distutils.cmd.Command):
             if not os.path.exists(ofile):
                 need_to_compile_file = True
             else:
-                need_to_compile_file = (need_to_compile or
-                                        (datetime.datetime.fromtimestamp(os.path.getctime(ofile)) <
-                                         datetime.datetime.fromtimestamp(os.path.getctime(ifile))))
+                need_to_compile_file = False
+                if not need_to_compile:
+                    latest = libtime
+                    dependency_list = get_file_dependency_list(fname)
+                    for depname in dependency_list.split()[1:]:
+                        latest = max(latest,
+                                     datetime.datetime.fromtimestamp(os.path.getctime(depname)))
+                need_to_compile_file = (latest > libtime)
             if need_to_compile_file:
                 command_strings = [compiler, '-c']
                 command_strings += ['bfps/cpp/' + fname + '.cpp']
@@ -269,6 +269,15 @@ class CompileLibCommand(distutils.cmd.Command):
                 protocol = 2)
         return None
 
+def get_file_dependency_list(src_file):
+    """Return the `g++ -MM` output for bfps/cpp/<src_file>.cpp, unwrapped."""
+    cmd = ['g++', '-std=c++11', '-Ibfps/cpp', '-MM', 'bfps/cpp/' + src_file + '.cpp']
+    # communicate() already waits for g++ to exit, so terminate() is redundant;
+    # only stdout is captured, stderr is not piped.
+    out = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0]
+    # drop backslash-newline continuations so the text can be split on whitespace
+    return str(out, 'ASCII').replace('\\\n', '')
+
 from setuptools import setup
 
 setup(
diff --git a/tests/base.py b/tests/base.py
index 6f110716cfc01f560549093247f39d038b48a92b..542679733757b5213193f3b7f6ad02cda7e0617b 100644
--- a/tests/base.py
+++ b/tests/base.py
@@ -33,7 +33,6 @@ import numpy as np
 import matplotlib.pyplot as plt
 
 import bfps
-from bfps import FluidResize
 from bfps.tools import particle_finite_diff_test as acceleration_test
 
 import argparse
@@ -92,33 +91,13 @@ parser.add_argument(
         dest = 'kMeta',
         default = 2.0)
 
-def double(opt):
-    old_simname = 'N{0:0>3x}'.format(opt.n)
-    new_simname = 'N{0:0>3x}'.format(opt.n*2)
-    c = FluidResize(fluid_precision = opt.precision)
-    c.launch(
-            args = ['--simname', old_simname + '_double',
-                    '--wd', opt.work_dir,
-                    '--nx', '{0}'.format(opt.n),
-                    '--ny', '{0}'.format(opt.n),
-                    '--nz', '{0}'.format(opt.n),
-                    '--dst_nx', '{0}'.format(2*opt.n),
-                    '--dst_ny', '{0}'.format(2*opt.n),
-                    '--dst_nz', '{0}'.format(2*opt.n),
-                    '--dst_simname', new_simname,
-                    '--src_simname', old_simname,
-                    '--src_iteration', '0',
-                    '--src_wd', './',
-                    '--niter_todo', '0'])
-    return None
-
 def launch(
         opt,
         nu = None,
         dt = None,
         tracer_state_file = None,
         vorticity_field = None,
-        code_class = bfps.NavierStokes,
+        code_class = bfps.DNS,
         particle_class = 'particles',
         interpolator_class = 'rFFTW_interpolator'):
     c = code_class(
diff --git a/tests/test_plain.py b/tests/test_plain.py
deleted file mode 100644
index ad30224f869fc724758cc95d8b9e10da7b4ca2d4..0000000000000000000000000000000000000000
--- a/tests/test_plain.py
+++ /dev/null
@@ -1,156 +0,0 @@
-#! /usr/bin/env python3
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-#from base import *
-import bfps
-from bfps.tools import particle_finite_diff_test as acceleration_test
-import sys
-
-import numpy as np
-import matplotlib.pyplot as plt
-
-#parser.add_argument('--multiplejob',
-#        dest = 'multiplejob', action = 'store_true')
-#
-#parser.add_argument(
-#        '--particle-class',
-#        default = 'particles',
-#        dest = 'particle_class',
-#        type = str)
-#
-#parser.add_argument(
-#        '--interpolator-class',
-#        default = 'interpolator',
-#        dest = 'interpolator_class',
-#        type = str)
-
-class NSPlain(bfps.NavierStokes):
-    def specific_parser_arguments(
-            self,
-            parser):
-        bfps.NavierStokes.specific_parser_arguments(self, parser)
-        parser.add_argument(
-                '--particle-class',
-                default = 'rFFTW_distributed_particles',
-                dest = 'particle_class',
-                type = str)
-        parser.add_argument(
-                '--interpolator-class',
-                default = 'rFFTW_interpolator',
-                dest = 'interpolator_class',
-                type = str)
-        parser.add_argument('--neighbours',
-                type = int,
-                dest = 'neighbours',
-                default = 3)
-        parser.add_argument('--smoothness',
-                type = int,
-                dest = 'smoothness',
-                default = 2)
-        return None
-    def launch(
-            self,
-            args = [],
-            **kwargs):
-        opt = self.prepare_launch(args = args)
-        self.fill_up_fluid_code()
-        if type(opt.nparticles) == int:
-            if opt.nparticles > 0:
-                self.add_3D_rFFTW_field(
-                        name = 'rFFTW_acc')
-                self.add_interpolator(
-                        name = 'spline',
-                        neighbours = opt.neighbours,
-                        smoothness = opt.smoothness,
-                        class_name =  opt.interpolator_class)
-                self.add_particles(
-                        kcut = ['fs->kM/2', 'fs->kM/3'],
-                        integration_steps = 3,
-                        interpolator = 'spline',
-                        class_name = opt.particle_class)
-                self.add_particles(
-                        integration_steps = [2, 3, 4, 6],
-                        interpolator = 'spline',
-                        acc_name = 'rFFTW_acc',
-                        class_name = opt.particle_class)
-        self.finalize_code()
-        self.launch_jobs(opt = opt)
-        return None
-
-def plain(args):
-    wd = opt.work_dir
-    opt.work_dir = wd + '/N{0:0>3x}_1'.format(opt.n)
-    c0 = launch(opt, dt = 0.2/opt.n,
-            particle_class = opt.particle_class,
-            interpolator_class = opt.interpolator_class)
-    c0.compute_statistics()
-    print ('Re = {0:.0f}'.format(c0.statistics['Re']))
-    print ('Rlambda = {0:.0f}'.format(c0.statistics['Rlambda']))
-    print ('Lint = {0:.4e}, etaK = {1:.4e}'.format(c0.statistics['Lint'], c0.statistics['etaK']))
-    print ('Tint = {0:.4e}, tauK = {1:.4e}'.format(c0.statistics['Tint'], c0.statistics['tauK']))
-    print ('kMetaK = {0:.4e}'.format(c0.statistics['kMeta']))
-    for s in range(c0.particle_species):
-        acceleration_test(c0, species = s, m = 1)
-    if not opt.multiplejob:
-        return None
-    assert(opt.niter_todo % 3 == 0)
-    opt.work_dir = wd + '/N{0:0>3x}_2'.format(opt.n)
-    opt.njobs *= 2
-    opt.niter_todo = opt.niter_todo//2
-    c1 = launch(opt, dt = c0.parameters['dt'],
-            particle_class = opt.particle_class,
-            interpolator_class = opt.interpolator_class)
-    c1.compute_statistics()
-    opt.work_dir = wd + '/N{0:0>3x}_3'.format(opt.n)
-    opt.njobs = 3*opt.njobs//2
-    opt.niter_todo = 2*opt.niter_todo//3
-    c2 = launch(opt, dt = c0.parameters['dt'],
-            particle_class = opt.particle_class,
-            interpolator_class = opt.interpolator_class)
-    c2.compute_statistics()
-    compare_stats(opt, c0, c1)
-    compare_stats(opt, c0, c2)
-    return None
-
-if __name__ == '__main__':
-    c0 = NSPlain()
-    c0.launch(
-            ['-n', '32',
-             '--ncpu', '4',
-             '--nparticles', '1000',
-             '--niter_todo', '48',
-             '--wd', 'data/single'] +
-            sys.argv[1:])
-    c0.compute_statistics()
-    print ('Re = {0:.0f}'.format(c0.statistics['Re']))
-    print ('Rlambda = {0:.0f}'.format(c0.statistics['Rlambda']))
-    print ('Lint = {0:.4e}, etaK = {1:.4e}'.format(c0.statistics['Lint'], c0.statistics['etaK']))
-    print ('Tint = {0:.4e}, tauK = {1:.4e}'.format(c0.statistics['Tint'], c0.statistics['tauK']))
-    print ('kMetaK = {0:.4e}'.format(c0.statistics['kMeta']))
-    for s in range(c0.particle_species):
-        acceleration_test(c0, species = s, m = 1)
-
diff --git a/tests/test_vorticity_equation.py b/tests/test_vorticity_equation.py
index dfaccb8bf352bdd252e5edf29f6e7d711689f7dc..e492bfa5c75d0f2f2b9989cccef49964b8bc90b4 100644
--- a/tests/test_vorticity_equation.py
+++ b/tests/test_vorticity_equation.py
@@ -273,12 +273,13 @@ def main():
         particle_initial_condition[..., 2] = yvals[None, :, None]
         particle_initial_condition = particle_initial_condition.reshape(-1, 3)
         nparticles = nparticles**2
-    c = bfps.NavierStokes(simname = 'fluid_solver')
+    c = bfps.DNS(simname = 'fluid_solver')
     if run_NS:
         run_NSVE = True
         subprocess.call('rm *fluid_solver* NavierStokes*', shell = True)
         c.launch(
-                ['-n', '32',
+                ['NSVE',
+                 '-n', '32',
                  '--simname', 'fluid_solver',
                  '--ncpu', '4',
                  '--niter_todo', '{0}'.format(niterations),
@@ -298,9 +299,10 @@ def main():
         f = h5py.File('vorticity_equation_checkpoint_0.h5', 'w')
         f['vorticity/complex/0'] = data
         f.close()
-        c = bfps.NSVorticityEquation()
+        c = bfps.DNS()
         c.launch(
-                ['-n', '32',
+                ['NSVEparticles',
+                 '-n', '32',
                  '--simname', 'vorticity_equation',
                  '--np', '4',
                  '--ntpp', '1',