diff --git a/.gitignore b/.gitignore
index 6525641d66f2c1e7fae907b8596b55992777da60..3717951f2c8409fbe95675bc9975a689154f8c77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 .ipynb_checkpoints/*
+meta/.ipynb_checkpoints/*
 .*.swp
 *.pyc
 .tox
diff --git a/README.rst b/README.rst
index 61f65073c430175c4cac01619423a52c2be60d50..4284a71649448531e6fd7e6aae951ec5b3725ca2 100644
--- a/README.rst
+++ b/README.rst
@@ -1,3 +1,4 @@
+================================
 Big Fluid and Particle Simulator
 ================================
 
@@ -12,26 +13,98 @@ in the repository), and it is working as expected. Parameters and
 statistics are stored in HDF5 format, together with code information,
 so simulation data should be "future proof".
 
+Users of this code are expected to either use `NavierStokes` objects
+directly, or construct their own class that inherits this class.
+The way I use it is to I inherit and add custom statistics as necessary; I
+also have private C++ code that can get added and used when needed.
+I plan on adding documentation on the procedure as when other people
+show interest in using the code, or when time permits.
+
+
+Installation
+------------
+
+**Postprocessing only**
+
+.. code:: bash
+
+    python setup.py install
+
+(add `--user` or `sudo` as appropriate).
+`setup.py` should tell you about the various packages you need.
+
+**Full installation**
+
+If you want to run simulations on the machine where you're installing,
+you will need to call `build` before installing.
+
+.. code:: bash
+
+    python setup.py build
+    python setup.py install
+
+The `build` command will most likely fail unless you modify
+`machine_settings.py` appropriately for your machine.
+Also, in order to run the C++ code you need to have an MPI compiler
+installed, the HDF5 C library as well as FFTW 3 (at least 3.3 I think).
+
+
 Comments
 --------
 
 * particles: initialization of multistep solvers is done with lower
   order methods, so don't be surprised if direct convergence tests fail.
 
-TODO
-----
+* I am using this code mainly with Python 3.4, but Python 2.7
+  compatibility should be kept since mayavi (well, vtk actually) only
+  works on Python 2.
+
 
-* add different numerical method, for testing purposes
+===========
+Development
+===========
 
-* test involving hydrodynamic similarity
+`bfps` is using `git` for version tracking (see https://git-scm.com/
+for description).
+The branching model described at
+http://nvie.com/posts/a-successful-git-branching-model/ should be
+adhered to as strictly as possible.
 
-* test anisotropic grids
+The usable `VERSION` number will be constructed according to semantic
+versioning rules (see http://semver.org/), `MAJOR.MINOR.PATCH`.
+In principle, if you use `bfps` and you've created children of the
+`NavierStokes` class, you should not need to rewrite your code unless
+the `MAJOR` version changes.
 
-* test non-cubic domains
+Versioning guidelines
+---------------------
 
-* use HDF5 io for fields
+There are 2 main branches, `master` and `develop`.
+`setup.py` will call `git` to read in the `VERSION`: it will get the
+latest available tag.
+If the active branch name contains either of the strings `develop`,
+`feature` or `bugfix`, then the full output of `git describe --tags`
+will be used;
+otherwise, only the string before the dash (if a dash exists) will be
+used.
 
-* try to make code more memory efficient
+At the moment the following rules seem adequate.
+Once I get feedback from someone who actually knows how to handle bigger
+projects, these may change.
 
-* make code py3 compatible
+1. New features are worked on in branches forked from `develop`, with
+   the branch name of the form `feature/bla`.
+   Feature branches are merged back into `develop` only after all tests
+   pass.
+2. Whenever the `develop` branch is merged into `master`, the last
+   commit on the `develop` branch is tagged with `MAJOR.MINOR`.
+3. Whenever a bug is discovered in version X.Y, a new branch called `vX.Y`
+   is forked from the corresponding `master` merge point.
+   A new bugfix branch is forked from `vX.Y`, the bug is fixed, and then
+   this bugfix branch is merged into all affected branches (this includes
+   `vX.Y`).
+   After merging, the respective merge points are tagged adequately (`vX.Y`
+   gets the tag X.Y.1).
+4. Whenever a bug is discovered in version X.Y.Z, a bugfix branch is
+   forked from `vX.Y`, and then rule 3 is adapted accordingly.
 
diff --git a/bfps/NavierStokes.py b/bfps/NavierStokes.py
index 1279e7e4b66bf65b9017f8affa724b2d6d0eb0ee..20e1093181a2cd018cb4a1f53a7a89979d93ade2 100644
--- a/bfps/NavierStokes.py
+++ b/bfps/NavierStokes.py
@@ -41,15 +41,17 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
             fluid_precision = 'single',
             fftw_plan_rigor = 'FFTW_MEASURE',
             frozen_fields = False,
-            use_fftw_wisdom = True):
+            use_fftw_wisdom = True,
+            QR_stats_on = False):
+        self.QR_stats_on = QR_stats_on
+        self.frozen_fields = frozen_fields
+        self.fftw_plan_rigor = fftw_plan_rigor
         super(NavierStokes, self).__init__(
                 name = name,
                 work_dir = work_dir,
                 simname = simname,
                 dtype = fluid_precision,
                 use_fftw_wisdom = use_fftw_wisdom)
-        self.frozen_fields = frozen_fields
-        self.fftw_plan_rigor = fftw_plan_rigor
         self.file_datasets_grow = """
                 //begincpp
                 std::string temp_string;
@@ -57,6 +59,7 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
                 hid_t group;
                 hid_t Cspace, Cdset;
                 int ndims;
+                DEBUG_MSG("about to grow datasets\\n");
                 // store kspace information
                 Cdset = H5Dopen(stat_file, "/kspace/kshell", H5P_DEFAULT);
                 Cspace = H5Dget_space(Cdset);
@@ -64,7 +67,9 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
                 H5Sclose(Cspace);
                 if (fs->nshells != dims[0])
                 {
-                    std::cerr << "ERROR: computed nshells not equal to data file nshells\\n" << std::endl;
+                    DEBUG_MSG(
+                        "ERROR: computed nshells %d not equal to data file nshells %d\\n",
+                        fs->nshells, dims[0]);
                     file_problems++;
                 }
                 H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, fs->kshell);
@@ -73,7 +78,7 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
                 H5Dwrite(Cdset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, fs->nshell);
                 H5Dclose(Cdset);
                 Cdset = H5Dopen(stat_file, "/kspace/kM", H5P_DEFAULT);
-                H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kM);
+                H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kMspec);
                 H5Dclose(Cdset);
                 Cdset = H5Dopen(stat_file, "/kspace/dk", H5P_DEFAULT);
                 H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->dk);
@@ -81,55 +86,103 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
                 group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT);
                 H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL);
                 H5Gclose(group);
+                DEBUG_MSG("finished growing datasets\\n");
                 //endcpp
                 """
         self.style = {}
         self.statistics = {}
         self.fluid_output = 'fs->write(\'v\', \'c\');\n'
         return None
+    def create_stat_output(
+            self,
+            dset_name,
+            data_buffer,
+            data_type = 'H5T_NATIVE_DOUBLE',
+            size_setup = None,
+            close_spaces = True):
+        new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name)
+        if not type(size_setup) == type(None):
+            new_stat_output_txt += (
+                    size_setup +
+                    'wspace = H5Dget_space(Cdset);\n' +
+                    'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' +
+                    'mspace = H5Screate_simple(ndims, count, NULL);\n' +
+                    'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n')
+        new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' +
+                                'H5Dclose(Cdset);\n').format(data_type, data_buffer)
+        if close_spaces:
+            new_stat_output_txt += ('H5Sclose(mspace);\n' +
+                                    'H5Sclose(wspace);\n')
+        return new_stat_output_txt
     def write_fluid_stats(self):
         self.fluid_includes += '#include <cmath>\n'
         self.fluid_includes += '#include "fftw_tools.hpp"\n'
-        ## ksamples
-        #self.stat_src += '{0} *ksamples = new {0}[nksamples];\n'.format(self.C_dtype)
         self.stat_src += """
                 //begincpp
                 //endcpp
                 """
         self.stat_src += """
                 //begincpp
-                    double *velocity_moments = fftw_alloc_real(10*4);
-                    double *vorticity_moments = fftw_alloc_real(10*4);
-                    ptrdiff_t *hist_velocity = new ptrdiff_t[histogram_bins*4];
+                    double *velocity_moments  = new double[10*4];
+                    double *vorticity_moments = new double[10*4];
+                    ptrdiff_t *hist_velocity  = new ptrdiff_t[histogram_bins*4];
                     ptrdiff_t *hist_vorticity = new ptrdiff_t[histogram_bins*4];
-                    {0} *ksample_values;
+                    double max_estimates[4];
                     fs->compute_velocity(fs->cvorticity);
-                    if (fs->cd->myrank == 0)
-                    {{
-                        ksample_values = new {0}[nksamples*6];
-                        for (int i=0; i<nksamples; i++)
-                        {{
-                            ptrdiff_t cindex = (ptrdiff_t(kindices[2*i+1])*fs->cd->subsizes[2] + ptrdiff_t(kindices[2*i]))*3;
-                            std::copy(({0}*)(fs->cvelocity + cindex),
-                                      ({0}*)(fs->cvelocity + cindex + 3),
-                                      ksample_values + i*6);
-                        }}
-                    }}
-                    double *spec_velocity = fftw_alloc_real(fs->nshells*9);
-                    double *spec_vorticity = fftw_alloc_real(fs->nshells*9);
+                    double *spec_velocity  = new double[fs->nshells*9];
+                    double *spec_vorticity = new double[fs->nshells*9];
                     fs->cospectrum(fs->cvelocity, fs->cvelocity, spec_velocity);
                     fs->cospectrum(fs->cvorticity, fs->cvorticity, spec_vorticity);
+                    //endcpp
+                    """
+        if self.QR_stats_on:
+            self.stat_src += """
+                //begincpp
+                    double *trS2_Q_R_moments  = new double[10*3];
+                    double *gradu_moments     = new double[10*9];
+                    ptrdiff_t *hist_trS2_Q_R  = new ptrdiff_t[histogram_bins*3];
+                    ptrdiff_t *hist_gradu     = new ptrdiff_t[histogram_bins*9];
+                    ptrdiff_t *hist_QR2D      = new ptrdiff_t[QR2D_histogram_bins*QR2D_histogram_bins];
+                    double trS2QR_max_estimates[3];
+                    double gradu_max_estimates[9];
+                    trS2QR_max_estimates[0] = max_trS2_estimate;
+                    trS2QR_max_estimates[1] = max_Q_estimate;
+                    trS2QR_max_estimates[2] = max_R_estimate;
+                    std::fill_n(gradu_max_estimates, 9, sqrt(3*max_trS2_estimate));
+                    fs->compute_gradient_statistics(
+                        fs->cvelocity,
+                        gradu_moments,
+                        trS2_Q_R_moments,
+                        hist_gradu,
+                        hist_trS2_Q_R,
+                        hist_QR2D,
+                        trS2QR_max_estimates,
+                        gradu_max_estimates,
+                        histogram_bins,
+                        QR2D_histogram_bins);
+                    //endcpp
+                    """
+        self.stat_src += """
+                //begincpp
                     fs->ift_velocity();
-                    fs->compute_rspace_stats(fs->rvelocity,
+                    max_estimates[0] = max_velocity_estimate/sqrt(3);
+                    max_estimates[1] = max_estimates[0];
+                    max_estimates[2] = max_estimates[0];
+                    max_estimates[3] = max_velocity_estimate;
+                    fs->compute_rspace_stats4(fs->rvelocity,
                                              velocity_moments,
                                              hist_velocity,
-                                             max_velocity_estimate,
+                                             max_estimates,
                                              histogram_bins);
                     fs->ift_vorticity();
-                    fs->compute_rspace_stats(fs->rvorticity,
+                    max_estimates[0] = max_vorticity_estimate/sqrt(3);
+                    max_estimates[1] = max_estimates[0];
+                    max_estimates[2] = max_estimates[0];
+                    max_estimates[3] = max_vorticity_estimate;
+                    fs->compute_rspace_stats4(fs->rvorticity,
                                              vorticity_moments,
                                              hist_vorticity,
-                                             max_vorticity_estimate,
+                                             max_estimates,
                                              histogram_bins);
                     if (fs->cd->myrank == 0)
                     {{
@@ -142,97 +195,135 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
                         offset[3] = 0;
                 //endcpp
                 """.format(self.C_dtype)
-        size_setups = ["""
+        if self.dtype == np.float32:
+            field_H5T = 'H5T_NATIVE_FLOAT'
+        elif self.dtype == np.float64:
+            field_H5T = 'H5T_NATIVE_DOUBLE'
+        self.stat_src += self.create_stat_output(
+                '/statistics/xlines/velocity',
+                'fs->rvelocity',
+                data_type = field_H5T,
+                size_setup = """
+                    count[0] = 1;
+                    count[1] = nx;
+                    count[2] = 3;
+                    """,
+                close_spaces = False)
+        self.stat_src += self.create_stat_output(
+                '/statistics/xlines/vorticity',
+                'fs->rvorticity',
+                data_type = field_H5T)
+        self.stat_src += self.create_stat_output(
+                '/statistics/moments/velocity',
+                'velocity_moments',
+                size_setup = """
+                    count[0] = 1;
+                    count[1] = 10;
+                    count[2] = 4;
+                    """,
+                close_spaces = False)
+        self.stat_src += self.create_stat_output(
+                '/statistics/moments/vorticity',
+                'vorticity_moments')
+        self.stat_src += self.create_stat_output(
+                '/statistics/spectra/velocity_velocity',
+                'spec_velocity',
+                size_setup = """
+                    count[0] = 1;
+                    count[1] = fs->nshells;
+                    count[2] = 3;
+                    count[3] = 3;
+                    """,
+                close_spaces = False)
+        self.stat_src += self.create_stat_output(
+                '/statistics/spectra/vorticity_vorticity',
+                'spec_vorticity')
+        self.stat_src += self.create_stat_output(
+                '/statistics/histograms/velocity',
+                'hist_velocity',
+                data_type = 'H5T_NATIVE_INT64',
+                size_setup = """
+                    count[0] = 1;
+                    count[1] = histogram_bins;
+                    count[2] = 4;
+                    """,
+                close_spaces = False)
+        self.stat_src += self.create_stat_output(
+                '/statistics/histograms/vorticity',
+                'hist_vorticity',
+                data_type = 'H5T_NATIVE_INT64')
+        if self.QR_stats_on:
+            self.stat_src += self.create_stat_output(
+                    '/statistics/moments/trS2_Q_R',
+                    'trS2_Q_R_moments',
+                    size_setup ="""
                         count[0] = 1;
-                        count[1] = nx;
+                        count[1] = 10;
                         count[2] = 3;
-                       """,
-                       """
+                        """)
+            self.stat_src += self.create_stat_output(
+                    '/statistics/moments/velocity_gradient',
+                    'gradu_moments',
+                    size_setup ="""
                         count[0] = 1;
                         count[1] = 10;
-                        count[2] = 4;
-                       """,
-                       """
+                        count[2] = 3;
+                        count[3] = 3;
+                        """)
+            self.stat_src += self.create_stat_output(
+                    '/statistics/histograms/trS2_Q_R',
+                    'hist_trS2_Q_R',
+                    data_type = 'H5T_NATIVE_INT64',
+                    size_setup = """
                         count[0] = 1;
                         count[1] = histogram_bins;
-                        count[2] = 4;
-                       """,
-                       """
+                        count[2] = 3;
+                        """)
+            self.stat_src += self.create_stat_output(
+                    '/statistics/histograms/velocity_gradient',
+                    'hist_gradu',
+                    data_type = 'H5T_NATIVE_INT64',
+                    size_setup = """
                         count[0] = 1;
-                        count[1] = fs->nshells;
+                        count[1] = histogram_bins;
                         count[2] = 3;
                         count[3] = 3;
-                       """,]
-        if self.dtype == np.float32:
-            field_H5T = 'H5T_NATIVE_FLOAT'
-        elif self.dtype == np.float64:
-            field_H5T = 'H5T_NATIVE_DOUBLE'
-        stat_template = """
-                //begincpp
-                        Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);
-                        wspace = H5Dget_space(Cdset);
-                        ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);
-                        mspace = H5Screate_simple(ndims, count, NULL);
-                        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-                        H5Dwrite(Cdset, {1}, mspace, wspace, H5P_DEFAULT, {2});
-                        H5Dclose(Cdset);
-                        Cdset = H5Dopen(stat_file, "{3}", H5P_DEFAULT);
-                        H5Dwrite(Cdset, {1}, mspace, wspace, H5P_DEFAULT, {4});
-                        H5Sclose(mspace);
-                        H5Sclose(wspace);
-                        H5Dclose(Cdset);
-                //endcpp
-                """
-        stat_outputs = [stat_template.format('/statistics/xlines/velocity',
-                                              field_H5T,
-                                              'fs->rvelocity',
-                                              '/statistics/xlines/vorticity',
-                                              'fs->rvorticity'),
-                        stat_template.format('/statistics/moments/velocity',
-                                             'H5T_NATIVE_DOUBLE',
-                                             'velocity_moments',
-                                             '/statistics/moments/vorticity',
-                                             'vorticity_moments'),
-                        stat_template.format('/statistics/histograms/velocity',
-                                             'H5T_NATIVE_DOUBLE',
-                                             'hist_velocity',
-                                             '/statistics/histograms/vorticity',
-                                             'hist_vorticity'),
-                        stat_template.format('/statistics/spectra/velocity_velocity',
-                                             'H5T_NATIVE_DOUBLE',
-                                             'spec_velocity',
-                                             '/statistics/spectra/vorticity_vorticity',
-                                             'spec_vorticity')]
-        for i in range(len(size_setups)):
-            self.stat_src += size_setups[i] + stat_outputs[i]
+                        """)
+            self.stat_src += self.create_stat_output(
+                    '/statistics/histograms/QR2D',
+                    'hist_QR2D',
+                    data_type = 'H5T_NATIVE_INT64',
+                    size_setup = """
+                        count[0] = 1;
+                        count[1] = QR2D_histogram_bins;
+                        count[2] = QR2D_histogram_bins;
+                        """)
         self.stat_src += """
                 //begincpp
-                        count[0] = 1;
-                        count[1] = nksamples;
-                        count[2] = 3;
-                        Cdset = H5Dopen(stat_file, "/statistics/ksamples/velocity", H5P_DEFAULT);
-                        wspace = H5Dget_space(Cdset);
-                        ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);
-                        mspace = H5Screate_simple(ndims, count, NULL);
-                        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-                        H5Dwrite(Cdset, H5T_field_complex, mspace, wspace, H5P_DEFAULT, ksample_values);
-                        H5Dclose(Cdset);
-                        delete[] ksample_values;
                     }
-                    fftw_free(spec_velocity);
-                    fftw_free(spec_vorticity);
-                    fftw_free(velocity_moments);
-                    fftw_free(vorticity_moments);
+                    delete[] spec_velocity;
+                    delete[] spec_vorticity;
+                    delete[] velocity_moments;
+                    delete[] vorticity_moments;
                     delete[] hist_velocity;
                     delete[] hist_vorticity;
                 //endcpp
                 """
+        if self.QR_stats_on:
+            self.stat_src += """
+                //begincpp
+                    delete[] trS2_Q_R_moments;
+                    delete[] gradu_moments;
+                    delete[] hist_trS2_Q_R;
+                    delete[] hist_gradu;
+                    delete[] hist_QR2D;
+                //endcpp
+                """
         return None
     def fill_up_fluid_code(self):
         self.fluid_includes += '#include <cstring>\n'
         self.fluid_variables += ('fluid_solver<{0}> *fs;\n'.format(self.C_dtype) +
                                  'int *kindices;\n' +
-                                 'int nksamples;\n' +
                                  'hid_t H5T_field_complex;\n')
         self.fluid_definitions += """
                     typedef struct {{
@@ -248,6 +339,7 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
         self.fluid_start += """
                 //begincpp
                 char fname[512];
+                DEBUG_MSG("about to allocate fluid_solver\\n");
                 fs = new fluid_solver<{0}>(
                         simname,
                         nx, ny, nz,
@@ -262,18 +354,9 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
                 strncpy(fs->forcing_type, forcing_type, 128);
                 fs->iteration = iteration;
                 fs->read('v', 'c');
+                DEBUG_MSG("finished reading initial condition\\n");
                 if (fs->cd->myrank == 0)
                 {{
-                    hid_t dset = H5Dopen(stat_file, "kspace/ksample_indices", H5P_DEFAULT);
-                    hid_t rspace;
-                    rspace = H5Dget_space(dset);
-                    hsize_t count[2];
-                    H5Sget_simple_extent_dims(rspace, count, NULL);
-                    H5Sclose(rspace);
-                    nksamples = count[0];
-                    kindices = new int[nksamples*2];
-                    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, kindices);
-                    H5Dclose(dset);
                     H5T_field_complex = H5Tcreate(H5T_COMPOUND, sizeof(tmp_complex_type));
                     H5Tinsert(H5T_field_complex, "r", HOFFSET(tmp_complex_type, re), {2});
                     H5Tinsert(H5T_field_complex, "i", HOFFSET(tmp_complex_type, im), {2});
@@ -301,8 +384,10 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
             kcut = None,
             neighbours = 1,
             smoothness = 1,
-            name = 'particle_field'):
-        self.particle_variables += 'interpolator<{0}, {1}> *vel_{2}, *acc_{2};\n'.format(self.C_dtype, neighbours, name)
+            name = 'particle_field',
+            field_class = 'rFFTW_interpolator'):
+        self.fluid_includes += '#include "{0}.hpp"\n'.format(field_class)
+        self.fluid_variables += field_class + '<{0}, {1}> *vel_{2}, *acc_{2};\n'.format(self.C_dtype, neighbours, name)
         self.parameters[name + '_type'] = interp_type
         self.parameters[name + '_neighbours'] = neighbours
         if interp_type == 'spline':
@@ -310,13 +395,14 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
             beta_name = 'beta_n{0}_m{1}'.format(neighbours, smoothness)
         elif interp_type == 'Lagrange':
             beta_name = 'beta_Lagrange_n{0}'.format(neighbours)
-        self.particle_start += ('vel_{0} = new interpolator<{1}, {2}>(fs, {3});\n' +
-                                'acc_{0} = new interpolator<{1}, {2}>(fs, {3});\n').format(name,
-                                                                                           self.C_dtype,
-                                                                                           neighbours,
-                                                                                           beta_name)
-        self.particle_end += ('delete vel_{0};\n' +
-                              'delete acc_{0};\n').format(name)
+        self.fluid_start += ('vel_{0} = new {1}<{2}, {3}>(fs, {4});\n' +
+                             'acc_{0} = new {1}<{2}, {3}>(fs, {4});\n').format(name,
+                                                                               field_class,
+                                                                               self.C_dtype,
+                                                                               neighbours,
+                                                                               beta_name)
+        self.fluid_end += ('delete vel_{0};\n' +
+                           'delete acc_{0};\n').format(name)
         update_fields = 'fs->compute_velocity(fs->cvorticity);\n'
         if not type(kcut) == type(None):
             update_fields += 'fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut)
@@ -324,8 +410,8 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
                           'vel_{0}->read_rFFTW(fs->rvelocity);\n' +
                           'fs->compute_Lagrangian_acceleration(acc_{0}->temp);\n' +
                           'acc_{0}->read_rFFTW(acc_{0}->temp);\n').format(name)
-        self.particle_start += update_fields
-        self.particle_loop += update_fields
+        self.fluid_start += update_fields
+        self.fluid_loop += update_fields
         return None
     def add_particles(
             self,
@@ -333,7 +419,8 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
             integration_steps = 2,
             kcut = 'fs->kM',
             frozen_particles = False,
-            fields_name = None):
+            fields_name = None,
+            particle_class = 'rFFTW_particles'):
         if integration_method == 'cRK4':
             integration_steps = 4
         elif integration_method == 'Heun':
@@ -403,38 +490,52 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
         self.particle_start += 'sprintf(fname, "tracers{0}");\n'.format(self.particle_species)
         self.particle_end += ('ps{0}->write(stat_file);\n' +
                               'delete ps{0};\n').format(self.particle_species)
-        self.particle_includes += '#include "particles.hpp"\n'
-        if integration_method == 'AdamsBashforth':
-            multistep = 'true'
-        else:
-            multistep = 'false'
-        self.particle_variables += 'particles<VELOCITY_TRACER, {0}, {1}, {2}> *ps{3};\n'.format(
-                self.C_dtype,
-                multistep,
-                neighbours,
-                self.particle_species)
-        self.particle_start += ('ps{0} = new particles<VELOCITY_TRACER, {1}, {2},{3}>(\n' +
+        self.particle_includes += '#include "{0}.hpp"\n'.format(particle_class)
+        if particle_class == 'particles':
+            if integration_method == 'AdamsBashforth':
+                multistep = 'true'
+            else:
+                multistep = 'false'
+            self.particle_variables += 'particles<VELOCITY_TRACER, {0}, {1}, {2}> *ps{3};\n'.format(
+                    self.C_dtype,
+                    multistep,
+                    neighbours,
+                    self.particle_species)
+            self.particle_start += ('ps{0} = new particles<VELOCITY_TRACER, {1}, {2},{3}>(\n' +
                                     'fname, fs, vel_{4},\n' +
                                     'nparticles,\n' +
                                     'niter_part, tracers{0}_integration_steps);\n').format(
                                             self.particle_species, self.C_dtype, multistep, neighbours, fields_name)
+        else:
+            self.particle_variables += 'rFFTW_particles<VELOCITY_TRACER, {0}, {1}> *ps{2};\n'.format(
+                    self.C_dtype,
+                    neighbours,
+                    self.particle_species)
+            self.particle_start += ('ps{0} = new rFFTW_particles<VELOCITY_TRACER, {1}, {2}>(\n' +
+                                    'fname, fs, vel_{3},\n' +
+                                    'nparticles,\n' +
+                                    'niter_part, tracers{0}_integration_steps);\n').format(
+                                            self.particle_species, self.C_dtype, neighbours, fields_name)
         self.particle_start += ('ps{0}->dt = dt;\n' +
                                 'ps{0}->iteration = iteration;\n' +
                                 'ps{0}->read(stat_file);\n').format(self.particle_species)
         self.particle_start += output_vel_acc
         if not frozen_particles:
-            if integration_method == 'AdamsBashforth':
-                self.particle_loop += 'ps{0}->AdamsBashforth((ps{0}->iteration < ps{0}->integration_steps) ? ps{0}->iteration+1 : ps{0}->integration_steps);\n'.format(self.particle_species)
-            elif integration_method == 'Euler':
-                self.particle_loop += 'ps{0}->Euler();\n'.format(self.particle_species)
-            elif integration_method == 'Heun':
-                assert(integration_steps == 2)
-                self.particle_loop += 'ps{0}->Heun();\n'.format(self.particle_species)
-            elif integration_method == 'cRK4':
-                assert(integration_steps == 4)
-                self.particle_loop += 'ps{0}->cRK4();\n'.format(self.particle_species)
-            self.particle_loop += 'ps{0}->iteration++;\n'.format(self.particle_species)
-            self.particle_loop += 'ps{0}->synchronize();\n'.format(self.particle_species)
+            if particle_class == 'particles':
+                if integration_method == 'AdamsBashforth':
+                    self.particle_loop += 'ps{0}->AdamsBashforth((ps{0}->iteration < ps{0}->integration_steps) ? ps{0}->iteration+1 : ps{0}->integration_steps);\n'.format(self.particle_species)
+                elif integration_method == 'Euler':
+                    self.particle_loop += 'ps{0}->Euler();\n'.format(self.particle_species)
+                elif integration_method == 'Heun':
+                    assert(integration_steps == 2)
+                    self.particle_loop += 'ps{0}->Heun();\n'.format(self.particle_species)
+                elif integration_method == 'cRK4':
+                    assert(integration_steps == 4)
+                    self.particle_loop += 'ps{0}->cRK4();\n'.format(self.particle_species)
+                self.particle_loop += 'ps{0}->iteration++;\n'.format(self.particle_species)
+                self.particle_loop += 'ps{0}->synchronize();\n'.format(self.particle_species)
+            elif particle_class == 'rFFTW_particles':
+                self.particle_loop += 'ps{0}->step();\n'.format(self.particle_species)
         self.particle_loop += (('if (ps{0}->iteration % niter_part == 0)\n' +
                                 '{{\n' +
                                 'ps{0}->write(stat_file, false);\n').format(self.particle_species) +
@@ -442,7 +543,9 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
         self.particle_species += 1
         return None
     def get_data_file(self):
-        return h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'a')
+        return h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r')
+    def get_postprocess_file_name(self):
+        return os.path.join(self.work_dir, self.simname + '_postprocess.h5')
     def compute_statistics(self, iter0 = 0, iter1 = None):
         if len(list(self.statistics.keys())) > 0:
             return None
@@ -460,67 +563,174 @@ class NavierStokes(bfps.fluid_base.fluid_particle_base):
             ii1 = iter1 // self.parameters['niter_stat']
             self.statistics['kshell'] = data_file['kspace/kshell'].value
             self.statistics['kM'] = data_file['kspace/kM'].value
+            self.statistics['dk'] = data_file['kspace/dk'].value
             if self.particle_species > 0:
                 self.trajectories = [
                         data_file['particles/' + key + '/state'][
                             iter0//self.parameters['niter_part']:iter1//self.parameters['niter_part']+1]
                                      for key in data_file['particles'].keys()]
             computation_needed = True
-            if 'postprocess' in data_file.keys():
-                computation_needed =  not (ii0 == data_file['postprocess/ii0'].value and
-                                           ii1 == data_file['postprocess/ii1'].value)
+            pp_file = h5py.File(self.get_postprocess_file_name(), 'a')
+            if 'ii0' in pp_file.keys():
+                computation_needed =  not (ii0 == pp_file['ii0'].value and
+                                           ii1 == pp_file['ii1'].value)
                 if computation_needed:
-                    del data_file['postprocess']
+                    for k in pp_file.keys():
+                        del pp_file[k]
             if computation_needed:
-                data_file['postprocess/iter0'] = iter0
-                data_file['postprocess/iter1'] = iter1
-                data_file['postprocess/ii0'] = ii0
-                data_file['postprocess/ii1'] = ii1
-                data_file['postprocess/t'] = (self.parameters['dt']*
-                                              self.parameters['niter_stat']*
-                                              (np.arange(ii0, ii1+1).astype(np.float)))
-                data_file['postprocess/energy(t, k)'] = (
-                        data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] +
-                        data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] +
-                        data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2
-                data_file['postprocess/enstrophy(t, k)'] = (
-                        data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] +
-                        data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] +
-                        data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2
-                data_file['postprocess/vel_max(t)'] = data_file['statistics/moments/velocity']  [ii0:ii1+1, 9, 3]
-                data_file['postprocess/renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2
+                pp_file['iter0'] = iter0
+                pp_file['iter1'] = iter1
+                pp_file['ii0'] = ii0
+                pp_file['ii1'] = ii1
+                pp_file['t'] = (self.parameters['dt']*
+                                self.parameters['niter_stat']*
+                                (np.arange(ii0, ii1+1).astype(np.float)))
+                pp_file['energy(t, k)'] = (
+                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] +
+                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] +
+                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2
+                pp_file['enstrophy(t, k)'] = (
+                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] +
+                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] +
+                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2
+                pp_file['vel_max(t)'] = data_file['statistics/moments/velocity']  [ii0:ii1+1, 9, 3]
+                pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2
+                if 'trS2_Q_R' in data_file['statistics/moments'].keys():
+                    pp_file['mean_trS2(t)'] = data_file['statistics/moments/trS2_Q_R'][:, 1, 0]
             for k in ['t',
                       'energy(t, k)',
                       'enstrophy(t, k)',
                       'vel_max(t)',
-                      'renergy(t)']:
-                self.statistics[k] = data_file['postprocess/' + k].value
+                      'renergy(t)',
+                      'mean_trS2(t)']:
+                if k in pp_file.keys():
+                    self.statistics[k] = pp_file[k].value
             self.compute_time_averages()
         return None
     def compute_time_averages(self):
         for key in ['energy', 'enstrophy']:
-            self.statistics[key + '(t)'] = np.sum(self.statistics[key + '(t, k)'], axis = 1)
-        for key in ['energy', 'enstrophy', 'vel_max']:
-            self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0)
+            self.statistics[key + '(t)'] = (self.statistics['dk'] *
+                                            np.sum(self.statistics[key + '(t, k)'], axis = 1))
+        self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3)
+        self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi / (2*self.statistics['Uint(t)']**2)) *
+                                      np.nansum(self.statistics['energy(t, k)'] /
+                                                self.statistics['kshell'][None, :], axis = 1))
+        for key in ['energy',
+                    'enstrophy',
+                    'vel_max',
+                    'mean_trS2',
+                    'Uint',
+                    'Lint']:
+            if key + '(t)' in self.statistics.keys():
+                self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0)
         for suffix in ['', '(t)']:
             self.statistics['diss'    + suffix] = (self.parameters['nu'] *
                                                    self.statistics['enstrophy' + suffix]*2)
             self.statistics['etaK'    + suffix] = (self.parameters['nu']**3 /
                                                    self.statistics['diss' + suffix])**.25
-            self.statistics['Rlambda' + suffix] = (2*np.sqrt(5./3) *
-                                                   (self.statistics['energy' + suffix] /
-                                                   (self.parameters['nu']*self.statistics['diss' + suffix])**.5))
             self.statistics['tauK'    + suffix] =  (self.parameters['nu'] /
                                                     self.statistics['diss' + suffix])**.5
-        self.statistics['Tint'] = 2*self.statistics['energy'] / self.statistics['diss']
-        self.statistics['Lint'] = (2*self.statistics['energy'])**1.5 / self.statistics['diss']
-        self.statistics['Taylor_microscale'] = (10 * self.parameters['nu'] * self.statistics['energy'] / self.statistics['diss'])**.5
+            self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] *
+                                              self.statistics['Lint' + suffix] /
+                                              self.parameters['nu'])
+            self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] *
+                                                  self.statistics['Uint' + suffix]**2 /
+                                                  self.statistics['diss' + suffix])**.5
+            self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] *
+                                                   self.statistics['lambda' + suffix] /
+                                                   self.parameters['nu'])
+        self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint']
+        self.statistics['Taylor_microscale'] = self.statistics['lambda']
+        self.statistics['kMeta'] = self.statistics['kM']*self.statistics['etaK']
+        if self.parameters['dealias_type'] == 1:
+            self.statistics['kMeta'] *= 0.8
         return None
     def set_plt_style(
             self,
             style = {'dashes' : (None, None)}):
         self.style.update(style)
         return None
+    def read_cfield(
+            self,
+            field_name = 'vorticity',
+            iteration = 0):
+        return np.memmap(
+                os.path.join(self.work_dir,
+                             self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)),
+                dtype = self.ctype,
+                mode = 'r',
+                shape = (self.parameters['ny'],
+                         self.parameters['nz'],
+                         self.parameters['nx']//2+1,
+                         3))
+    def write_par(self, iter0 = 0):
+        super(NavierStokes, self).write_par(iter0 = iter0)
+        with h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r+') as ofile:
+            kspace = self.get_kspace()
+            nshells = kspace['nshell'].shape[0]
+            if self.QR_stats_on:
+                time_chunk = 2**20//(8*3*self.parameters['histogram_bins'])
+                time_chunk = max(time_chunk, 1)
+                ofile.create_dataset('statistics/histograms/trS2_Q_R',
+                                     (1,
+                                      self.parameters['histogram_bins'],
+                                      3),
+                                     chunks = (time_chunk,
+                                               self.parameters['histogram_bins'],
+                                               3),
+                                     maxshape = (None,
+                                                 self.parameters['histogram_bins'],
+                                                 3),
+                                     dtype = np.int64,
+                                     compression = 'gzip')
+                time_chunk = 2**20//(8*9*self.parameters['histogram_bins'])
+                time_chunk = max(time_chunk, 1)
+                ofile.create_dataset('statistics/histograms/velocity_gradient',
+                                     (1,
+                                      self.parameters['histogram_bins'],
+                                      3,
+                                      3),
+                                     chunks = (time_chunk,
+                                               self.parameters['histogram_bins'],
+                                               3,
+                                               3),
+                                     maxshape = (None,
+                                                 self.parameters['histogram_bins'],
+                                                 3,
+                                                 3),
+                                     dtype = np.int64,
+                                     compression = 'gzip')
+                time_chunk = 2**20//(8*3*10)
+                time_chunk = max(time_chunk, 1)
+                a = ofile.create_dataset('statistics/moments/trS2_Q_R',
+                                     (1, 10, 3),
+                                     chunks = (time_chunk, 10, 3),
+                                     maxshape = (None, 10, 3),
+                                     dtype = np.float64,
+                                     compression = 'gzip')
+                time_chunk = 2**20//(8*9*10)
+                time_chunk = max(time_chunk, 1)
+                a = ofile.create_dataset('statistics/moments/velocity_gradient',
+                                     (1, 10, 3, 3),
+                                     chunks = (time_chunk, 10, 3, 3),
+                                     maxshape = (None, 10, 3, 3),
+                                     dtype = np.float64,
+                                     compression = 'gzip')
+                time_chunk = 2**20//(8*self.parameters['QR2D_histogram_bins']**2)
+                time_chunk = max(time_chunk, 1)
+                ofile.create_dataset('statistics/histograms/QR2D',
+                                     (1,
+                                      self.parameters['QR2D_histogram_bins'],
+                                      self.parameters['QR2D_histogram_bins']),
+                                     chunks = (time_chunk,
+                                               self.parameters['QR2D_histogram_bins'],
+                                               self.parameters['QR2D_histogram_bins']),
+                                     maxshape = (None,
+                                                 self.parameters['QR2D_histogram_bins'],
+                                                 self.parameters['QR2D_histogram_bins']),
+                                     dtype = np.int64,
+                                     compression = 'gzip')
+        return None
 
 def launch(
         opt,
diff --git a/bfps/__init__.py b/bfps/__init__.py
index fffb3bb3516e9be9d30a324e48294b99393d6690..37d22327b1d40b4c33ca0db15e7d93c7dc4140a8 100644
--- a/bfps/__init__.py
+++ b/bfps/__init__.py
@@ -28,28 +28,20 @@ import os
 import subprocess
 import pickle
 
-from pkg_resources import get_distribution, DistributionNotFound
+import pkg_resources
 
-try:
-    _dist = get_distribution('bfps')
-    # Normalize case for Windows systems
-    dist_loc = os.path.normcase(os.path.realpath(_dist.location))
-    here = os.path.normcase(__file__)
-    if not here.startswith(os.path.join(dist_loc, 'bfps')):
-        # not installed, but there is another version that *is*
-        header_dir = os.path.join(os.path.dirname(here), 'cpp')
-        lib_dir = os.path.join(os.path.dirname(here), os.pardir)
-        raise DistributionNotFound
-    header_dir = os.path.join(os.path.join(dist_loc, 'bfps'), 'cpp')
-    lib_dir = _dist.location
-    __version__ = _dist.version
-except DistributionNotFound:
-    __version__ = ''
+__version__ = pkg_resources.require('bfps')[0].version
+
+_dist = pkg_resources.get_distribution('bfps')
+dist_loc = os.path.realpath(_dist.location)
+here = os.path.normcase(__file__)
+header_dir = os.path.join(os.path.join(dist_loc, 'bfps'), 'cpp')
+lib_dir = os.path.join(dist_loc, 'bfps')
 
 install_info = pickle.load(
         open(os.path.join(os.path.dirname(here),
                           'install_info.pickle'),
-             'r'))
+             'rb'))
 
 from .code import code
 from .fluid_converter import fluid_converter
@@ -85,7 +77,7 @@ def get_parser(base_class = NavierStokes,
     parser.add_argument('--njobs',
             type = int, dest = 'njobs',
             default = njobs)
-    c = base_class()
+    c = base_class(simname = simname)
     for k in sorted(c.parameters.keys()):
         parser.add_argument(
                 '--{0}'.format(k),
diff --git a/bfps/base.py b/bfps/base.py
index 528db8260fc561890481feaba56a116a29902abb..1301fe65d465066134755c1e10d6b72d707c3236 100644
--- a/bfps/base.py
+++ b/bfps/base.py
@@ -25,6 +25,7 @@
 
 
 import os
+import sys
 import numpy as np
 import h5py
 import bfps
@@ -48,8 +49,7 @@ class base(object):
         self.simname = simname
         return None
     def cdef_pars(self):
-        key = self.parameters.keys()
-        key.sort()
+        key = sorted(list(self.parameters.keys()))
         src_txt = ''
         for i in range(len(key)):
             if type(self.parameters[key[i]]) == int:
@@ -60,21 +60,18 @@ class base(object):
                 src_txt += 'double ' + key[i] + ';\n'
         return src_txt
     def cread_pars(self):
-        key = self.parameters.keys()
-        key.sort()
+        key = sorted(list(self.parameters.keys()))
         src_txt = ('int read_parameters(hid_t data_file_id)\n{\n'
                  + 'hid_t dset, memtype, space;\n'
                  + 'hsize_t dims[1];\n'
-                 + 'char *string_data;\n'
-                 + 'std::string tempstr;\n')
+                 + 'char *string_data;\n')
         for i in range(len(key)):
             src_txt += 'dset = H5Dopen(data_file_id, "parameters/{0}", H5P_DEFAULT);\n'.format(key[i])
             if type(self.parameters[key[i]]) == int:
                 src_txt += 'H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key[i])
             elif type(self.parameters[key[i]]) == str:
                 src_txt += ('space = H5Dget_space(dset);\n' +
-                            'memtype = H5Tcopy(H5T_C_S1);\n' +
-                            'H5Tset_size(memtype, H5T_VARIABLE);\n' +
+                            'memtype = H5Dget_type(dset);\n' +
                             'H5Sget_simple_extent_dims(space, dims, NULL);\n' +
                             'string_data = (char*)malloc(dims[0]*sizeof(char));\n' +
                             'H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);\n' +
@@ -88,8 +85,7 @@ class base(object):
         src_txt += 'return 0;\n}\n' # finishing read_parameters
         return src_txt
     def cprint_pars(self):
-        key = self.parameters.keys()
-        key.sort()
+        key = sorted(list(self.parameters.keys()))
         src_txt = ''
         for i in range(len(key)):
             if type(self.parameters[key[i]]) == int:
@@ -104,7 +100,14 @@ class base(object):
             os.makedirs(self.work_dir)
         ofile = h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'w-')
         for k in self.parameters.keys():
-            ofile['parameters/' + k] = self.parameters[k]
+            if (type(self.parameters[k]) == str) and (sys.version[0] == 3):
+                ofile.create_dataset('parameters/' + k,
+                                     (1,),
+                                     dtype = 'S10')
+                #ofile['parameters/' + k] = self.parameters[k].encode('ascii', 'ignore')
+                #print (ofile['parameters/' + k])
+            else:
+                ofile['parameters/' + k] = self.parameters[k]
         ofile['iteration'] = int(iter0)
         for k in bfps.install_info.keys():
             ofile['install_info/' + k] = str(bfps.install_info[k])
diff --git a/bfps/code.py b/bfps/code.py
index 478ef8ad9b61c6f3e1d76bdb2c650f9060787103..e8baced47d29812354974f3b8f44cda896b5ff67 100644
--- a/bfps/code.py
+++ b/bfps/code.py
@@ -24,10 +24,11 @@
 
 
 
-import h5py
-import subprocess
 import os
+import sys
 import shutil
+import subprocess
+import h5py
 from datetime import datetime
 import math
 import bfps
@@ -139,7 +140,8 @@ class code(base):
             raise IOError('header not there:\n' +
                           '{0}\n'.format(os.path.join(bfps.header_dir, 'base.hpp')) +
                           '{0}\n'.format(bfps.dist_loc))
-        libraries = ['bfps'] + bfps.install_info['libraries']
+        libraries = ['bfps']
+        libraries += bfps.install_info['libraries']
 
         command_strings = ['g++']
         command_strings += [self.name + '.cpp', '-o', self.name]
diff --git a/bfps/cpp/Lagrange_polys.hpp b/bfps/cpp/Lagrange_polys.hpp
index 6b6e9053ea1469f366591e11fe0d1221f3181029..401b9b7f076eeaa4ac44c8190ebaff947cd62247 100644
--- a/bfps/cpp/Lagrange_polys.hpp
+++ b/bfps/cpp/Lagrange_polys.hpp
@@ -28,14 +28,14 @@
 
 #define LAGRANGE_POLYS
 
-void beta_Lagrange_n1(int deriv, double x, double *poly_val);
-void beta_Lagrange_n2(int deriv, double x, double *poly_val);
-void beta_Lagrange_n3(int deriv, double x, double *poly_val);
-void beta_Lagrange_n4(int deriv, double x, double *poly_val);
-void beta_Lagrange_n5(int deriv, double x, double *poly_val);
-void beta_Lagrange_n6(int deriv, double x, double *poly_val);
-void beta_Lagrange_n7(int deriv, double x, double *poly_val);
-void beta_Lagrange_n8(int deriv, double x, double *poly_val);
+void beta_Lagrange_n1(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_Lagrange_n2(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_Lagrange_n3(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_Lagrange_n4(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_Lagrange_n5(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_Lagrange_n6(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_Lagrange_n7(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_Lagrange_n8(const int deriv, const double x, double *__restrict__ poly_val);
 
 #endif//LAGRANGE_POLYS
 
diff --git a/bfps/cpp/fftw_tools.cpp b/bfps/cpp/fftw_tools.cpp
index c9e685268c1fda92881798670933ca0c455f6420..f6eacbf1dfe2dfe31e603e9239c42d4639327d3d 100644
--- a/bfps/cpp/fftw_tools.cpp
+++ b/bfps/cpp/fftw_tools.cpp
@@ -28,9 +28,7 @@
 #include "base.hpp"
 #include "fftw_tools.hpp"
 
-//#ifdef NDEBUG
-//#undef NDEBUG
-//#endif//NDEBUG
+#define NDEBUG
 
 template <class rnumber>
 int clip_zero_padding(
diff --git a/bfps/cpp/field_descriptor.cpp b/bfps/cpp/field_descriptor.cpp
index 87f25a169accb92d185cbda8dbed4753846cba14..b5025835903a37ea5384cb4102c716f527aabfe5 100644
--- a/bfps/cpp/field_descriptor.cpp
+++ b/bfps/cpp/field_descriptor.cpp
@@ -22,10 +22,9 @@
 *                                                                     *
 **********************************************************************/
 
-// code is generally compiled via setuptools, therefore NDEBUG is present
-//#ifdef NDEBUG
-//#undef NDEBUG
-//#endif//NDEBUG
+
+
+#define NDEBUG
 
 #include <stdlib.h>
 #include <algorithm>
diff --git a/bfps/cpp/fluid_solver.cpp b/bfps/cpp/fluid_solver.cpp
index ce7230e5b91e04bce51fb3c2a58d6651941562ce..4cb096fa0cb695c05ff564bd3cca85bf93ee6100 100644
--- a/bfps/cpp/fluid_solver.cpp
+++ b/bfps/cpp/fluid_solver.cpp
@@ -22,10 +22,9 @@
 *                                                                     *
 **********************************************************************/
 
-// code is generally compiled via setuptools, therefore NDEBUG is present
-//#ifdef NDEBUG
-//#undef NDEBUG
-//#endif//NDEBUG
+
+
+#define NDEBUG
 
 #include <cassert>
 #include <cmath>
@@ -71,8 +70,8 @@ fluid_solver<R>::fluid_solver( \
     this->cvorticity = FFTW(alloc_complex)(this->cd->local_size);\
     this->cvelocity  = FFTW(alloc_complex)(this->cd->local_size);\
     this->rvorticity = FFTW(alloc_real)(this->cd->local_size*2);\
-    this->rvelocity  = (R*)(this->cvelocity);\
-    /*this->rvelocity  = FFTW(alloc_real)(this->cd->local_size*2);*/\
+    /*this->rvelocity  = (R*)(this->cvelocity);*/\
+    this->rvelocity  = FFTW(alloc_real)(this->cd->local_size*2);\
  \
     this->ru = this->rvelocity;\
     this->cu = this->cvelocity;\
@@ -156,6 +155,7 @@ fluid_solver<R>::fluid_solver( \
     /* initialization of fields must be done AFTER planning */ \
     std::fill_n((R*)this->cvorticity, this->cd->local_size*2, 0.0); \
     std::fill_n((R*)this->cvelocity, this->cd->local_size*2, 0.0); \
+    std::fill_n(this->rvelocity, this->cd->local_size*2, 0.0); \
     std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); \
     std::fill_n((R*)this->cv[1], this->cd->local_size*2, 0.0); \
     std::fill_n(this->rv[1], this->cd->local_size*2, 0.0); \
@@ -191,6 +191,7 @@ fluid_solver<R>::~fluid_solver() \
     FFTW(free)(this->cvorticity);\
     FFTW(free)(this->rvorticity);\
     FFTW(free)(this->cvelocity);\
+    FFTW(free)(this->rvelocity);\
     DEBUG_MSG("freed arrays\n"); \
     DEBUG_MSG("exiting ~fluid_solver\n"); \
 } \
@@ -200,6 +201,7 @@ void fluid_solver<R>::compute_vorticity() \
 { \
     ptrdiff_t tindex; \
     CLOOP_K2( \
+            this, \
             tindex = 3*cindex; \
             if (k2 <= this->kM2) \
             { \
@@ -221,6 +223,7 @@ void fluid_solver<R>::compute_velocity(FFTW(complex) *vorticity) \
 { \
     ptrdiff_t tindex; \
     CLOOP_K2( \
+            this, \
             tindex = 3*cindex; \
             if (k2 <= this->kM2 && k2 > 0) \
             { \
@@ -288,6 +291,7 @@ void fluid_solver<R>::add_forcing(\
     { \
         double knorm; \
         CLOOP( \
+                this, \
                 knorm = sqrt(this->kx[xindex]*this->kx[xindex] + \
                              this->ky[yindex]*this->ky[yindex] + \
                              this->kz[zindex]*this->kz[zindex]); \
@@ -328,6 +332,7 @@ void fluid_solver<R>::omega_nonlin( \
     this->dealias(this->cu, 3); \
     /* $\imath k \times Fourier(u \times \omega)$ */ \
     CLOOP( \
+            this, \
             tindex = 3*cindex; \
             { \
                 tmp[0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); \
@@ -351,6 +356,7 @@ void fluid_solver<R>::step(double dt) \
     std::fill_n((R*)this->cv[1], this->cd->local_size*2, 0.0); \
     this->omega_nonlin(0); \
     CLOOP_K2( \
+            this, \
             if (k2 <= this->kM2) \
             { \
                 factor0 = exp(-this->nu * k2 * dt); \
@@ -362,6 +368,7 @@ void fluid_solver<R>::step(double dt) \
  \
     this->omega_nonlin(1); \
     CLOOP_K2( \
+            this, \
             if (k2 <= this->kM2) \
             { \
                 factor0 = exp(-this->nu * k2 * dt/2); \
@@ -375,6 +382,7 @@ void fluid_solver<R>::step(double dt) \
  \
     this->omega_nonlin(2); \
     CLOOP_K2( \
+            this, \
             if (k2 <= this->kM2) \
             { \
                 factor0 = exp(-this->nu * k2 * dt * 0.5); \
@@ -459,11 +467,13 @@ void fluid_solver<R>::compute_Eulerian_acceleration(R *acceleration) \
 { \
     this->omega_nonlin(0); \
     CLOOP_K2( \
+            this, \
             for (int cc=0; cc<3; cc++) \
                 for (int i=0; i<2; i++) \
                     this->cv[1][3*cindex+cc][i] = this->cu[3*cindex+cc][i] - this->nu*k2*this->cv[0][3*cindex+cc][i]; \
             ); \
     CLOOP_K2( \
+            this, \
             if (k2 <= this->kM2 && k2 > 0) \
             { \
                 this->cv[2][3*cindex+0][0] = -(this->ky[yindex]*this->cv[1][3*cindex+2][1] - this->kz[zindex]*this->cv[1][3*cindex+1][1]) / k2; \
@@ -500,6 +510,7 @@ void fluid_solver<R>::compute_pressure(FFTW(complex) *pressure) \
     FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \
     this->dealias(this->cv[1], 3); \
     CLOOP_K2( \
+            this, \
             if (k2 <= this->kM2 && k2 > 0) \
             { \
                 tindex = 3*cindex; \
@@ -524,6 +535,7 @@ void fluid_solver<R>::compute_pressure(FFTW(complex) *pressure) \
     FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \
     this->dealias(this->cv[1], 3); \
     CLOOP_K2( \
+            this, \
             if (k2 <= this->kM2 && k2 > 0) \
             { \
                 tindex = 3*cindex; \
@@ -539,41 +551,23 @@ void fluid_solver<R>::compute_pressure(FFTW(complex) *pressure) \
 } \
  \
 template<> \
-void fluid_solver<R>::compute_vel_gradient(FFTW(complex) *A) \
+void fluid_solver<R>::compute_gradient_statistics( \
+        FFTW(complex) *vec, \
+        double *gradu_moments, \
+        double *trS2QR_moments, \
+        ptrdiff_t *gradu_hist, \
+        ptrdiff_t *trS2QR_hist, \
+        ptrdiff_t *QR2D_hist, \
+        double trS2QR_max_estimates[], \
+        double gradu_max_estimates[], \
+        int nbins, \
+        int QR2D_nbins) \
 { \
-    ptrdiff_t tindex; \
-    this->compute_velocity(this->cvorticity); \
-    std::fill_n((R*)A, 3*2*this->cd->local_size, 0.0); \
-    FFTW(complex) *dx_u, *dy_u, *dz_u; \
-    dx_u = A; \
-    dy_u = A + this->cd->local_size; \
-    dz_u = A + 2*this->cd->local_size; \
-    CLOOP_K2( \
-            if (k2 <= this->kM2) \
-            { \
-                tindex = 3*cindex; \
-                for (int cc=0; cc<3; cc++) \
-                { \
-                    dx_u[tindex + cc][0] = -this->kx[xindex]*this->cu[tindex+cc][1]; \
-                    dx_u[tindex + cc][1] =  this->kx[xindex]*this->cu[tindex+cc][0]; \
-                    dy_u[tindex + cc][0] = -this->ky[yindex]*this->cu[tindex+cc][1]; \
-                    dy_u[tindex + cc][1] =  this->ky[yindex]*this->cu[tindex+cc][0]; \
-                    dz_u[tindex + cc][0] = -this->kz[zindex]*this->cu[tindex+cc][1]; \
-                    dz_u[tindex + cc][1] =  this->kz[zindex]*this->cu[tindex+cc][0]; \
-                } \
-            } \
-            ); \
-} \
- \
-template<> \
-void fluid_solver<R>::compute_trS2(R *trS2) \
-{ \
-    ptrdiff_t tindex; \
     FFTW(complex) *ca; \
     R *ra; \
     ca = FFTW(alloc_complex)(this->cd->local_size*3); \
     ra = (R*)(ca); \
-    this->compute_vel_gradient(ca); \
+    this->compute_vector_gradient(ca, vec); \
     for (int cc=0; cc<3; cc++) \
     { \
         std::copy( \
@@ -587,20 +581,93 @@ void fluid_solver<R>::compute_trS2(R *trS2) \
                 ra + cc*this->cd->local_size*2); \
     } \
     /* velocity gradient is now stored, in real space, in ra */ \
+    std::fill_n(this->rv[1], 2*this->cd->local_size, 0.0); \
     R *dx_u, *dy_u, *dz_u; \
     dx_u = ra; \
     dy_u = ra + 2*this->cd->local_size; \
     dz_u = ra + 4*this->cd->local_size; \
+    double binsize[2]; \
+    double tmp_max_estimate[3]; \
+    tmp_max_estimate[0] = trS2QR_max_estimates[0]; \
+    tmp_max_estimate[1] = trS2QR_max_estimates[1]; \
+    tmp_max_estimate[2] = trS2QR_max_estimates[2]; \
+    binsize[0] = 2*tmp_max_estimate[2] / QR2D_nbins; \
+    binsize[1] = 2*tmp_max_estimate[1] / QR2D_nbins; \
+    ptrdiff_t *local_hist = new ptrdiff_t[QR2D_nbins*QR2D_nbins]; \
+    std::fill_n(local_hist, QR2D_nbins*QR2D_nbins, 0); \
     RLOOP( \
             this, \
-            tindex = 3*rindex; \
-            trS2[rindex] = (dx_u[tindex+0]*dx_u[tindex+0] + \
-                            dy_u[tindex+1]*dy_u[tindex+1] + \
-                            dz_u[tindex+2]*dz_u[tindex+2] + \
-                          ((dx_u[tindex+1]+dy_u[tindex+0])*(dx_u[tindex+1]+dy_u[tindex+0]) + \
-                           (dy_u[tindex+2]+dz_u[tindex+1])*(dy_u[tindex+2]+dz_u[tindex+1]) + \
-                           (dz_u[tindex+0]+dx_u[tindex+2])*(dz_u[tindex+0]+dx_u[tindex+2]))/2) / this->normalization_factor; \
+            R AxxAxx; \
+            R AyyAyy; \
+            R AzzAzz; \
+            R AxyAyx; \
+            R AyzAzy; \
+            R AzxAxz; \
+            R Sxy; \
+            R Syz; \
+            R Szx; \
+            ptrdiff_t tindex = 3*rindex; \
+            AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; \
+            AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; \
+            AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; \
+            AxyAyx = dx_u[tindex+1]*dy_u[tindex+0]; \
+            AyzAzy = dy_u[tindex+2]*dz_u[tindex+1]; \
+            AzxAxz = dz_u[tindex+0]*dx_u[tindex+2]; \
+            this->rv[1][tindex+1] = - (AxxAxx + AyyAyy + AzzAzz)/2 - AxyAyx - AyzAzy - AzxAxz; \
+            this->rv[1][tindex+2] = - (dx_u[tindex+0]*(AxxAxx/3 + AxyAyx + AzxAxz) + \
+                                       dy_u[tindex+1]*(AyyAyy/3 + AxyAyx + AyzAzy) + \
+                                       dz_u[tindex+2]*(AzzAzz/3 + AzxAxz + AyzAzy) + \
+                                       dx_u[tindex+1]*dy_u[tindex+2]*dz_u[tindex+0] + \
+                                       dx_u[tindex+2]*dy_u[tindex+0]*dz_u[tindex+1]); \
+            int bin0 = int(floor((this->rv[1][tindex+2] + tmp_max_estimate[2]) / binsize[0])); \
+            int bin1 = int(floor((this->rv[1][tindex+1] + tmp_max_estimate[1]) / binsize[1])); \
+            if ((bin0 >= 0 && bin0 < QR2D_nbins) && \
+                (bin1 >= 0 && bin1 < QR2D_nbins)) \
+                local_hist[bin1*QR2D_nbins + bin0]++; \
+            Sxy = dx_u[tindex+1]+dy_u[tindex+0]; \
+            Syz = dy_u[tindex+2]+dz_u[tindex+1]; \
+            Szx = dz_u[tindex+0]+dx_u[tindex+2]; \
+            this->rv[1][tindex] = (AxxAxx + AyyAyy + AzzAzz + \
+                                   (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); \
             ); \
+    MPI_Allreduce( \
+            local_hist, \
+            QR2D_hist, \
+            QR2D_nbins * QR2D_nbins, \
+            MPI_INT64_T, MPI_SUM, this->cd->comm); \
+    delete[] local_hist; \
+    this->compute_rspace_stats3( \
+            this->rv[1], \
+            trS2QR_moments, \
+            trS2QR_hist, \
+            tmp_max_estimate, \
+            nbins); \
+    double *tmp_moments = new double[10*3]; \
+    ptrdiff_t *tmp_hist = new ptrdiff_t[nbins*3]; \
+    for (int cc=0; cc<3; cc++) \
+    { \
+        tmp_max_estimate[0] = gradu_max_estimates[cc*3 + 0]; \
+        tmp_max_estimate[1] = gradu_max_estimates[cc*3 + 1]; \
+        tmp_max_estimate[2] = gradu_max_estimates[cc*3 + 2]; \
+        this->compute_rspace_stats3( \
+                dx_u, \
+                tmp_moments, \
+                tmp_hist, \
+                tmp_max_estimate, \
+                nbins); \
+        for (int n = 0; n < 10; n++) \
+        for (int i = 0; i < 3 ; i++) \
+        { \
+            gradu_moments[(n*3 + cc)*3 + i] = tmp_moments[n*3 + i]; \
+        } \
+        for (int n = 0; n < nbins; n++) \
+        for (int i = 0; i < 3; i++) \
+        { \
+            gradu_hist[(n*3 + cc)*3 + i] = tmp_hist[n*3 + i]; \
+        } \
+    } \
+    delete[] tmp_moments; \
+    delete[] tmp_hist; \
     FFTW(free)(ca); \
 } \
  \
@@ -612,34 +679,11 @@ void fluid_solver<R>::compute_Lagrangian_acceleration(R *acceleration) \
     pressure = FFTW(alloc_complex)(this->cd->local_size/3); \
     this->compute_velocity(this->cvorticity); \
     this->ift_velocity(); \
-    /*clip_zero_padding<R>(this->rd, this->rvelocity, 3); \
-    std::copy( \
-            this->rvelocity, \
-            this->rvelocity + this->rd->local_size, \
-            acceleration); \
-    return; */\
     this->compute_pressure(pressure); \
-    /*this->compute_trS2(this->ru); \
-    RLOOP( \
-            this, \
-            tindex = 3*rindex; \
-            this->rv[1][tindex+0] = this->ru[rindex]; \
-            this->rv[1][tindex+1] = (this->rv[0][tindex+0]*this->rv[0][tindex+0] + \
-                                     this->rv[0][tindex+1]*this->rv[0][tindex+1] + \
-                                     this->rv[0][tindex+2]*this->rv[0][tindex+2])/(2*this->normalization_factor); \
-             ); \
-    FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \
-    CLOOP_K2( \
-            if (k2 < this->kM2 && k2 > 0) \
-            { \
-            tindex = 3*cindex; \
-            pressure[cindex][0] = (this->cv[1][tindex+0][0] - this->cv[1][tindex+1][0]) / k2; \
-            pressure[cindex][1] = (this->cv[1][tindex+0][1] - this->cv[1][tindex+1][1]) / k2; \
-            } \
-            );*/ \
     this->compute_velocity(this->cvorticity); \
     std::fill_n((R*)this->cv[1], 2*this->cd->local_size, 0.0); \
     CLOOP_K2( \
+            this, \
             if (k2 <= this->kM2) \
             { \
                 tindex = 3*cindex; \
@@ -668,51 +712,6 @@ void fluid_solver<R>::compute_Lagrangian_acceleration(R *acceleration) \
             this->rv[1], \
             this->rv[1] + 2*this->cd->local_size, \
             acceleration); \
-    /* ********* */ \
-    /* debugging */ \
-    /* check k2*p = -(trS2 - vorticity^2/2) */ \
-    /*this->compute_trS2(this->ru); \
-    RLOOP( \
-            this, \
-            tindex = 3*rindex; \
-            this->rv[1][tindex+0] = this->ru[rindex]; \
-            this->rv[1][tindex+1] = (this->rv[0][tindex+0]*this->rv[0][tindex+0] + \
-                                     this->rv[0][tindex+1]*this->rv[0][tindex+1] + \
-                                     this->rv[0][tindex+2]*this->rv[0][tindex+2])/(2*this->normalization_factor); \
-             ); \
-    this->dealias(this->cu, 3); \
-    FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \
-    CLOOP_K2( \
-            if (k2 < this->kM2 && k2 > 0) \
-            { \
-                tindex = 3*cindex; \
-                this->cv[1][tindex+2][0] = k2*pressure[cindex][0]; \
-                this->cv[1][tindex+2][1] = k2*pressure[cindex][1]; \
-            } \
-            );*/ \
-    /*char fname[256]; \
-    this->fill_up_filename("pressure_trS2_enstrophy", fname); \
-    this->cd->write(fname, this->cv[1]);*/ \
-    /*double difference = 0.0; \
-    double itval, rtval; \
-    CLOOP_K2_NXMODES( \
-            if (k2 < this->kM2 && k2 > 0) \
-            { \
-            tindex = 3*cindex; \
-            rtval = k2*pressure[cindex][0] - this->cv[1][tindex+0][0] + this->cv[1][tindex+1][0]; \
-            itval = k2*pressure[cindex][1] - this->cv[1][tindex+0][1] + this->cv[1][tindex+1][1]; \
-            difference += nxmodes*(itval*itval + rtval*rtval); \
-            } \
-            ); \
-    itval = difference; \
-    MPI_Allreduce( \
-            &itval, \
-            &difference, \
-            1, \
-            MPI_DOUBLE, MPI_SUM, this->cd->comm); \
-    if (myrank == 0) DEBUG_MSG("difference is %g\n", difference);*/ \
-    /* debugging */ \
-    /* ********* */ \
     FFTW(free)(pressure); \
 } \
 
diff --git a/bfps/cpp/fluid_solver.hpp b/bfps/cpp/fluid_solver.hpp
index 9eead599b7421dbbe8171139d1533c4e9af2b096..cdec361989b3d5f5da76115dedfcb4be39c62c6f 100644
--- a/bfps/cpp/fluid_solver.hpp
+++ b/bfps/cpp/fluid_solver.hpp
@@ -82,13 +82,23 @@ class fluid_solver:public fluid_solver_base<rnumber>
                 unsigned FFTW_PLAN_RIGOR = FFTW_MEASURE);
         ~fluid_solver(void);
 
+        void compute_gradient_statistics(
+                rnumber (*__restrict__ vec)[2],
+                double *__restrict__ gradu_moments,
+                double *__restrict__ trS2_Q_R_moments,
+                ptrdiff_t *__restrict__ gradu_histograms,
+                ptrdiff_t *__restrict__ trS2_Q_R_histograms,
+                ptrdiff_t *__restrict__ QR2D_histogram,
+                double trS2_Q_R_max_estimates[3],
+                double gradu_max_estimates[9],
+                const int nbins_1D = 256,
+                const int nbins_2D = 64);
+
         void compute_vorticity(void);
-        void compute_velocity(rnumber (*vorticity)[2]);
-        void compute_pressure(rnumber (*pressure)[2]);
-        void compute_vel_gradient(rnumber (*A)[2]);
-        void compute_trS2(rnumber *trS2);
-        void compute_Eulerian_acceleration(rnumber *dst);
-        void compute_Lagrangian_acceleration(rnumber *dst);
+        void compute_velocity(rnumber (*__restrict__ vorticity)[2]);
+        void compute_pressure(rnumber (*__restrict__ pressure)[2]);
+        void compute_Eulerian_acceleration(rnumber *__restrict__ dst);
+        void compute_Lagrangian_acceleration(rnumber *__restrict__ dst);
         void ift_velocity();
         void dft_velocity();
         void ift_vorticity();
@@ -96,7 +106,7 @@ class fluid_solver:public fluid_solver_base<rnumber>
         void omega_nonlin(int src);
         void step(double dt);
         void impose_zero_modes(void);
-        void add_forcing(rnumber (*acc_field)[2], rnumber (*vort_field)[2], rnumber factor);
+        void add_forcing(rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor);
 
         int read(char field, char representation);
         int write(char field, char representation);
diff --git a/bfps/cpp/fluid_solver_base.cpp b/bfps/cpp/fluid_solver_base.cpp
index bbdccad8f4b386007e42907bbe8ed5317c9728bd..5e2d93d76108e19dea050a22ede6a914de3cc28c 100644
--- a/bfps/cpp/fluid_solver_base.cpp
+++ b/bfps/cpp/fluid_solver_base.cpp
@@ -22,10 +22,9 @@
 *                                                                     *
 **********************************************************************/
 
-// code is generally compiled via setuptools, therefore NDEBUG is present
-//#ifdef NDEBUG
-//#undef NDEBUG
-//#endif//NDEBUG
+
+
+//#define NDEBUG
 
 #include <cassert>
 #include <cmath>
@@ -70,7 +69,8 @@ void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec
     std::fill_n(cospec_local, this->nshells*9, 0);
     int tmp_int;
     CLOOP_K2_NXMODES(
-            if (k2 <= this->kM2)
+            this,
+            if (k2 <= this->kMspec2)
             {
                 tmp_int = int(sqrt(k2)/this->dk)*9;
                 for (int i=0; i<3; i++)
@@ -98,7 +98,8 @@ void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec
     double factor = 1;
     int tmp_int;
     CLOOP_K2_NXMODES(
-            if (k2 <= this->kM2)
+            this,
+            if (k2 <= this->kMspec2)
             {
                 factor = nxmodes*pow(k2, k2exponent);
                 tmp_int = int(sqrt(k2)/this->dk)*9;
@@ -126,75 +127,80 @@ void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec
 }
 
 template <class rnumber>
+template<int nvals>
 void fluid_solver_base<rnumber>::compute_rspace_stats(
         rnumber *a,
         double *moments,
         ptrdiff_t *hist,
-        double max_estimate,
-        int nbins)
+        double max_estimate[],
+        const int nbins)
 {
-    double *local_moments = fftw_alloc_real(10*4);
-    double val_tmp[4], binsize, pow_tmp[4];
-    ptrdiff_t *local_hist = new ptrdiff_t[nbins*4];
+    double *local_moments = fftw_alloc_real(10*nvals);
+    double val_tmp[nvals], binsize[nvals], pow_tmp[nvals];
+    ptrdiff_t *local_hist = new ptrdiff_t[nbins*nvals];
     int bin;
-    binsize = 2*max_estimate / nbins;
-    std::fill_n(local_hist, nbins*4, 0);
-    std::fill_n(local_moments, 10*4, 0);
-    local_moments[3] = max_estimate;
+    for (int i=0; i<nvals; i++)
+        binsize[i] = 2*max_estimate[i] / nbins;
+    std::fill_n(local_hist, nbins*nvals, 0);
+    std::fill_n(local_moments, 10*nvals, 0);
+    if (nvals == 4) local_moments[3] = max_estimate[3];
     RLOOP(
         this,
-        std::fill_n(pow_tmp, 4, 1.0);
-        val_tmp[3] = 0.0;
+        std::fill_n(pow_tmp, nvals, 1.0);
+        if (nvals == 4) val_tmp[3] = 0.0;
         for (int i=0; i<3; i++)
         {
             val_tmp[i] = a[rindex*3+i];
-            val_tmp[3] += val_tmp[i]*val_tmp[i];
+            if (nvals == 4) val_tmp[3] += val_tmp[i]*val_tmp[i];
+        }
+        if (nvals == 4)
+        {
+            val_tmp[3] = sqrt(val_tmp[3]);
+            if (val_tmp[3] < local_moments[0*nvals+3])
+                local_moments[0*nvals+3] = val_tmp[3];
+            if (val_tmp[3] > local_moments[9*nvals+3])
+                local_moments[9*nvals+3] = val_tmp[3];
+            bin = int(floor(val_tmp[3]*2/binsize[3]));
+            if (bin >= 0 && bin < nbins)
+                local_hist[bin*nvals+3]++;
         }
-        val_tmp[3] = sqrt(val_tmp[3]);
-        if (val_tmp[3] < local_moments[0*4+3])
-            local_moments[0*4+3] = val_tmp[3];
-        if (val_tmp[3] > local_moments[9*4+3])
-            local_moments[9*4+3] = val_tmp[3];
-        bin = int(val_tmp[3]*2/binsize);
-        if (bin >= 0 && bin < nbins)
-            local_hist[bin*4+3]++;
         for (int i=0; i<3; i++)
         {
-            if (val_tmp[i] < local_moments[0*4+i])
-                local_moments[0*4+i] = val_tmp[i];
-            if (val_tmp[i] > local_moments[9*4+i])
-                local_moments[9*4+i] = val_tmp[i];
-            bin = int((val_tmp[i] + max_estimate) / binsize);
+            if (val_tmp[i] < local_moments[0*nvals+i])
+                local_moments[0*nvals+i] = val_tmp[i];
+            if (val_tmp[i] > local_moments[9*nvals+i])
+                local_moments[9*nvals+i] = val_tmp[i];
+            bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i]));
             if (bin >= 0 && bin < nbins)
-                local_hist[bin*4+i]++;
+                local_hist[bin*nvals+i]++;
         }
         for (int n=1; n<9; n++)
-            for (int i=0; i<4; i++)
-                local_moments[n*4 + i] += (pow_tmp[i] = val_tmp[i]*pow_tmp[i]);
+            for (int i=0; i<nvals; i++)
+                local_moments[n*nvals + i] += (pow_tmp[i] = val_tmp[i]*pow_tmp[i]);
         );
     MPI_Allreduce(
             (void*)local_moments,
             (void*)moments,
-            4,
+            nvals,
             MPI_DOUBLE, MPI_MIN, this->cd->comm);
     MPI_Allreduce(
-            (void*)(local_moments + 4),
-            (void*)(moments+4),
-            8*4,
+            (void*)(local_moments + nvals),
+            (void*)(moments+nvals),
+            8*nvals,
             MPI_DOUBLE, MPI_SUM, this->cd->comm);
     MPI_Allreduce(
-            (void*)(local_moments + 9*4),
-            (void*)(moments+9*4),
-            4,
+            (void*)(local_moments + 9*nvals),
+            (void*)(moments+9*nvals),
+            nvals,
             MPI_DOUBLE, MPI_MAX, this->cd->comm);
     MPI_Allreduce(
             (void*)local_hist,
             (void*)hist,
-            nbins*4,
+            nbins*nvals,
             MPI_INT64_T, MPI_SUM, this->cd->comm);
     for (int n=1; n<9; n++)
-        for (int i=0; i<4; i++)
-            moments[n*4 + i] /= this->normalization_factor;
+        for (int i=0; i<nvals; i++)
+            moments[n*nvals + i] /= this->normalization_factor;
     fftw_free(local_moments);
     delete[] local_hist;
 }
@@ -265,9 +271,9 @@ fluid_solver_base<R>::fluid_solver_base( \
     { \
         /* HL07 smooth filter */ \
         case 1: \
-            this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)); \
-            this->kMy = this->dky*(int(this->rd->sizes[1] / 2)); \
-            this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)); \
+            this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)-1); \
+            this->kMy = this->dky*(int(this->rd->sizes[1] / 2)-1); \
+            this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)-1); \
             break; \
         default: \
             this->kMx = this->dkx*(int(this->rd->sizes[2] / 3)-1); \
@@ -296,6 +302,8 @@ fluid_solver_base<R>::fluid_solver_base( \
     if (this->kM < this->kMy) this->kM = this->kMy; \
     if (this->kM < this->kMz) this->kM = this->kMz; \
     this->kM2 = this->kM * this->kM; \
+    this->kMspec = this->kM; \
+    this->kMspec2 = this->kM2; \
     this->dk = this->dkx; \
     if (this->dk > this->dky) this->dk = this->dky; \
     if (this->dk > this->dkz) this->dk = this->dkz; \
@@ -304,7 +312,10 @@ fluid_solver_base<R>::fluid_solver_base( \
             "kM = %g, kM2 = %g, dk = %g, dk2 = %g\n", \
             this->kM, this->kM2, this->dk, this->dk2); \
     /* spectra stuff */ \
-    this->nshells = int(this->kM / this->dk) + 2; \
+    this->nshells = int(this->kMspec / this->dk) + 2; \
+    DEBUG_MSG( \
+            "kMspec = %g, kMspec2 = %g, nshells = %ld\n", \
+            this->kMspec, this->kMspec2, this->nshells); \
     this->kshell = new double[this->nshells]; \
     std::fill_n(this->kshell, this->nshells, 0.0); \
     this->nshell = new int64_t[this->nshells]; \
@@ -315,6 +326,7 @@ fluid_solver_base<R>::fluid_solver_base( \
     std::fill_n(nshell_local, this->nshells, 0.0); \
     double knorm; \
     CLOOP_K2_NXMODES( \
+            this, \
             if (k2 < this->kM2) \
             { \
                 knorm = sqrt(k2); \
@@ -345,7 +357,6 @@ fluid_solver_base<R>::fluid_solver_base( \
 template<> \
 fluid_solver_base<R>::~fluid_solver_base() \
 { \
-    DEBUG_MSG("entered ~fluid_solver_base\n"); \
     delete[] this->kshell; \
     delete[] this->nshell; \
  \
@@ -355,7 +366,6 @@ fluid_solver_base<R>::~fluid_solver_base() \
  \
     delete this->cd; \
     delete this->rd; \
-    DEBUG_MSG("exiting ~fluid_solver_base\n"); \
 } \
  \
 template<> \
@@ -365,6 +375,7 @@ void fluid_solver_base<R>::low_pass_Fourier(FFTW(complex) *a, const int howmany,
     const int howmany2 = 2*howmany; \
     /*DEBUG_MSG("entered low_pass_Fourier, kmax=%lg km2=%lg howmany2=%d\n", kmax, km2, howmany2);*/ \
     CLOOP_K2( \
+            this, \
             /*DEBUG_MSG("kx=%lg ky=%lg kz=%lg k2=%lg\n", \
                       this->kx[xindex], \
                       this->ky[yindex], \
@@ -385,6 +396,7 @@ void fluid_solver_base<R>::dealias(FFTW(complex) *a, const int howmany) \
         } \
     double tval; \
     CLOOP_K2( \
+            this, \
             tval = this->Fourier_filter[int(round(k2/this->dk2))]; \
             for (int tcounter = 0; tcounter < howmany; tcounter++) \
             for (int i=0; i<2; i++) \
@@ -397,6 +409,7 @@ void fluid_solver_base<R>::force_divfree(FFTW(complex) *a) \
 { \
     FFTW(complex) tval; \
     CLOOP_K2( \
+            this, \
             if (k2 > 0) \
             { \
                 tval[0] = (this->kx[xindex]*((*(a + cindex*3  ))[0]) + \
@@ -418,6 +431,33 @@ void fluid_solver_base<R>::force_divfree(FFTW(complex) *a) \
 } \
  \
 template<> \
+void fluid_solver_base<R>::compute_vector_gradient(FFTW(complex) *A, FFTW(complex) *cvec) \
+{ \
+    ptrdiff_t tindex; \
+    std::fill_n((R*)A, 3*2*this->cd->local_size, 0.0); \
+    FFTW(complex) *dx_u, *dy_u, *dz_u; \
+    dx_u = A; \
+    dy_u = A + this->cd->local_size; \
+    dz_u = A + 2*this->cd->local_size; \
+    CLOOP_K2( \
+            this, \
+            if (k2 <= this->kM2) \
+            { \
+                tindex = 3*cindex; \
+                for (int cc=0; cc<3; cc++) \
+                { \
+                    dx_u[tindex + cc][0] = -this->kx[xindex]*cvec[tindex+cc][1]; \
+                    dx_u[tindex + cc][1] =  this->kx[xindex]*cvec[tindex+cc][0]; \
+                    dy_u[tindex + cc][0] = -this->ky[yindex]*cvec[tindex+cc][1]; \
+                    dy_u[tindex + cc][1] =  this->ky[yindex]*cvec[tindex+cc][0]; \
+                    dz_u[tindex + cc][0] = -this->kz[zindex]*cvec[tindex+cc][1]; \
+                    dz_u[tindex + cc][1] =  this->kz[zindex]*cvec[tindex+cc][0]; \
+                } \
+            } \
+            ); \
+} \
+ \
+template<> \
 void fluid_solver_base<R>::symmetrize(FFTW(complex) *data, const int howmany) \
 { \
     ptrdiff_t ii, cc; \
@@ -450,7 +490,6 @@ void fluid_solver_base<R>::symmetrize(FFTW(complex) *data, const int howmany) \
                         (*(data + howmany*((yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[imag_comp]; \
         if (ranksrc != rankdst) \
         { \
-            DEBUG_MSG("inside fluid_solver_base::symmetrize, about to send/recv data\n"); \
             if (this->cd->myrank == ranksrc) \
                 MPI_Send((void*)buffer, \
                          howmany*this->cd->sizes[1], MPI_CNUM, rankdst, yy, \
@@ -459,7 +498,6 @@ void fluid_solver_base<R>::symmetrize(FFTW(complex) *data, const int howmany) \
                 MPI_Recv((void*)buffer, \
                          howmany*this->cd->sizes[1], MPI_CNUM, ranksrc, yy, \
                          this->cd->comm, mpistatus); \
-            DEBUG_MSG("inside fluid_solver_base::symmetrize, after send/recv data\n"); \
         } \
         if (this->cd->myrank == rankdst) \
         { \
@@ -524,7 +562,10 @@ int fluid_solver_base<R>::write_base(const char *fname, FFTW(complex) *data) \
     char full_name[512]; \
     sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \
     return this->cd->write(full_name, (void*)data); \
-}
+} \
+ \
+/* finally, force generation of code                                         */ \
+template class fluid_solver_base<R>; \
 
 /*****************************************************************************/
 
@@ -544,11 +585,3 @@ FLUID_SOLVER_BASE_DEFINITIONS(
         BFPS_MPICXX_DOUBLE_COMPLEX)
 /*****************************************************************************/
 
-
-
-/*****************************************************************************/
-/* finally, force generation of code for single precision                    */
-template class fluid_solver_base<float>;
-template class fluid_solver_base<double>;
-/*****************************************************************************/
-
diff --git a/bfps/cpp/fluid_solver_base.hpp b/bfps/cpp/fluid_solver_base.hpp
index 64659a0488ffc576b3959970e002521dde28d8eb..26ae5ded6399e1d3ac36c760f2ea53b2e10f011c 100644
--- a/bfps/cpp/fluid_solver_base.hpp
+++ b/bfps/cpp/fluid_solver_base.hpp
@@ -62,6 +62,7 @@ class fluid_solver_base
         /* mode and dealiasing information */
         int dealias_type;
         double kMx, kMy, kMz, kM, kM2;
+        double kMspec, kMspec2;
         double *kx, *ky, *kz;
         std::unordered_map<int, double> Fourier_filter;
         double *kshell;
@@ -82,19 +83,37 @@ class fluid_solver_base
                 unsigned FFTW_PLAN_RIGOR = FFTW_ESTIMATE);
         ~fluid_solver_base();
 
-        void low_pass_Fourier(cnumber *a, int howmany, double kmax);
-        void dealias(cnumber *a, int howmany);
-        void force_divfree(cnumber *a);
-        void symmetrize(cnumber *a, int howmany);
-        void clean_up_real_space(rnumber *a, int howmany);
-        void cospectrum(cnumber *a, cnumber *b, double *spec);
-        void cospectrum(cnumber *a, cnumber *b, double *spec, const double k2exponent);
-        double autocorrel(cnumber *a);
-        void compute_rspace_stats(rnumber *a,
-                                  double *moments,
-                                  ptrdiff_t *hist,
-                                  double max_estimate = 1.0,
-                                  int nbins = 256);
+        void low_pass_Fourier(cnumber *__restrict__ a, int howmany, double kmax);
+        void dealias(cnumber *__restrict__ a, int howmany);
+        void force_divfree(cnumber *__restrict__ a);
+        void symmetrize(cnumber *__restrict__ a, int howmany);
+        void clean_up_real_space(rnumber *__restrict__ a, int howmany);
+        void cospectrum(cnumber *__restrict__ a, cnumber *__restrict__ b, double *__restrict__ spec);
+        void cospectrum(cnumber *__restrict__ a, cnumber *__restrict__ b, double *__restrict__ spec, const double k2exponent);
+        double autocorrel(cnumber *__restrict__ a);
+        template <int nvals>
+        void compute_rspace_stats(rnumber *__restrict__ a,
+                                  double *__restrict__ moments,
+                                  ptrdiff_t *__restrict__ hist,
+                                  double max_estimate[nvals],
+                                  const int nbins = 256);
+        inline void compute_rspace_stats3(rnumber *__restrict__ a,
+                                  double *__restrict__ moments,
+                                  ptrdiff_t *__restrict__ hist,
+                                  double max_estimate[3],
+                                  const int nbins = 256)
+        {
+            this->compute_rspace_stats<3>(a, moments, hist, max_estimate, nbins);
+        }
+        inline void compute_rspace_stats4(rnumber *__restrict__ a,
+                                  double *__restrict__ moments,
+                                  ptrdiff_t *__restrict__ hist,
+                                  double max_estimate[4],
+                                  const int nbins = 256)
+        {
+            this->compute_rspace_stats<4>(a, moments, hist, max_estimate, nbins);
+        }
+        void compute_vector_gradient(rnumber (*__restrict__ A)[2], rnumber(*__restrict__ source)[2]);
         void write_spectrum(const char *fname, cnumber *a, const double k2exponent = 0.0);
         void fill_up_filename(const char *base_name, char *full_name);
         int read_base(const char *fname, rnumber *data);
@@ -109,32 +128,32 @@ class fluid_solver_base
 /* macros for loops                                                          */
 
 /* Fourier space loop */
-#define CLOOP(expression) \
+#define CLOOP(obj, expression) \
  \
 { \
     ptrdiff_t cindex = 0; \
-    for (ptrdiff_t yindex = 0; yindex < this->cd->subsizes[0]; yindex++) \
-    for (ptrdiff_t zindex = 0; zindex < this->cd->subsizes[1]; zindex++) \
-    for (ptrdiff_t xindex = 0; xindex < this->cd->subsizes[2]; xindex++) \
+    for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \
+    for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \
+    for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) \
         { \
             expression; \
             cindex++; \
         } \
 }
 
-#define CLOOP_NXMODES(expression) \
+#define CLOOP_NXMODES(obj, expression) \
  \
 { \
     ptrdiff_t cindex = 0; \
-    for (ptrdiff_t yindex = 0; yindex < this->cd->subsizes[0]; yindex++) \
-    for (ptrdiff_t zindex = 0; zindex < this->cd->subsizes[1]; zindex++) \
+    for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \
+    for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \
     { \
         int nxmodes = 1; \
         ptrdiff_t xindex = 0; \
         expression; \
         cindex++; \
         nxmodes = 2; \
-    for (xindex = 1; xindex < this->cd->subsizes[2]; xindex++) \
+    for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) \
         { \
             expression; \
             cindex++; \
@@ -142,44 +161,44 @@ class fluid_solver_base
     } \
 }
 
-#define CLOOP_K2(expression) \
+#define CLOOP_K2(obj, expression) \
  \
 { \
     double k2; \
     ptrdiff_t cindex = 0; \
-    for (ptrdiff_t yindex = 0; yindex < this->cd->subsizes[0]; yindex++) \
-    for (ptrdiff_t zindex = 0; zindex < this->cd->subsizes[1]; zindex++) \
-    for (ptrdiff_t xindex = 0; xindex < this->cd->subsizes[2]; xindex++) \
+    for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \
+    for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \
+    for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) \
         { \
-            k2 = (this->kx[xindex]*this->kx[xindex] + \
-                  this->ky[yindex]*this->ky[yindex] + \
-                  this->kz[zindex]*this->kz[zindex]); \
+            k2 = (obj->kx[xindex]*obj->kx[xindex] + \
+                  obj->ky[yindex]*obj->ky[yindex] + \
+                  obj->kz[zindex]*obj->kz[zindex]); \
             expression; \
             cindex++; \
         } \
 }
 
-#define CLOOP_K2_NXMODES(expression) \
+#define CLOOP_K2_NXMODES(obj, expression) \
  \
 { \
     double k2; \
     ptrdiff_t cindex = 0; \
-    for (ptrdiff_t yindex = 0; yindex < this->cd->subsizes[0]; yindex++) \
-    for (ptrdiff_t zindex = 0; zindex < this->cd->subsizes[1]; zindex++) \
+    for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \
+    for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \
     { \
         int nxmodes = 1; \
         ptrdiff_t xindex = 0; \
-        k2 = (this->kx[xindex]*this->kx[xindex] + \
-              this->ky[yindex]*this->ky[yindex] + \
-              this->kz[zindex]*this->kz[zindex]); \
+        k2 = (obj->kx[xindex]*obj->kx[xindex] + \
+              obj->ky[yindex]*obj->ky[yindex] + \
+              obj->kz[zindex]*obj->kz[zindex]); \
         expression; \
         cindex++; \
         nxmodes = 2; \
-    for (xindex = 1; xindex < this->cd->subsizes[2]; xindex++) \
+    for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) \
         { \
-            k2 = (this->kx[xindex]*this->kx[xindex] + \
-                  this->ky[yindex]*this->ky[yindex] + \
-                  this->kz[zindex]*this->kz[zindex]); \
+            k2 = (obj->kx[xindex]*obj->kx[xindex] + \
+                  obj->ky[yindex]*obj->ky[yindex] + \
+                  obj->kz[zindex]*obj->kz[zindex]); \
             expression; \
             cindex++; \
         } \
diff --git a/bfps/cpp/interpolator.cpp b/bfps/cpp/interpolator.cpp
index 4f1467952dfed988f2d854cc3130611c9dadda5c..cf04af668c0cee55b1e4181ebc2654d1d4612965 100644
--- a/bfps/cpp/interpolator.cpp
+++ b/bfps/cpp/interpolator.cpp
@@ -43,16 +43,32 @@ interpolator<rnumber, interp_neighbours>::interpolator(
             4, tdims,
             this->unbuffered_descriptor->mpi_dtype,
             this->unbuffered_descriptor->comm);
-    this->f0 = new rnumber[this->descriptor->local_size];
-    this->f1 = new rnumber[this->descriptor->local_size];
+    if (sizeof(rnumber) == 4)
+    {
+        this->f0 = (rnumber*)((void*)fftwf_alloc_real(this->descriptor->local_size));
+        this->f1 = (rnumber*)((void*)fftwf_alloc_real(this->descriptor->local_size));
+    }
+    else if (sizeof(rnumber) == 8)
+    {
+        this->f0 = (rnumber*)((void*)fftw_alloc_real(this->descriptor->local_size));
+        this->f1 = (rnumber*)((void*)fftw_alloc_real(this->descriptor->local_size));
+    }
     this->temp = this->f1 + this->buffer_size;
 }
 
 template <class rnumber, int interp_neighbours>
 interpolator<rnumber, interp_neighbours>::~interpolator()
 {
-    delete[] this->f0;
-    delete[] this->f1;
+    if (sizeof(rnumber) == 4)
+    {
+        fftwf_free((float*)((void*)this->f0));
+        fftwf_free((float*)((void*)this->f1));
+    }
+    else if (sizeof(rnumber) == 8)
+    {
+        fftw_free((double*)((void*)this->f0));
+        fftw_free((double*)((void*)this->f1));
+    }
     delete this->descriptor;
 }
 
@@ -146,18 +162,34 @@ void interpolator<rnumber, interp_neighbours>::operator()(
         this->compute_beta(deriv[2], xx[2], bz);
     }
     std::fill_n(dest, 3, 0);
+    ptrdiff_t bigiz, bigiy, bigix;
+    double tval[3];
     for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++)
-    for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++)
-    for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++)
-        for (int c=0; c<3; c++)
+    {
+        bigiz = ptrdiff_t(xg[2]+iz);
+        std::fill_n(tval, 3, 0);
+        for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++)
         {
-            ptrdiff_t tindex = ((ptrdiff_t(    xg[2]+iz                             ) *this->descriptor->sizes[1] +
-                                 ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1])))*this->descriptor->sizes[2] +
-                                 ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2])))*3+c + this->buffer_size;
-            dest[c] += (this->f0[tindex]*(1-t) + t*this->f1[tindex])*(bz[iz+interp_neighbours]*
-                                                                      by[iy+interp_neighbours]*
-                                                                      bx[ix+interp_neighbours]);
+            bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1]));
+            for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++)
+            {
+                bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2]));
+                for (int c=0; c<3; c++)
+                {
+                    ptrdiff_t tindex = ((bigiz *this->descriptor->sizes[1] +
+                                         bigiy)*this->descriptor->sizes[2] +
+                                         bigix)*3+c + this->buffer_size;
+                    dest[c] += (this->f0[tindex]*(1-t) + t*this->f1[tindex])*(bz[iz+interp_neighbours]*
+                                                                              by[iy+interp_neighbours]*
+                                                                              bx[ix+interp_neighbours]);
+                    tval[c] += (this->f0[tindex]*(1-t) + t*this->f1[tindex])*(bz[iz+interp_neighbours]*
+                                                                              by[iy+interp_neighbours]*
+                                                                              bx[ix+interp_neighbours]);
+                }
+            }
         }
+        DEBUG_MSG("%ld %d %d %g %g %g\n", bigiz, xg[1], xg[0], tval[0], tval[1], tval[2]);
+    }
 }
 
 template class interpolator<float, 1>;
diff --git a/bfps/cpp/interpolator.hpp b/bfps/cpp/interpolator.hpp
index 4f2716a27a589880dcebce282e696b76160ce4cf..299fef4910693b878d3f2734636b43dbf8f98a6d 100644
--- a/bfps/cpp/interpolator.hpp
+++ b/bfps/cpp/interpolator.hpp
@@ -40,9 +40,9 @@
 #define INTERPOLATOR
 
 typedef void (*base_polynomial_values)(
-        int derivative,
-        double fraction,
-        double *destination);
+        const int derivative,
+        const double fraction,
+        double *__restrict__ destination);
 
 template <class rnumber, int interp_neighbours>
 class interpolator
@@ -59,7 +59,7 @@ class interpolator
                 base_polynomial_values BETA_POLYS);
         ~interpolator();
 
-        void operator()(double t, int *xg, double *xx, double *dest, int *deriv = NULL);
+        void operator()(double t, int *__restrict__ xg, double *__restrict__ xx, double *__restrict__ dest, int *deriv = NULL);
         /* destroys input */
         int read_rFFTW(void *src);
 };
diff --git a/bfps/cpp/particles.cpp b/bfps/cpp/particles.cpp
index 8a4848a78d93a6bd5d3c1d8c54a5e17f14f0f79e..6d7e14290f1bd69decff1ffa5c38d0e015001d90 100644
--- a/bfps/cpp/particles.cpp
+++ b/bfps/cpp/particles.cpp
@@ -22,10 +22,9 @@
 *                                                                     *
 **********************************************************************/
 
-// code is generally compiled via setuptools, therefore NDEBUG is present
-//#ifdef NDEBUG
-//#undef NDEBUG
-//#endif//NDEBUG
+
+
+#define NDEBUG
 
 
 #include <cmath>
diff --git a/bfps/cpp/particles.hpp b/bfps/cpp/particles.hpp
index 49537de1d7baca127485e001f214c85ecb51e116..4b743a006c15c1c83b65d66b9fd8056caee6e2e9 100644
--- a/bfps/cpp/particles.hpp
+++ b/bfps/cpp/particles.hpp
@@ -107,19 +107,19 @@ class particles
         /* an Euler step is needed to compute an estimate of future positions,
          * which is needed for synchronization.
          * */
-        void jump_estimate(double *jump_length);
-        void get_rhs(double *x, double *rhs);
-        void get_rhs(double t, double *x, double *rhs);
+        void jump_estimate(double *__restrict__ jump_length);
+        void get_rhs(double *__restrict__ x, double *__restrict__ rhs);
+        void get_rhs(double t, double *__restrict__ x, double *__restrict__ rhs);
 
         int get_rank(double z); // get rank for given value of z
         void synchronize();
-        void synchronize_single_particle_state(int p, double *x, int source_id = -1);
-        void get_grid_coordinates(double *x, int *xg, double *xx);
+        void synchronize_single_particle_state(int p, double *__restrict__ x, int source_id = -1);
+        void get_grid_coordinates(double *__restrict__ x, int *__restrict__ xg, double *__restrict__ xx);
         void sample_vec_field(
             interpolator<rnumber, interp_neighbours> *vec,
             double t,
-            double *x,
-            double *y,
+            double *__restrict__ x,
+            double *__restrict__ y,
             const bool synch = false,
             int *deriv = NULL);
         inline void sample_vec_field(interpolator<rnumber, interp_neighbours> *field, double *vec_values)
diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6f1a49bc7b538b94286c397ab4613540a1b72f9f
--- /dev/null
+++ b/bfps/cpp/rFFTW_interpolator.cpp
@@ -0,0 +1,152 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#define NDEBUG
+
+#include "rFFTW_interpolator.hpp"
+
+template <class rnumber, int interp_neighbours>
+rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator(
+        fluid_solver_base<rnumber> *fs,
+        base_polynomial_values BETA_POLYS)
+{
+    this->descriptor = fs->rd;
+    this->field_size = 2*fs->cd->local_size;
+    this->compute_beta = BETA_POLYS;
+    if (sizeof(rnumber) == 4)
+    {
+        this->f0 = (rnumber*)((void*)fftwf_alloc_real(this->field_size));
+        this->f1 = (rnumber*)((void*)fftwf_alloc_real(this->field_size));
+    }
+    else if (sizeof(rnumber) == 8)
+    {
+        this->f0 = (rnumber*)((void*)fftw_alloc_real(this->field_size));
+        this->f1 = (rnumber*)((void*)fftw_alloc_real(this->field_size));
+    }
+    this->temp = this->f1;
+}
+
+template <class rnumber, int interp_neighbours>
+rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator()
+{
+    if (sizeof(rnumber) == 4)
+    {
+        fftwf_free((float*)((void*)this->f0));
+        fftwf_free((float*)((void*)this->f1));
+    }
+    else if (sizeof(rnumber) == 8)
+    {
+        fftw_free((double*)((void*)this->f0));
+        fftw_free((double*)((void*)this->f1));
+    }
+}
+
+template <class rnumber, int interp_neighbours>
+int rFFTW_interpolator<rnumber, interp_neighbours>::read_rFFTW(void *void_src)
+{
+    /* first, roll fields */
+    rnumber *tmp = this->f0;
+    this->f0 = this->f1;
+    this->f1 = tmp;
+    this->temp = this->f0;
+    /* now do regular things */
+    rnumber *src = (rnumber*)void_src;
+    rnumber *dst = this->f1;
+    /* do big copy of middle stuff */
+    std::copy(src,
+              src + this->field_size,
+              dst);
+    return EXIT_SUCCESS;
+}
+
+template <class rnumber, int interp_neighbours>
+void rFFTW_interpolator<rnumber, interp_neighbours>::operator()(
+        double t,
+        int *xg,
+        double *xx,
+        double *dest,
+        int *deriv)
+{
+    double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2];
+    if (deriv == NULL)
+    {
+        this->compute_beta(0, xx[0], bx);
+        this->compute_beta(0, xx[1], by);
+        this->compute_beta(0, xx[2], bz);
+    }
+    else
+    {
+        this->compute_beta(deriv[0], xx[0], bx);
+        this->compute_beta(deriv[1], xx[1], by);
+        this->compute_beta(deriv[2], xx[2], bz);
+    }
+    std::fill_n(dest, 3, 0);
+    ptrdiff_t bigiz, bigiy, bigix;
+    double tval[3];
+    for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++)
+    {
+        bigiz = ptrdiff_t(((xg[2]+iz) + this->descriptor->sizes[0]) % this->descriptor->sizes[0]);
+        if (this->descriptor->myrank == this->descriptor->rank[bigiz])
+        {
+            std::fill_n(tval, 3, 0);
+            for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++)
+            {
+                bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1]));
+                for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++)
+                {
+                    bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2]));
+                    ptrdiff_t tindex = (((bigiz-this->descriptor->starts[0])*this->descriptor->sizes[1] +
+                                         bigiy)*(this->descriptor->sizes[2]+2) +
+                                         bigix)*3;
+                    for (int c=0; c<3; c++)
+                    {
+                        dest[c] += (this->f0[tindex+c]*(1-t) + t*this->f1[tindex+c])*(bz[iz+interp_neighbours]*
+                                                                                      by[iy+interp_neighbours]*
+                                                                                      bx[ix+interp_neighbours]);
+                        tval[c] += (this->f0[tindex+c]*(1-t) + t*this->f1[tindex+c])*(bz[iz+interp_neighbours]*
+                                                                                      by[iy+interp_neighbours]*
+                                                                                      bx[ix+interp_neighbours]);
+                    }
+                }
+            }
+            DEBUG_MSG("%ld %d %d %g %g %g\n", bigiz, xg[1], xg[0], tval[0], tval[1], tval[2]);
+        }
+    }
+}
+
+template class rFFTW_interpolator<float, 1>;
+template class rFFTW_interpolator<float, 2>;
+template class rFFTW_interpolator<float, 3>;
+template class rFFTW_interpolator<float, 4>;
+template class rFFTW_interpolator<float, 5>;
+template class rFFTW_interpolator<float, 6>;
+template class rFFTW_interpolator<double, 1>;
+template class rFFTW_interpolator<double, 2>;
+template class rFFTW_interpolator<double, 3>;
+template class rFFTW_interpolator<double, 4>;
+template class rFFTW_interpolator<double, 5>;
+template class rFFTW_interpolator<double, 6>;
+
diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7a32e75808ce572b2e902d2ebd0a22b489a4b81e
--- /dev/null
+++ b/bfps/cpp/rFFTW_interpolator.hpp
@@ -0,0 +1,62 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#include "field_descriptor.hpp"
+#include "fftw_tools.hpp"
+#include "fluid_solver_base.hpp"
+#include "spline_n1.hpp"
+#include "spline_n2.hpp"
+#include "spline_n3.hpp"
+#include "spline_n4.hpp"
+#include "spline_n5.hpp"
+#include "spline_n6.hpp"
+#include "Lagrange_polys.hpp"
+#include "interpolator.hpp"
+
+#ifndef RFFTW_INTERPOLATOR
+
+#define RFFTW_INTERPOLATOR
+
+template <class rnumber, int interp_neighbours>
+class rFFTW_interpolator
+{
+    public:
+        ptrdiff_t field_size;
+        base_polynomial_values compute_beta;
+        field_descriptor<rnumber> *descriptor;
+        rnumber *f0, *f1, *temp;
+
+        rFFTW_interpolator(
+                fluid_solver_base<rnumber> *FSOLVER,
+                base_polynomial_values BETA_POLYS);
+        ~rFFTW_interpolator();
+
+        void operator()(double t, int *__restrict__ xg, double *__restrict__ xx, double *__restrict__ dest, int *deriv = NULL);
+        int read_rFFTW(void *src);
+};
+
+#endif//RFFTW_INTERPOLATOR
+
diff --git a/bfps/cpp/rFFTW_particles.cpp b/bfps/cpp/rFFTW_particles.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..af225f9541d62adf93f1ff89aeadfb9a7cbe0cf1
--- /dev/null
+++ b/bfps/cpp/rFFTW_particles.cpp
@@ -0,0 +1,423 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+//#define NDEBUG
+
+#include <cmath>
+#include <cassert>
+#include <cstring>
+#include <string>
+#include <sstream>
+
+#include "base.hpp"
+#include "rFFTW_particles.hpp"
+#include "fftw_tools.hpp"
+
+
+extern int myrank, nprocs;
+
+template <int particle_type, class rnumber, int interp_neighbours>
+rFFTW_particles<particle_type, rnumber, interp_neighbours>::rFFTW_particles(
+        const char *NAME,
+        fluid_solver_base<rnumber> *FSOLVER,
+        rFFTW_interpolator<rnumber, interp_neighbours> *FIELD,
+        const int NPARTICLES,
+        const int TRAJ_SKIP,
+        const int INTEGRATION_STEPS)
+{
+    switch(particle_type)
+    {
+        case VELOCITY_TRACER:
+            this->ncomponents = 3;
+            break;
+        default:
+            this->ncomponents = 3;
+            break;
+    }
+    assert((INTEGRATION_STEPS <= 6) &&
+           (INTEGRATION_STEPS >= 1));
+    strncpy(this->name, NAME, 256);
+    this->fs = FSOLVER;
+    this->nparticles = NPARTICLES;
+    this->vel = FIELD;
+    this->integration_steps = INTEGRATION_STEPS;
+    this->traj_skip = TRAJ_SKIP;
+    this->myrank = this->vel->descriptor->myrank;
+    this->nprocs = this->vel->descriptor->nprocs;
+    this->comm = this->vel->descriptor->comm;
+    this->array_size = this->nparticles * this->ncomponents;
+    this->state = new double[this->array_size];
+    std::fill_n(this->state, this->array_size, 0.0);
+    for (int i=0; i < this->integration_steps; i++)
+    {
+        this->rhs[i] = new double[this->array_size];
+        std::fill_n(this->rhs[i], this->array_size, 0.0);
+    }
+
+    // compute dx, dy, dz;
+    this->dx = 4*acos(0) / (this->fs->dkx*this->fs->rd->sizes[2]);
+    this->dy = 4*acos(0) / (this->fs->dky*this->fs->rd->sizes[1]);
+    this->dz = 4*acos(0) / (this->fs->dkz*this->fs->rd->sizes[0]);
+
+    // compute lower and upper bounds
+    this->lbound = new double[nprocs];
+    this->ubound = new double[nprocs];
+    double *tbound = new double[nprocs];
+    std::fill_n(tbound, nprocs, 0.0);
+    tbound[this->myrank] = this->fs->rd->starts[0]*this->dz;
+    MPI_Allreduce(
+            tbound,
+            this->lbound,
+            nprocs,
+            MPI_DOUBLE,
+            MPI_SUM,
+            this->comm);
+    std::fill_n(tbound, nprocs, 0.0);
+    tbound[this->myrank] = (this->fs->rd->starts[0] + this->fs->rd->subsizes[0])*this->dz;
+    MPI_Allreduce(
+            tbound,
+            this->ubound,
+            nprocs,
+            MPI_DOUBLE,
+            MPI_SUM,
+            this->comm);
+    delete[] tbound;
+    //for (int r = 0; r<nprocs; r++)
+    //    DEBUG_MSG(
+    //            "lbound[%d] = %lg, ubound[%d] = %lg\n",
+    //            r, this->lbound[r],
+    //            r, this->ubound[r]
+    //            );
+}
+
+template <int particle_type, class rnumber, int interp_neighbours>
+rFFTW_particles<particle_type, rnumber, interp_neighbours>::~rFFTW_particles()
+{
+    delete[] this->state;
+    for (int i=0; i < this->integration_steps; i++)
+    {
+        delete[] this->rhs[i];
+    }
+    delete[] this->lbound;
+    delete[] this->ubound;
+}
+
+template <int particle_type, class rnumber,  int interp_neighbours>
+void rFFTW_particles<particle_type, rnumber, interp_neighbours>::sample_vec_field(
+        rFFTW_interpolator<rnumber, interp_neighbours> *vec,
+        double t,
+        double *x,
+        double *y,
+        int *deriv)
+{
+    /* get grid coordinates */
+    int *xg = new int[3*this->nparticles];
+    double *xx = new double[3*this->nparticles];
+    double *yy =  new double[3*this->nparticles];
+    std::fill_n(yy, 3*this->nparticles, 0.0);
+    this->get_grid_coordinates(x, xg, xx);
+    /* perform interpolation */
+    for (int p=0; p<this->nparticles; p++)
+        (*vec)(t, xg + p*3, xx + p*3, yy + p*3, deriv);
+    MPI_Allreduce(
+            yy,
+            y,
+            3*this->nparticles,
+            MPI_DOUBLE,
+            MPI_SUM,
+            this->comm);
+    delete[] yy;
+    delete[] xg;
+    delete[] xx;
+}
+
+template <int particle_type, class rnumber, int interp_neighbours>
+void rFFTW_particles<particle_type, rnumber, interp_neighbours>::get_rhs(double t, double *x, double *y)
+{
+    switch(particle_type)
+    {
+        case VELOCITY_TRACER:
+            this->sample_vec_field(this->vel, t, x, y);
+            break;
+    }
+}
+
+
+template <int particle_type, class rnumber, int interp_neighbours>
+int rFFTW_particles<particle_type, rnumber, interp_neighbours>::get_rank(double z)
+{
+    int tmp = this->fs->rd->rank[MOD(int(floor(z/this->dz)), this->fs->rd->sizes[0])];
+    assert(tmp >= 0 && tmp < this->fs->rd->nprocs);
+    return tmp;
+}
+
+template <int particle_type, class rnumber, int interp_neighbours>
+void rFFTW_particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
+{
+    for (int i=this->integration_steps-2; i>=0; i--)
+        std::copy(this->rhs[i],
+                  this->rhs[i] + this->array_size,
+                  this->rhs[i+1]);
+}
+
+
+
+template <int particle_type, class rnumber, int interp_neighbours>
+void rFFTW_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(int nsteps)
+{
+    ptrdiff_t ii;
+    this->get_rhs(0, this->state, this->rhs[0]);
+    switch(nsteps)
+    {
+        case 1:
+            for (int p=0; p<this->nparticles; p++)
+                for (int i=0; i<this->ncomponents; i++)
+                {
+                    ii = p*this->ncomponents+i;
+                    this->state[ii] += this->dt*this->rhs[0][ii];
+                }
+            break;
+        case 2:
+            for (int p=0; p<this->nparticles; p++)
+                for (int i=0; i<this->ncomponents; i++)
+                {
+                    ii = p*this->ncomponents+i;
+                    this->state[ii] += this->dt*(3*this->rhs[0][ii]
+                                               -   this->rhs[1][ii])/2;
+                }
+            break;
+        case 3:
+            for (int p=0; p<this->nparticles; p++)
+                for (int i=0; i<this->ncomponents; i++)
+                {
+                    ii = p*this->ncomponents+i;
+                    this->state[ii] += this->dt*(23*this->rhs[0][ii]
+                                               - 16*this->rhs[1][ii]
+                                               +  5*this->rhs[2][ii])/12;
+                }
+            break;
+        case 4:
+            for (int p=0; p<this->nparticles; p++)
+                for (int i=0; i<this->ncomponents; i++)
+                {
+                    ii = p*this->ncomponents+i;
+                    this->state[ii] += this->dt*(55*this->rhs[0][ii]
+                                               - 59*this->rhs[1][ii]
+                                               + 37*this->rhs[2][ii]
+                                               -  9*this->rhs[3][ii])/24;
+                }
+            break;
+        case 5:
+            for (int p=0; p<this->nparticles; p++)
+                for (int i=0; i<this->ncomponents; i++)
+                {
+                    ii = p*this->ncomponents+i;
+                    this->state[ii] += this->dt*(1901*this->rhs[0][ii]
+                                               - 2774*this->rhs[1][ii]
+                                               + 2616*this->rhs[2][ii]
+                                               - 1274*this->rhs[3][ii]
+                                               +  251*this->rhs[4][ii])/720;
+                }
+            break;
+        case 6:
+            for (int p=0; p<this->nparticles; p++)
+                for (int i=0; i<this->ncomponents; i++)
+                {
+                    ii = p*this->ncomponents+i;
+                    this->state[ii] += this->dt*(4277*this->rhs[0][ii]
+                                               - 7923*this->rhs[1][ii]
+                                               + 9982*this->rhs[2][ii]
+                                               - 7298*this->rhs[3][ii]
+                                               + 2877*this->rhs[4][ii]
+                                               -  475*this->rhs[5][ii])/1440;
+                }
+            break;
+    }
+    this->roll_rhs();
+}
+
+
+template <int particle_type, class rnumber, int interp_neighbours>
+void rFFTW_particles<particle_type, rnumber, interp_neighbours>::step()
+{
+    this->AdamsBashforth((this->iteration < this->integration_steps) ?
+                            this->iteration+1 :
+                            this->integration_steps);
+    this->iteration++;
+}
+
+
+
+template <int particle_type, class rnumber, int interp_neighbours>
+void rFFTW_particles<particle_type, rnumber, interp_neighbours>::get_grid_coordinates(double *x, int *xg, double *xx)
+{
+    static double grid_size[] = {this->dx, this->dy, this->dz};
+    double tval;
+    std::fill_n(xg, this->nparticles*3, 0);
+    std::fill_n(xx, this->nparticles*3, 0.0);
+    for (int p=0; p<this->nparticles; p++)
+    {
+        for (int c=0; c<3; c++)
+        {
+            tval = floor(x[p*this->ncomponents+c]/grid_size[c]);
+            xg[p*3+c] = MOD(int(tval), this->fs->rd->sizes[2-c]);
+            xx[p*3+c] = (x[p*this->ncomponents+c] - tval*grid_size[c]) / grid_size[c];
+        }
+        /*xg[p*3+2] -= this->fs->rd->starts[0];
+        if (this->myrank == this->fs->rd->rank[0] &&
+            xg[p*3+2] > this->fs->rd->subsizes[0])
+            xg[p*3+2] -= this->fs->rd->sizes[0];*/
+    }
+}
+
+template <int particle_type, class rnumber, int interp_neighbours>
+void rFFTW_particles<particle_type, rnumber, interp_neighbours>::read(hid_t data_file_id)
+{
+    if (this->myrank == 0)
+    {
+        std::string temp_string = (std::string("/particles/") +
+                                   std::string(this->name) +
+                                   std::string("/state"));
+        hid_t dset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
+        hid_t mspace, rspace;
+        hsize_t count[4], offset[4];
+        rspace = H5Dget_space(dset);
+        H5Sget_simple_extent_dims(rspace, count, NULL);
+        count[0] = 1;
+        offset[0] = this->iteration / this->traj_skip;
+        offset[1] = 0;
+        offset[2] = 0;
+        mspace = H5Screate_simple(3, count, NULL);
+        H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        H5Dread(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->state);
+        H5Sclose(mspace);
+        H5Sclose(rspace);
+        H5Dclose(dset);
+        if (this->iteration > 0)
+        {
+            temp_string = (std::string("/particles/") +
+                           std::string(this->name) +
+                           std::string("/rhs"));
+            dset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
+            rspace = H5Dget_space(dset);
+            H5Sget_simple_extent_dims(rspace, count, NULL);
+            //reading from last available position
+            offset[0] = count[0] - 1;
+            offset[3] = 0;
+            count[0] = 1;
+            count[1] = 1;
+            mspace = H5Screate_simple(4, count, NULL);
+            for (int i=0; i<this->integration_steps; i++)
+            {
+                offset[1] = i;
+                H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+                H5Dread(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->rhs[i]);
+            }
+            H5Sclose(mspace);
+            H5Sclose(rspace);
+            H5Dclose(dset);
+        }
+    }
+    MPI_Bcast(
+            this->state,
+            this->array_size,
+            MPI_DOUBLE,
+            0,
+            this->comm);
+    for (int i = 0; i<this->integration_steps; i++)
+        MPI_Bcast(
+                this->rhs[i],
+                this->array_size,
+                MPI_DOUBLE,
+                0,
+                this->comm);
+}
+
+template <int particle_type, class rnumber, int interp_neighbours>
+void rFFTW_particles<particle_type, rnumber, interp_neighbours>::write(hid_t data_file_id, bool write_rhs)
+{
+    if (this->myrank == 0)
+    {
+        std::string temp_string = (std::string("/particles/") +
+                                   std::string(this->name) +
+                                   std::string("/state"));
+        hid_t dset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
+        hid_t mspace, wspace;
+        hsize_t count[4], offset[4];
+        wspace = H5Dget_space(dset);
+        H5Sget_simple_extent_dims(wspace, count, NULL);
+        count[0] = 1;
+        offset[0] = this->iteration / this->traj_skip;
+        offset[1] = 0;
+        offset[2] = 0;
+        mspace = H5Screate_simple(3, count, NULL);
+        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->state);
+        H5Sclose(mspace);
+        H5Sclose(wspace);
+        H5Dclose(dset);
+        if (write_rhs)
+        {
+            temp_string = (std::string("/particles/") +
+                           std::string(this->name) +
+                           std::string("/rhs"));
+            dset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
+            wspace = H5Dget_space(dset);
+            H5Sget_simple_extent_dims(wspace, count, NULL);
+            //writing to last available position
+            offset[0] = count[0] - 1;
+            count[0] = 1;
+            count[1] = 1;
+            offset[3] = 0;
+            mspace = H5Screate_simple(4, count, NULL);
+            for (int i=0; i<this->integration_steps; i++)
+            {
+                offset[1] = i;
+                H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+                H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->rhs[i]);
+            }
+            H5Sclose(mspace);
+            H5Sclose(wspace);
+            H5Dclose(dset);
+        }
+    }
+}
+
+
+/*****************************************************************************/
+template class rFFTW_particles<VELOCITY_TRACER, float, 1>;
+template class rFFTW_particles<VELOCITY_TRACER, float, 2>;
+template class rFFTW_particles<VELOCITY_TRACER, float, 3>;
+template class rFFTW_particles<VELOCITY_TRACER, float, 4>;
+template class rFFTW_particles<VELOCITY_TRACER, float, 5>;
+template class rFFTW_particles<VELOCITY_TRACER, float, 6>;
+template class rFFTW_particles<VELOCITY_TRACER, double, 1>;
+template class rFFTW_particles<VELOCITY_TRACER, double, 2>;
+template class rFFTW_particles<VELOCITY_TRACER, double, 3>;
+template class rFFTW_particles<VELOCITY_TRACER, double, 4>;
+template class rFFTW_particles<VELOCITY_TRACER, double, 5>;
+template class rFFTW_particles<VELOCITY_TRACER, double, 6>;
+/*****************************************************************************/
diff --git a/bfps/cpp/rFFTW_particles.hpp b/bfps/cpp/rFFTW_particles.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ac44cc784d4a1f08bd50e7e3aa6070d3e399cbbe
--- /dev/null
+++ b/bfps/cpp/rFFTW_particles.hpp
@@ -0,0 +1,130 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <iostream>
+#include <hdf5.h>
+#include "base.hpp"
+#include "fluid_solver_base.hpp"
+#include "rFFTW_interpolator.hpp"
+#include "particles.hpp"
+
+#ifndef RFFTW_PARTICLES
+
+#define RFFTW_PARTICLES
+
+template <int particle_type, class rnumber, int interp_neighbours>
+class rFFTW_particles
+{
+    public:
+        int myrank, nprocs;
+        MPI_Comm comm;
+        fluid_solver_base<rnumber> *fs;
+        rnumber *data;
+
+        /* watching is an array of shape [nparticles], with
+         * watching[p] being true if particle p is in the domain of myrank
+         * or in the buffer regions.
+         * only used if multistep is false.
+         * */
+        bool *watching;
+        /* computing is an array of shape [nparticles], with
+         * computing[p] being the rank that is currently working on particle p
+         * */
+        int *computing;
+
+        /* state will generally hold all the information about the particles.
+         * in the beginning, we will only need to solve 3D ODEs, but I figured
+         * a general ncomponents is better, since we may change our minds.
+         * */
+        double *state;
+        double *rhs[6];
+        int nparticles;
+        int ncomponents;
+        int array_size;
+        int integration_steps;
+        int traj_skip;
+        double *lbound;
+        double *ubound;
+        rFFTW_interpolator<rnumber, interp_neighbours> *vel;
+
+        /* simulation parameters */
+        char name[256];
+        int iteration;
+        double dt;
+
+        /* physical parameters of field */
+        double dx, dy, dz;
+
+        /* methods */
+
+        /* constructor and destructor.
+         * allocate and deallocate:
+         *  this->state
+         *  this->rhs
+         *  this->lbound
+         *  this->ubound
+         *  this->computing
+         *  this->watching
+         * */
+        rFFTW_particles(
+                const char *NAME,
+                fluid_solver_base<rnumber> *FSOLVER,
+                rFFTW_interpolator<rnumber, interp_neighbours> *FIELD,
+                const int NPARTICLES,
+                const int TRAJ_SKIP,
+                const int INTEGRATION_STEPS = 2);
+        ~rFFTW_particles();
+
+        void get_rhs(double *__restrict__ x, double *__restrict__ rhs);
+        void get_rhs(double t, double *__restrict__ x, double *__restrict__ rhs);
+
+        int get_rank(double z); // get rank for given value of z
+        void get_grid_coordinates(double *__restrict__ x, int *__restrict__ xg, double *__restrict__ xx);
+        void sample_vec_field(
+            rFFTW_interpolator<rnumber, interp_neighbours> *vec,
+            double t,
+            double *__restrict__ x,
+            double *__restrict__ y,
+            int *deriv = NULL);
+        inline void sample_vec_field(rFFTW_interpolator<rnumber, interp_neighbours> *field, double *vec_values)
+        {
+            this->sample_vec_field(field, 1.0, this->state, vec_values, NULL);
+        }
+
+        /* input/output */
+        void read(hid_t data_file_id);
+        void write(hid_t data_file_id, bool write_rhs = true);
+
+        /* solvers */
+        void step();
+        void roll_rhs();
+        void AdamsBashforth(int nsteps);
+};
+
+#endif//RFFTW_PARTICLES
+
diff --git a/bfps/cpp/slab_field_particles.cpp b/bfps/cpp/slab_field_particles.cpp
index 3988659a0caba5eb46fd2070b178f68a5694a343..15fa363f6d277d34c6081fd545c4578e1f735929 100644
--- a/bfps/cpp/slab_field_particles.cpp
+++ b/bfps/cpp/slab_field_particles.cpp
@@ -22,10 +22,9 @@
 *                                                                     *
 **********************************************************************/
 
-// code is generally compiled via setuptools, therefore NDEBUG is present
-//#ifdef NDEBUG
-//#undef NDEBUG
-//#endif//NDEBUG
+
+
+#define NDEBUG
 
 
 #include <cmath>
diff --git a/bfps/cpp/spline_n1.hpp b/bfps/cpp/spline_n1.hpp
index ebb440adad954557da56d8f6fb114a5d366d487a..707e9fbcd3a1772c07ba59e6494260f0d8740a7d 100644
--- a/bfps/cpp/spline_n1.hpp
+++ b/bfps/cpp/spline_n1.hpp
@@ -28,9 +28,9 @@
 
 #define SPLINE_N1
 
-void beta_n1_m0(int deriv, double x, double *poly_val);
-void beta_n1_m1(int deriv, double x, double *poly_val);
-void beta_n1_m2(int deriv, double x, double *poly_val);
+void beta_n1_m0(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n1_m1(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n1_m2(const int deriv, const double x, double *__restrict__ poly_val);
 
 #endif//SPLINE_N1
 
diff --git a/bfps/cpp/spline_n2.hpp b/bfps/cpp/spline_n2.hpp
index 181cca2d73da4f6d6d3222e700e185c2184e392c..b29e7be13f663d9bdbca310b91b7e3a1db92a205 100644
--- a/bfps/cpp/spline_n2.hpp
+++ b/bfps/cpp/spline_n2.hpp
@@ -28,11 +28,11 @@
 
 #define SPLINE_N2
 
-void beta_n2_m0(int deriv, double x, double *poly_val);
-void beta_n2_m1(int deriv, double x, double *poly_val);
-void beta_n2_m2(int deriv, double x, double *poly_val);
-void beta_n2_m3(int deriv, double x, double *poly_val);
-void beta_n2_m4(int deriv, double x, double *poly_val);
+void beta_n2_m0(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n2_m1(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n2_m2(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n2_m3(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n2_m4(const int deriv, const double x, double *__restrict__ poly_val);
 
 #endif//SPLINE_N2
 
diff --git a/bfps/cpp/spline_n3.hpp b/bfps/cpp/spline_n3.hpp
index e53507573438db3275eff1bafa9089b318cfe4cc..159f8b970947b507205f40fcfb5140ca683770ce 100644
--- a/bfps/cpp/spline_n3.hpp
+++ b/bfps/cpp/spline_n3.hpp
@@ -28,13 +28,13 @@
 
 #define SPLINE_N3
 
-void beta_n3_m0(int deriv, double x, double *poly_val);
-void beta_n3_m1(int deriv, double x, double *poly_val);
-void beta_n3_m2(int deriv, double x, double *poly_val);
-void beta_n3_m3(int deriv, double x, double *poly_val);
-void beta_n3_m4(int deriv, double x, double *poly_val);
-void beta_n3_m5(int deriv, double x, double *poly_val);
-void beta_n3_m6(int deriv, double x, double *poly_val);
+void beta_n3_m0(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n3_m1(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n3_m2(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n3_m3(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n3_m4(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n3_m5(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n3_m6(const int deriv, const double x, double *__restrict__ poly_val);
 
 #endif//SPLINE_N3
 
diff --git a/bfps/cpp/spline_n4.hpp b/bfps/cpp/spline_n4.hpp
index a3cb59609f07ddf12700839f06b24beba9d44294..4b80d16ea110fe1738f1eecca5f2f9cbe6e43e84 100644
--- a/bfps/cpp/spline_n4.hpp
+++ b/bfps/cpp/spline_n4.hpp
@@ -28,15 +28,15 @@
 
 #define SPLINE_N4
 
-void beta_n4_m0(int deriv, double x, double *poly_val);
-void beta_n4_m1(int deriv, double x, double *poly_val);
-void beta_n4_m2(int deriv, double x, double *poly_val);
-void beta_n4_m3(int deriv, double x, double *poly_val);
-void beta_n4_m4(int deriv, double x, double *poly_val);
-void beta_n4_m5(int deriv, double x, double *poly_val);
-void beta_n4_m6(int deriv, double x, double *poly_val);
-void beta_n4_m7(int deriv, double x, double *poly_val);
-void beta_n4_m8(int deriv, double x, double *poly_val);
+void beta_n4_m0(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n4_m1(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n4_m2(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n4_m3(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n4_m4(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n4_m5(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n4_m6(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n4_m7(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n4_m8(const int deriv, const double x, double *__restrict__ poly_val);
 
 #endif//SPLINE_N4
 
diff --git a/bfps/cpp/spline_n5.hpp b/bfps/cpp/spline_n5.hpp
index e256ef31f88c7e231d58a163011e067b8309f04c..fccd512076dc571fdded3fe3ccf44f7e4132ec48 100644
--- a/bfps/cpp/spline_n5.hpp
+++ b/bfps/cpp/spline_n5.hpp
@@ -28,17 +28,17 @@
 
 #define SPLINE_N5
 
-void beta_n5_m0(int deriv, double x, double *poly_val);
-void beta_n5_m1(int deriv, double x, double *poly_val);
-void beta_n5_m2(int deriv, double x, double *poly_val);
-void beta_n5_m3(int deriv, double x, double *poly_val);
-void beta_n5_m4(int deriv, double x, double *poly_val);
-void beta_n5_m5(int deriv, double x, double *poly_val);
-void beta_n5_m6(int deriv, double x, double *poly_val);
-void beta_n5_m7(int deriv, double x, double *poly_val);
-void beta_n5_m8(int deriv, double x, double *poly_val);
-void beta_n5_m9(int deriv, double x, double *poly_val);
-void beta_n5_m10(int deriv, double x, double *poly_val);
+void beta_n5_m0( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n5_m1( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n5_m2( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n5_m3( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n5_m4( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n5_m5( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n5_m6( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n5_m7( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n5_m8( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n5_m9( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n5_m10(const int deriv, const double x, double *__restrict__ poly_val);
 
 #endif//SPLINE_N5
 
diff --git a/bfps/cpp/spline_n6.hpp b/bfps/cpp/spline_n6.hpp
index 6e0a4b0ef33831868bd579c144be7ed9a6d05bb6..8eb99092eafd0f1f7c783a023eae49d714479ce1 100644
--- a/bfps/cpp/spline_n6.hpp
+++ b/bfps/cpp/spline_n6.hpp
@@ -28,19 +28,19 @@
 
 #define SPLINE_N6
 
-void beta_n6_m0(int deriv, double x, double *poly_val);
-void beta_n6_m1(int deriv, double x, double *poly_val);
-void beta_n6_m2(int deriv, double x, double *poly_val);
-void beta_n6_m3(int deriv, double x, double *poly_val);
-void beta_n6_m4(int deriv, double x, double *poly_val);
-void beta_n6_m5(int deriv, double x, double *poly_val);
-void beta_n6_m6(int deriv, double x, double *poly_val);
-void beta_n6_m7(int deriv, double x, double *poly_val);
-void beta_n6_m8(int deriv, double x, double *poly_val);
-void beta_n6_m9(int deriv, double x, double *poly_val);
-void beta_n6_m10(int deriv, double x, double *poly_val);
-void beta_n6_m11(int deriv, double x, double *poly_val);
-void beta_n6_m12(int deriv, double x, double *poly_val);
+void beta_n6_m0( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m1( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m2( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m3( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m4( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m5( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m6( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m7( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m8( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m9( const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m10(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m11(const int deriv, const double x, double *__restrict__ poly_val);
+void beta_n6_m12(const int deriv, const double x, double *__restrict__ poly_val);
 
 #endif//SPLINE_N6
 
diff --git a/bfps/cpp/tracers.cpp b/bfps/cpp/tracers.cpp
index 5e0e106ea658741fdf1b1bb209e100b22474504c..3d9fbfb6a1e357d70452466b6cc901659444539d 100644
--- a/bfps/cpp/tracers.cpp
+++ b/bfps/cpp/tracers.cpp
@@ -22,10 +22,9 @@
 *                                                                     *
 **********************************************************************/
 
-// code is generally compiled via setuptools, therefore NDEBUG is present
-//#ifdef NDEBUG
-//#undef NDEBUG
-//#endif//NDEBUG
+
+
+#define NDEBUG
 
 
 #include <cmath>
diff --git a/bfps/fluid_base.py b/bfps/fluid_base.py
index 8a8bc169846312fd31bce1f60dbea23bc8101a05..62dfcfd264e2b6f0987ac0cfe547873e840fb686 100644
--- a/bfps/fluid_base.py
+++ b/bfps/fluid_base.py
@@ -44,7 +44,7 @@ class fluid_particle_base(bfps.code):
                 work_dir = work_dir,
                 simname = simname)
         self.use_fftw_wisdom = use_fftw_wisdom
-        self.name = name
+        self.name = name + '_' + simname
         self.particle_species = 0
         if dtype in [np.float32, np.float64]:
             self.dtype = dtype
@@ -60,6 +60,7 @@ class fluid_particle_base(bfps.code):
         elif self.rtype == np.float64:
             self.ctype = np.dtype(np.complex128)
             self.C_dtype = 'double'
+        self.parameters['dealias_type'] = 1
         self.parameters['dkx'] = 1.0
         self.parameters['dky'] = 1.0
         self.parameters['dkz'] = 1.0
@@ -78,7 +79,10 @@ class fluid_particle_base(bfps.code):
         self.parameters['histogram_bins'] = 256
         self.parameters['max_velocity_estimate'] = 1.0
         self.parameters['max_vorticity_estimate'] = 1.0
-        self.parameters['dealias_type'] = 1
+        self.parameters['QR2D_histogram_bins'] = 64
+        self.parameters['max_trS2_estimate'] = 1.0
+        self.parameters['max_Q_estimate'] = 1.0
+        self.parameters['max_R_estimate'] = 1.0
         self.fluid_includes = '#include "fluid_solver.hpp"\n'
         self.fluid_variables = ''
         self.fluid_definitions = ''
@@ -106,7 +110,7 @@ class fluid_particle_base(bfps.code):
             self.definitions += self.particle_definitions
         self.definitions += ('int grow_single_dataset(hid_t dset, int tincrement)\n{\n' +
                              'int ndims;\n' +
-                             'hsize_t dims[4];\n' +
+                             'hsize_t dims[5];\n' +
                              'hsize_t space;\n' +
                              'space = H5Dget_space(dset);\n' +
                              'ndims = H5Sget_simple_extent_dims(space, dims, NULL);\n' +
@@ -269,7 +273,7 @@ class fluid_particle_base(bfps.code):
             self,
             rseed = 7547,
             spectra_slope = 1.,
-            precision = 'single',
+            amplitude = 1.,
             iteration = 0,
             field_name = 'vorticity',
             write_to_file = False):
@@ -278,17 +282,20 @@ class fluid_particle_base(bfps.code):
                 self.parameters['nz']//2,
                 self.parameters['ny']//2,
                 self.parameters['nx']//2,
-                p = spectra_slope).astype(self.ctype)
+                p = spectra_slope,
+                amplitude = amplitude).astype(self.ctype)
         Kdata01 = bfps.tools.generate_data_3D(
                 self.parameters['nz']//2,
                 self.parameters['ny']//2,
                 self.parameters['nx']//2,
-                p = spectra_slope).astype(self.ctype)
+                p = spectra_slope,
+                amplitude = amplitude).astype(self.ctype)
         Kdata02 = bfps.tools.generate_data_3D(
                 self.parameters['nz']//2,
                 self.parameters['ny']//2,
                 self.parameters['nx']//2,
-                p = spectra_slope).astype(self.ctype)
+                p = spectra_slope,
+                amplitude = amplitude).astype(self.ctype)
         Kdata0 = np.zeros(
                 Kdata00.shape + (3,),
                 Kdata00.dtype)
@@ -354,9 +361,9 @@ class fluid_particle_base(bfps.code):
     def get_kspace(self):
         kspace = {}
         if self.parameters['dealias_type'] == 1:
-            kMx = self.parameters['dkx']*(self.parameters['nx']//2)
-            kMy = self.parameters['dky']*(self.parameters['ny']//2)
-            kMz = self.parameters['dkz']*(self.parameters['nz']//2)
+            kMx = self.parameters['dkx']*(self.parameters['nx']//2 - 1)
+            kMy = self.parameters['dky']*(self.parameters['ny']//2 - 1)
+            kMz = self.parameters['dkz']*(self.parameters['nz']//2 - 1)
         else:
             kMx = self.parameters['dkx']*(self.parameters['nx']//3 - 1)
             kMy = self.parameters['dky']*(self.parameters['ny']//3 - 1)
@@ -376,8 +383,6 @@ class fluid_particle_base(bfps.code):
         kspace['kz'] = np.arange(-self.parameters['nz']//2 + 1,
                                   self.parameters['nz']//2 + 1).astype(np.float64)*self.parameters['dkz']
         kspace['kz'] = np.roll(kspace['kz'], self.parameters['nz']//2+1)
-        kspace['ksample_indices'] = bfps.tools.get_kindices(n = self.parameters['nx'])
-        print kspace['ksample_indices'].shape[0]
         return kspace
     def write_par(self, iter0 = 0):
         assert (self.parameters['niter_todo'] % self.parameters['niter_stat'] == 0)
@@ -392,14 +397,6 @@ class fluid_particle_base(bfps.code):
             for k in kspace.keys():
                 ofile['kspace/' + k] = kspace[k]
             nshells = kspace['nshell'].shape[0]
-            time_chunk = 2**20//(self.ctype.itemsize*3*kspace['ksample_indices'].shape[0])
-            time_chunk = max(time_chunk, 1)
-            ofile.create_dataset('statistics/ksamples/velocity',
-                                 (1, kspace['ksample_indices'].shape[0], 3),
-                                 chunks = (time_chunk, kspace['ksample_indices'].shape[0], 3),
-                                 maxshape = (None, kspace['ksample_indices'].shape[0], 3),
-                                 dtype = self.ctype,
-                                 compression = 'gzip')
             for k in ['velocity', 'vorticity']:
                 time_chunk = 2**20//(8*3*self.parameters['nx'])
                 time_chunk = max(time_chunk, 1)
diff --git a/bfps/fluid_converter.py b/bfps/fluid_converter.py
index b448d8db870303bc7b8822277711b7db198a527a..04f113c4ed44d99fb020d4a782bb7d28ae09adbb 100644
--- a/bfps/fluid_converter.py
+++ b/bfps/fluid_converter.py
@@ -37,12 +37,14 @@ class fluid_converter(bfps.fluid_base.fluid_particle_base):
             name = 'fluid_converter',
             work_dir = './',
             simname = 'test',
-            fluid_precision = 'single'):
+            fluid_precision = 'single',
+            use_fftw_wisdom = True):
         super(fluid_converter, self).__init__(
                 name = name,
                 work_dir = work_dir,
                 simname = simname,
-                dtype = fluid_precision)
+                dtype = fluid_precision,
+                use_fftw_wisdom = use_fftw_wisdom)
         self.parameters['write_rvelocity']  = 1
         self.parameters['write_rvorticity'] = 1
         self.parameters['fluid_name'] = 'test'
diff --git a/bfps/fluid_resize.py b/bfps/fluid_resize.py
index c022627931c29534c24f26934504712def959e90..5c4e6c5ed001c9ca698d19824677c335a6a0ff53 100644
--- a/bfps/fluid_resize.py
+++ b/bfps/fluid_resize.py
@@ -35,12 +35,14 @@ class fluid_resize(bfps.fluid_base.fluid_particle_base):
             name = 'fluid_resize',
             work_dir = './',
             simname = 'test',
-            dtype = np.float32):
+            dtype = np.float32,
+            use_fftw_wisdom = False):
         super(fluid_resize, self).__init__(
                 name = name,
                 work_dir = work_dir,
                 simname = simname,
-                dtype = dtype)
+                dtype = dtype,
+                use_fftw_wisdom = use_fftw_wisdom)
         self.parameters['src_simname'] = 'test'
         self.parameters['dst_iter'] = 0
         self.parameters['dst_nx'] = 32
diff --git a/bfps/tools.py b/bfps/tools.py
index 54e5b6823d85e6c1326434a1eba31dfefc808fe8..7ae7133feac96f56eaca2d7b3ca64a59a4ede999 100644
--- a/bfps/tools.py
+++ b/bfps/tools.py
@@ -30,13 +30,14 @@ import numpy as np
 def generate_data_3D(
         n0, n1, n2,
         dtype = np.complex128,
-        p = 1.5):
+        p = 1.5,
+        amplitude = 0.5):
     """
     generate something that has the proper shape
     """
     assert(n0 % 2 == 0 and n1 % 2 == 0 and n2 % 2 == 0)
     a = np.zeros((n1, n0, n2/2+1), dtype = dtype)
-    a[:] = np.random.randn(*a.shape) + 1j*np.random.randn(*a.shape)
+    a[:] = amplitude*(np.random.randn(*a.shape) + 1j*np.random.randn(*a.shape))
     k, j, i = np.mgrid[-n1/2+1:n1/2+1, -n0/2+1:n0/2+1, 0:n2/2+1]
     k = (k**2 + j**2 + i**2)**.5
     k = np.roll(k, n1//2+1, axis = 0)
@@ -55,6 +56,18 @@ def generate_data_3D(
     a[ii] = 0
     return a
 
+def randomize_phases(v):
+    phi = np.random.random(v.shape[:3])*(2*np.pi)
+    phi[0, 0, 0] = 0.0
+    for ky in range(1, phi.shape[0]//2):
+        phi[phi.shape[0] - ky, 0, 0] = - phi[ky, 0, 0]
+    for kz in range(1, phi.shape[1]//2):
+        phi[0, phi.shape[1] - kz, 0] = - phi[0, kz, 0]
+    for ky in range(1, phi.shape[0]//2):
+        for kz in range(1, phi.shape[1]):
+            phi[phi.shape[0] - ky, phi.shape[1] - kz, 0] = - phi[ky, kz, 0]
+    return v*(np.exp(1j*phi)[:, :, :, None]).astype(v.dtype)
+
 def padd_with_zeros(
         a,
         n0, n1, n2,
@@ -82,8 +95,8 @@ def get_kindices(
     kvals = []
     radii = set([])
     index = []
-    for iz in range(kx.shape[0]):
-        for ix in range(0, kx.shape[0]):
+    for iz in range(1, kx.shape[0]):
+        for ix in range(1, kx.shape[0]):
             kval = (kx[iz]**2+kx[ix]**2)**.5
             tmp = math.modf(kval)
             if (tmp[0] == 0 and tmp[1] <= nx//2):
diff --git a/done.txt b/done.txt
new file mode 100644
index 0000000000000000000000000000000000000000..787392010cce3f4cb0f5921eba4a7fa8ba31e89a
--- /dev/null
+++ b/done.txt
@@ -0,0 +1,2 @@
+x 2015-12-04 make code py3 compatible                                @python3
+x 2015-12-23 decide on versioning system                                           +merge0
diff --git a/machine_settings.py b/machine_settings.py
index b12620123b60fe87f5bda1cc770427b61f20f665..79cda4702434a7cbc9670075e04e0543d75563f8 100644
--- a/machine_settings.py
+++ b/machine_settings.py
@@ -39,7 +39,7 @@ extra_libraries = ['hdf5']
 if hostname == 'chichi-G':
     include_dirs = ['/usr/local/include',
                     '/usr/include/mpich']
-    library_dirs = ['/usr/local/lib'
+    library_dirs = ['/usr/local/lib',
                     '/usr/lib/mpich']
     extra_libraries += ['mpich']
 
@@ -71,7 +71,7 @@ if hostname in ['frontend01', 'frontend02']:
                           '-mhard-float',
                           '-mieee-fp',
                           '-ffast-math',
-                          '-mlarge-data-threshold=65536',
+#                          '-mlarge-data-threshold=65536',
                           '-mno-sse4',
                           '-mpush-args',
                           '-mred-zone',
diff --git a/meta/Velocity gradient.ipynb b/meta/Velocity gradient.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..deea6cb898882ec954e5ecf9b6ca42ba7214172a
--- /dev/null
+++ b/meta/Velocity gradient.ipynb	
@@ -0,0 +1,203 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-Axx**2/2 - Axy*Ayx - Axz*Azx - Ayy**2/2 - Ayz*Azy - Azz**2/2\n",
+      "-Axx*(Axx**2/3 + Axy*Ayx + Axz*Azx) - Axy*(Ayx*Ayy + Ayz*Azx) - Axz*(Ayx*Azy + Azx*Azz) - Ayy*(Ayy**2/3 + Ayz*Azy) - Ayz*Azy*Azz - Azz**3/3\n",
+      "Axx**2 + Axy*(Axy/2 + Ayx) + Axz*(Axz/2 + Azx) + Ayx**2/2 + Ayy**2 + Ayz*(Ayz/2 + Azy) + Azx**2/2 + Azy**2/2 + Azz**2\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sympy as sp\n",
+    "\n",
+    "A = []\n",
+    "for deriv in ['x', 'y', 'z']:\n",
+    "    A.append([])\n",
+    "    for field in ['x', 'y', 'z']:\n",
+    "        A[-1].append(sp.Symbol('A' + deriv + field))\n",
+    "\n",
+    "A = sp.Matrix(A)\n",
+    "\n",
+    "A2 = A**2\n",
+    "A3 = A**3\n",
+    "Q = -sp.horner(sp.simplify(sum(A2[i, i] for i in range(3))/2))\n",
+    "R = -sp.horner(sp.simplify(sum(A3[i, i] for i in range(3))/3))\n",
+    "print(Q)\n",
+    "print(R)\n",
+    "\n",
+    "S = (A + A.T)/2\n",
+    "S2 = S**2\n",
+    "trS2 = sp.horner(sp.simplify(sum(S2[i, i] for i in range(3))))\n",
+    "print(trS2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-Axx**2/2 - Axy*Ayx - Axz*Azx - Ayy**2/2 - Ayz*Azy - Azz**2/2\n",
+      "3*DIV + 3*MUL + NEG + 3*POW + 5*SUB\n",
+      "-Axx**3/3 - Axx*Axy*Ayx - Axx*Axz*Azx - Axy*Ayx*Ayy - Axy*Ayz*Azx - Axz*Ayx*Azy - Axz*Azx*Azz - Ayy**3/3 - Ayy*Ayz*Azy - Ayz*Azy*Azz - Azz**3/3\n",
+      "3*DIV + 16*MUL + NEG + 3*POW + 10*SUB\n"
+     ]
+    }
+   ],
+   "source": [
+    "def alt_measure(expr):\n",
+    "    POW = sp.Symbol('POW')\n",
+    "    count = sp.count_ops(expr, visual=True).subs(POW, 10)\n",
+    "    count = count.replace(sp.Symbol, type(sp.S.One))\n",
+    "    return count\n",
+    "\n",
+    "Qalt = sp.simplify(Q, measure = alt_measure)\n",
+    "print(Qalt)\n",
+    "print(sp.count_ops(Qalt, visual=True))\n",
+    "Ralt = sp.simplify(R, measure = alt_measure)\n",
+    "print(Ralt)\n",
+    "print(sp.count_ops(Ralt, visual=True))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-Axx**2/2 - Axy*Ayx - Axz*Azx - Ayy**2/2 - Ayz*Azy - Azz**2/2\n",
+      "0\n"
+     ]
+    }
+   ],
+   "source": [
+    "Qalt = - sum(A[i, (i+1)%3] * A[(i+1)%3, i] for i in range(3)) - sum(A[i, i]**2 for i in range(3))/2\n",
+    "print Qalt\n",
+    "print(sp.simplify(Qalt - Q))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-Axx*(Axx**2/3 + Axy*Ayx + Axz*Azx) - Axy*Ayz*Azx - Axz*Ayx*Azy - Ayy*(Axy*Ayx + Ayy**2/3 + Ayz*Azy) - Azz*(Axz*Azx + Ayz*Azy + Azz**2/3)\n",
+      "6*ADD + 3*DIV + 13*MUL + NEG + 3*POW + 4*SUB\n",
+      "0\n"
+     ]
+    }
+   ],
+   "source": [
+    "Ralt = - (sum(A[i, i]*(A[i, i]**2/3 + sum(A[i, (i+j)%3]*A[(i+j)%3, i]\n",
+    "                                          for j in range(1, 3)))\n",
+    "              for i in range(3)) +\n",
+    "          A[0, 1]*A[1, 2]*A[2, 0] + A[0, 2]*A[1, 0]*A[2, 1])\n",
+    "print Ralt\n",
+    "print(sp.count_ops(Ralt, visual=True))\n",
+    "print(sp.simplify(Ralt - R))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0\n",
+      "0\n",
+      "0\n",
+      "-Axx*(Axx**2/3 + Axy*Ayx + Axz*Azx) - Axy*Ayz*Azx - Axz*Ayx*Azy - Ayy*(Axy*Ayx + Ayy**2/3 + Ayz*Azy) - Azz*(Axz*Azx + Ayz*Azy + Azz**2/3)\n",
+      "Axx*Ayy*Azz - Axx*Ayz*Azy - Axy*Ayx*Azz + Axy*Ayz*Azx + Axz*Ayx*Azy - Axz*Ayy*Azx\n"
+     ]
+    }
+   ],
+   "source": [
+    "AxxAxx = A[0, 0]**2\n",
+    "AyyAyy = A[1, 1]**2\n",
+    "AzzAzz = A[2, 2]**2\n",
+    "AxyAyx = A[0, 1]*A[1, 0]\n",
+    "AyzAzy = A[1, 2]*A[2, 1]\n",
+    "AzxAxz = A[2, 0]*A[0, 2]\n",
+    "\n",
+    "Qalt = - (AxxAxx + AyyAyy + AzzAzz)/2 - AxyAyx -AyzAzy - AzxAxz\n",
+    "print(sp.simplify(Qalt - Q))\n",
+    "Ralt = - (A[0, 0]*(AxxAxx/3 + AxyAyx + AzxAxz) +\n",
+    "          A[1, 1]*(AyyAyy/3 + AxyAyx + AyzAzy) +\n",
+    "          A[2, 2]*(AzzAzz/3 + AzxAxz + AyzAzy) +\n",
+    "          A[0, 1]*A[1, 2]*A[2, 0] +\n",
+    "          A[0, 2]*A[1, 0]*A[2, 1])\n",
+    "print sp.simplify(Ralt - R)\n",
+    "#print sp.simplify(Ralt + A.det())\n",
+    "trS2alt = (AxxAxx + AyyAyy + AzzAzz +\n",
+    "           ((A[0, 1] + A[1, 0])**2 +\n",
+    "            (A[1, 2] + A[2, 1])**2 +\n",
+    "            (A[2, 0] + A[0, 2])**2)/2)\n",
+    "print sp.simplify(trS2alt - trS2)\n",
+    "\n",
+    "print Ralt\n",
+    "print A.det()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/meta/smooth dealiasing.ipynb b/meta/smooth dealiasing.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..664bc0bf5614553854581bd27524007ff9467434
--- /dev/null
+++ b/meta/smooth dealiasing.ipynb	
@@ -0,0 +1,804 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "/* Put everything inside the global mpl namespace */\n",
+       "window.mpl = {};\n",
+       "\n",
+       "mpl.get_websocket_type = function() {\n",
+       "    if (typeof(WebSocket) !== 'undefined') {\n",
+       "        return WebSocket;\n",
+       "    } else if (typeof(MozWebSocket) !== 'undefined') {\n",
+       "        return MozWebSocket;\n",
+       "    } else {\n",
+       "        alert('Your browser does not have WebSocket support.' +\n",
+       "              'Please try Chrome, Safari or Firefox ≥ 6. ' +\n",
+       "              'Firefox 4 and 5 are also supported but you ' +\n",
+       "              'have to enable WebSockets in about:config.');\n",
+       "    };\n",
+       "}\n",
+       "\n",
+       "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n",
+       "    this.id = figure_id;\n",
+       "\n",
+       "    this.ws = websocket;\n",
+       "\n",
+       "    this.supports_binary = (this.ws.binaryType != undefined);\n",
+       "\n",
+       "    if (!this.supports_binary) {\n",
+       "        var warnings = document.getElementById(\"mpl-warnings\");\n",
+       "        if (warnings) {\n",
+       "            warnings.style.display = 'block';\n",
+       "            warnings.textContent = (\n",
+       "                \"This browser does not support binary websocket messages. \" +\n",
+       "                    \"Performance may be slow.\");\n",
+       "        }\n",
+       "    }\n",
+       "\n",
+       "    this.imageObj = new Image();\n",
+       "\n",
+       "    this.context = undefined;\n",
+       "    this.message = undefined;\n",
+       "    this.canvas = undefined;\n",
+       "    this.rubberband_canvas = undefined;\n",
+       "    this.rubberband_context = undefined;\n",
+       "    this.format_dropdown = undefined;\n",
+       "\n",
+       "    this.image_mode = 'full';\n",
+       "\n",
+       "    this.root = $('<div/>');\n",
+       "    this._root_extra_style(this.root)\n",
+       "    this.root.attr('style', 'display: inline-block');\n",
+       "\n",
+       "    $(parent_element).append(this.root);\n",
+       "\n",
+       "    this._init_header(this);\n",
+       "    this._init_canvas(this);\n",
+       "    this._init_toolbar(this);\n",
+       "\n",
+       "    var fig = this;\n",
+       "\n",
+       "    this.waiting = false;\n",
+       "\n",
+       "    this.ws.onopen =  function () {\n",
+       "            fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n",
+       "            fig.send_message(\"send_image_mode\", {});\n",
+       "            fig.send_message(\"refresh\", {});\n",
+       "        }\n",
+       "\n",
+       "    this.imageObj.onload = function() {\n",
+       "            if (fig.image_mode == 'full') {\n",
+       "                // Full images could contain transparency (where diff images\n",
+       "                // almost always do), so we need to clear the canvas so that\n",
+       "                // there is no ghosting.\n",
+       "                fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n",
+       "            }\n",
+       "            fig.context.drawImage(fig.imageObj, 0, 0);\n",
+       "            fig.waiting = false;\n",
+       "        };\n",
+       "\n",
+       "    this.imageObj.onunload = function() {\n",
+       "        this.ws.close();\n",
+       "    }\n",
+       "\n",
+       "    this.ws.onmessage = this._make_on_message_function(this);\n",
+       "\n",
+       "    this.ondownload = ondownload;\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype._init_header = function() {\n",
+       "    var titlebar = $(\n",
+       "        '<div class=\"ui-dialog-titlebar ui-widget-header ui-corner-all ' +\n",
+       "        'ui-helper-clearfix\"/>');\n",
+       "    var titletext = $(\n",
+       "        '<div class=\"ui-dialog-title\" style=\"width: 100%; ' +\n",
+       "        'text-align: center; padding: 3px;\"/>');\n",
+       "    titlebar.append(titletext)\n",
+       "    this.root.append(titlebar);\n",
+       "    this.header = titletext[0];\n",
+       "}\n",
+       "\n",
+       "\n",
+       "\n",
+       "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n",
+       "\n",
+       "}\n",
+       "\n",
+       "\n",
+       "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n",
+       "\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype._init_canvas = function() {\n",
+       "    var fig = this;\n",
+       "\n",
+       "    var canvas_div = $('<div/>');\n",
+       "\n",
+       "    canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n",
+       "\n",
+       "    function canvas_keyboard_event(event) {\n",
+       "        return fig.key_event(event, event['data']);\n",
+       "    }\n",
+       "\n",
+       "    canvas_div.keydown('key_press', canvas_keyboard_event);\n",
+       "    canvas_div.keyup('key_release', canvas_keyboard_event);\n",
+       "    this.canvas_div = canvas_div\n",
+       "    this._canvas_extra_style(canvas_div)\n",
+       "    this.root.append(canvas_div);\n",
+       "\n",
+       "    var canvas = $('<canvas/>');\n",
+       "    canvas.addClass('mpl-canvas');\n",
+       "    canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n",
+       "\n",
+       "    this.canvas = canvas[0];\n",
+       "    this.context = canvas[0].getContext(\"2d\");\n",
+       "\n",
+       "    var rubberband = $('<canvas/>');\n",
+       "    rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n",
+       "\n",
+       "    var pass_mouse_events = true;\n",
+       "\n",
+       "    canvas_div.resizable({\n",
+       "        start: function(event, ui) {\n",
+       "            pass_mouse_events = false;\n",
+       "        },\n",
+       "        resize: function(event, ui) {\n",
+       "            fig.request_resize(ui.size.width, ui.size.height);\n",
+       "        },\n",
+       "        stop: function(event, ui) {\n",
+       "            pass_mouse_events = true;\n",
+       "            fig.request_resize(ui.size.width, ui.size.height);\n",
+       "        },\n",
+       "    });\n",
+       "\n",
+       "    function mouse_event_fn(event) {\n",
+       "        if (pass_mouse_events)\n",
+       "            return fig.mouse_event(event, event['data']);\n",
+       "    }\n",
+       "\n",
+       "    rubberband.mousedown('button_press', mouse_event_fn);\n",
+       "    rubberband.mouseup('button_release', mouse_event_fn);\n",
+       "    // Throttle sequential mouse events to 1 every 20ms.\n",
+       "    rubberband.mousemove('motion_notify', mouse_event_fn);\n",
+       "\n",
+       "    rubberband.mouseenter('figure_enter', mouse_event_fn);\n",
+       "    rubberband.mouseleave('figure_leave', mouse_event_fn);\n",
+       "\n",
+       "    canvas_div.on(\"wheel\", function (event) {\n",
+       "        event = event.originalEvent;\n",
+       "        event['data'] = 'scroll'\n",
+       "        if (event.deltaY < 0) {\n",
+       "            event.step = 1;\n",
+       "        } else {\n",
+       "            event.step = -1;\n",
+       "        }\n",
+       "        mouse_event_fn(event);\n",
+       "    });\n",
+       "\n",
+       "    canvas_div.append(canvas);\n",
+       "    canvas_div.append(rubberband);\n",
+       "\n",
+       "    this.rubberband = rubberband;\n",
+       "    this.rubberband_canvas = rubberband[0];\n",
+       "    this.rubberband_context = rubberband[0].getContext(\"2d\");\n",
+       "    this.rubberband_context.strokeStyle = \"#000000\";\n",
+       "\n",
+       "    this._resize_canvas = function(width, height) {\n",
+       "        // Keep the size of the canvas, canvas container, and rubber band\n",
+       "        // canvas in synch.\n",
+       "        canvas_div.css('width', width)\n",
+       "        canvas_div.css('height', height)\n",
+       "\n",
+       "        canvas.attr('width', width);\n",
+       "        canvas.attr('height', height);\n",
+       "\n",
+       "        rubberband.attr('width', width);\n",
+       "        rubberband.attr('height', height);\n",
+       "    }\n",
+       "\n",
+       "    // Set the figure to an initial 600x600px, this will subsequently be updated\n",
+       "    // upon first draw.\n",
+       "    this._resize_canvas(600, 600);\n",
+       "\n",
+       "    // Disable right mouse context menu.\n",
+       "    $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n",
+       "        return false;\n",
+       "    });\n",
+       "\n",
+       "    function set_focus () {\n",
+       "        canvas.focus();\n",
+       "        canvas_div.focus();\n",
+       "    }\n",
+       "\n",
+       "    window.setTimeout(set_focus, 100);\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype._init_toolbar = function() {\n",
+       "    var fig = this;\n",
+       "\n",
+       "    var nav_element = $('<div/>')\n",
+       "    nav_element.attr('style', 'width: 100%');\n",
+       "    this.root.append(nav_element);\n",
+       "\n",
+       "    // Define a callback function for later on.\n",
+       "    function toolbar_event(event) {\n",
+       "        return fig.toolbar_button_onclick(event['data']);\n",
+       "    }\n",
+       "    function toolbar_mouse_event(event) {\n",
+       "        return fig.toolbar_button_onmouseover(event['data']);\n",
+       "    }\n",
+       "\n",
+       "    for(var toolbar_ind in mpl.toolbar_items) {\n",
+       "        var name = mpl.toolbar_items[toolbar_ind][0];\n",
+       "        var tooltip = mpl.toolbar_items[toolbar_ind][1];\n",
+       "        var image = mpl.toolbar_items[toolbar_ind][2];\n",
+       "        var method_name = mpl.toolbar_items[toolbar_ind][3];\n",
+       "\n",
+       "        if (!name) {\n",
+       "            // put a spacer in here.\n",
+       "            continue;\n",
+       "        }\n",
+       "        var button = $('<button/>');\n",
+       "        button.addClass('ui-button ui-widget ui-state-default ui-corner-all ' +\n",
+       "                        'ui-button-icon-only');\n",
+       "        button.attr('role', 'button');\n",
+       "        button.attr('aria-disabled', 'false');\n",
+       "        button.click(method_name, toolbar_event);\n",
+       "        button.mouseover(tooltip, toolbar_mouse_event);\n",
+       "\n",
+       "        var icon_img = $('<span/>');\n",
+       "        icon_img.addClass('ui-button-icon-primary ui-icon');\n",
+       "        icon_img.addClass(image);\n",
+       "        icon_img.addClass('ui-corner-all');\n",
+       "\n",
+       "        var tooltip_span = $('<span/>');\n",
+       "        tooltip_span.addClass('ui-button-text');\n",
+       "        tooltip_span.html(tooltip);\n",
+       "\n",
+       "        button.append(icon_img);\n",
+       "        button.append(tooltip_span);\n",
+       "\n",
+       "        nav_element.append(button);\n",
+       "    }\n",
+       "\n",
+       "    var fmt_picker_span = $('<span/>');\n",
+       "\n",
+       "    var fmt_picker = $('<select/>');\n",
+       "    fmt_picker.addClass('mpl-toolbar-option ui-widget ui-widget-content');\n",
+       "    fmt_picker_span.append(fmt_picker);\n",
+       "    nav_element.append(fmt_picker_span);\n",
+       "    this.format_dropdown = fmt_picker[0];\n",
+       "\n",
+       "    for (var ind in mpl.extensions) {\n",
+       "        var fmt = mpl.extensions[ind];\n",
+       "        var option = $(\n",
+       "            '<option/>', {selected: fmt === mpl.default_extension}).html(fmt);\n",
+       "        fmt_picker.append(option)\n",
+       "    }\n",
+       "\n",
+       "    // Add hover states to the ui-buttons\n",
+       "    $( \".ui-button\" ).hover(\n",
+       "        function() { $(this).addClass(\"ui-state-hover\");},\n",
+       "        function() { $(this).removeClass(\"ui-state-hover\");}\n",
+       "    );\n",
+       "\n",
+       "    var status_bar = $('<span class=\"mpl-message\"/>');\n",
+       "    nav_element.append(status_bar);\n",
+       "    this.message = status_bar[0];\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.request_resize = function(x_pixels, y_pixels) {\n",
+       "    // Request matplotlib to resize the figure. Matplotlib will then trigger a resize in the client,\n",
+       "    // which will in turn request a refresh of the image.\n",
+       "    this.send_message('resize', {'width': x_pixels, 'height': y_pixels});\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.send_message = function(type, properties) {\n",
+       "    properties['type'] = type;\n",
+       "    properties['figure_id'] = this.id;\n",
+       "    this.ws.send(JSON.stringify(properties));\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.send_draw_message = function() {\n",
+       "    if (!this.waiting) {\n",
+       "        this.waiting = true;\n",
+       "        this.ws.send(JSON.stringify({type: \"draw\", figure_id: this.id}));\n",
+       "    }\n",
+       "}\n",
+       "\n",
+       "\n",
+       "mpl.figure.prototype.handle_save = function(fig, msg) {\n",
+       "    var format_dropdown = fig.format_dropdown;\n",
+       "    var format = format_dropdown.options[format_dropdown.selectedIndex].value;\n",
+       "    fig.ondownload(fig, format);\n",
+       "}\n",
+       "\n",
+       "\n",
+       "mpl.figure.prototype.handle_resize = function(fig, msg) {\n",
+       "    var size = msg['size'];\n",
+       "    if (size[0] != fig.canvas.width || size[1] != fig.canvas.height) {\n",
+       "        fig._resize_canvas(size[0], size[1]);\n",
+       "        fig.send_message(\"refresh\", {});\n",
+       "    };\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.handle_rubberband = function(fig, msg) {\n",
+       "    var x0 = msg['x0'];\n",
+       "    var y0 = fig.canvas.height - msg['y0'];\n",
+       "    var x1 = msg['x1'];\n",
+       "    var y1 = fig.canvas.height - msg['y1'];\n",
+       "    x0 = Math.floor(x0) + 0.5;\n",
+       "    y0 = Math.floor(y0) + 0.5;\n",
+       "    x1 = Math.floor(x1) + 0.5;\n",
+       "    y1 = Math.floor(y1) + 0.5;\n",
+       "    var min_x = Math.min(x0, x1);\n",
+       "    var min_y = Math.min(y0, y1);\n",
+       "    var width = Math.abs(x1 - x0);\n",
+       "    var height = Math.abs(y1 - y0);\n",
+       "\n",
+       "    fig.rubberband_context.clearRect(\n",
+       "        0, 0, fig.canvas.width, fig.canvas.height);\n",
+       "\n",
+       "    fig.rubberband_context.strokeRect(min_x, min_y, width, height);\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.handle_figure_label = function(fig, msg) {\n",
+       "    // Updates the figure title.\n",
+       "    fig.header.textContent = msg['label'];\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.handle_cursor = function(fig, msg) {\n",
+       "    var cursor = msg['cursor'];\n",
+       "    switch(cursor)\n",
+       "    {\n",
+       "    case 0:\n",
+       "        cursor = 'pointer';\n",
+       "        break;\n",
+       "    case 1:\n",
+       "        cursor = 'default';\n",
+       "        break;\n",
+       "    case 2:\n",
+       "        cursor = 'crosshair';\n",
+       "        break;\n",
+       "    case 3:\n",
+       "        cursor = 'move';\n",
+       "        break;\n",
+       "    }\n",
+       "    fig.rubberband_canvas.style.cursor = cursor;\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.handle_message = function(fig, msg) {\n",
+       "    fig.message.textContent = msg['message'];\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.handle_draw = function(fig, msg) {\n",
+       "    // Request the server to send over a new figure.\n",
+       "    fig.send_draw_message();\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.handle_image_mode = function(fig, msg) {\n",
+       "    fig.image_mode = msg['mode'];\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.updated_canvas_event = function() {\n",
+       "    // Called whenever the canvas gets updated.\n",
+       "    this.send_message(\"ack\", {});\n",
+       "}\n",
+       "\n",
+       "// A function to construct a web socket function for onmessage handling.\n",
+       "// Called in the figure constructor.\n",
+       "mpl.figure.prototype._make_on_message_function = function(fig) {\n",
+       "    return function socket_on_message(evt) {\n",
+       "        if (evt.data instanceof Blob) {\n",
+       "            /* FIXME: We get \"Resource interpreted as Image but\n",
+       "             * transferred with MIME type text/plain:\" errors on\n",
+       "             * Chrome.  But how to set the MIME type?  It doesn't seem\n",
+       "             * to be part of the websocket stream */\n",
+       "            evt.data.type = \"image/png\";\n",
+       "\n",
+       "            /* Free the memory for the previous frames */\n",
+       "            if (fig.imageObj.src) {\n",
+       "                (window.URL || window.webkitURL).revokeObjectURL(\n",
+       "                    fig.imageObj.src);\n",
+       "            }\n",
+       "\n",
+       "            fig.imageObj.src = (window.URL || window.webkitURL).createObjectURL(\n",
+       "                evt.data);\n",
+       "            fig.updated_canvas_event();\n",
+       "            return;\n",
+       "        }\n",
+       "        else if (typeof evt.data === 'string' && evt.data.slice(0, 21) == \"data:image/png;base64\") {\n",
+       "            fig.imageObj.src = evt.data;\n",
+       "            fig.updated_canvas_event();\n",
+       "            return;\n",
+       "        }\n",
+       "\n",
+       "        var msg = JSON.parse(evt.data);\n",
+       "        var msg_type = msg['type'];\n",
+       "\n",
+       "        // Call the  \"handle_{type}\" callback, which takes\n",
+       "        // the figure and JSON message as its only arguments.\n",
+       "        try {\n",
+       "            var callback = fig[\"handle_\" + msg_type];\n",
+       "        } catch (e) {\n",
+       "            console.log(\"No handler for the '\" + msg_type + \"' message type: \", msg);\n",
+       "            return;\n",
+       "        }\n",
+       "\n",
+       "        if (callback) {\n",
+       "            try {\n",
+       "                // console.log(\"Handling '\" + msg_type + \"' message: \", msg);\n",
+       "                callback(fig, msg);\n",
+       "            } catch (e) {\n",
+       "                console.log(\"Exception inside the 'handler_\" + msg_type + \"' callback:\", e, e.stack, msg);\n",
+       "            }\n",
+       "        }\n",
+       "    };\n",
+       "}\n",
+       "\n",
+       "// from http://stackoverflow.com/questions/1114465/getting-mouse-location-in-canvas\n",
+       "mpl.findpos = function(e) {\n",
+       "    //this section is from http://www.quirksmode.org/js/events_properties.html\n",
+       "    var targ;\n",
+       "    if (!e)\n",
+       "        e = window.event;\n",
+       "    if (e.target)\n",
+       "        targ = e.target;\n",
+       "    else if (e.srcElement)\n",
+       "        targ = e.srcElement;\n",
+       "    if (targ.nodeType == 3) // defeat Safari bug\n",
+       "        targ = targ.parentNode;\n",
+       "\n",
+       "    // jQuery normalizes the pageX and pageY\n",
+       "    // pageX,Y are the mouse positions relative to the document\n",
+       "    // offset() returns the position of the element relative to the document\n",
+       "    var x = e.pageX - $(targ).offset().left;\n",
+       "    var y = e.pageY - $(targ).offset().top;\n",
+       "\n",
+       "    return {\"x\": x, \"y\": y};\n",
+       "};\n",
+       "\n",
+       "mpl.figure.prototype.mouse_event = function(event, name) {\n",
+       "    var canvas_pos = mpl.findpos(event)\n",
+       "\n",
+       "    if (name === 'button_press')\n",
+       "    {\n",
+       "        this.canvas.focus();\n",
+       "        this.canvas_div.focus();\n",
+       "    }\n",
+       "\n",
+       "    var x = canvas_pos.x;\n",
+       "    var y = canvas_pos.y;\n",
+       "\n",
+       "    this.send_message(name, {x: x, y: y, button: event.button,\n",
+       "                             step: event.step});\n",
+       "\n",
+       "    /* This prevents the web browser from automatically changing to\n",
+       "     * the text insertion cursor when the button is pressed.  We want\n",
+       "     * to control all of the cursor setting manually through the\n",
+       "     * 'cursor' event from matplotlib */\n",
+       "    event.preventDefault();\n",
+       "    return false;\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype._key_event_extra = function(event, name) {\n",
+       "    // Handle any extra behaviour associated with a key event\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.key_event = function(event, name) {\n",
+       "\n",
+       "    // Prevent repeat events\n",
+       "    if (name == 'key_press')\n",
+       "    {\n",
+       "        if (event.which === this._key)\n",
+       "            return;\n",
+       "        else\n",
+       "            this._key = event.which;\n",
+       "    }\n",
+       "    if (name == 'key_release')\n",
+       "        this._key = null;\n",
+       "\n",
+       "    var value = '';\n",
+       "    if (event.ctrlKey && event.which != 17)\n",
+       "        value += \"ctrl+\";\n",
+       "    if (event.altKey && event.which != 18)\n",
+       "        value += \"alt+\";\n",
+       "    if (event.shiftKey && event.which != 16)\n",
+       "        value += \"shift+\";\n",
+       "\n",
+       "    value += 'k';\n",
+       "    value += event.which.toString();\n",
+       "\n",
+       "    this._key_event_extra(event, name);\n",
+       "\n",
+       "    this.send_message(name, {key: value});\n",
+       "    return false;\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.toolbar_button_onclick = function(name) {\n",
+       "    if (name == 'download') {\n",
+       "        this.handle_save(this, null);\n",
+       "    } else {\n",
+       "        this.send_message(\"toolbar_button\", {name: name});\n",
+       "    }\n",
+       "};\n",
+       "\n",
+       "mpl.figure.prototype.toolbar_button_onmouseover = function(tooltip) {\n",
+       "    this.message.textContent = tooltip;\n",
+       "};\n",
+       "mpl.toolbar_items = [[\"Home\", \"Reset original view\", \"fa fa-home icon-home\", \"home\"], [\"Back\", \"Back to  previous view\", \"fa fa-arrow-left icon-arrow-left\", \"back\"], [\"Forward\", \"Forward to next view\", \"fa fa-arrow-right icon-arrow-right\", \"forward\"], [\"\", \"\", \"\", \"\"], [\"Pan\", \"Pan axes with left mouse, zoom with right\", \"fa fa-arrows icon-move\", \"pan\"], [\"Zoom\", \"Zoom to rectangle\", \"fa fa-square-o icon-check-empty\", \"zoom\"], [\"\", \"\", \"\", \"\"], [\"Download\", \"Download plot\", \"fa fa-floppy-o icon-save\", \"download\"]];\n",
+       "\n",
+       "mpl.extensions = [\"eps\", \"jpeg\", \"pdf\", \"png\", \"ps\", \"raw\", \"svg\", \"tif\"];\n",
+       "\n",
+       "mpl.default_extension = \"png\";var comm_websocket_adapter = function(comm) {\n",
+       "    // Create a \"websocket\"-like object which calls the given IPython comm\n",
+       "    // object with the appropriate methods. Currently this is a non binary\n",
+       "    // socket, so there is still some room for performance tuning.\n",
+       "    var ws = {};\n",
+       "\n",
+       "    ws.close = function() {\n",
+       "        comm.close()\n",
+       "    };\n",
+       "    ws.send = function(m) {\n",
+       "        //console.log('sending', m);\n",
+       "        comm.send(m);\n",
+       "    };\n",
+       "    // Register the callback with on_msg.\n",
+       "    comm.on_msg(function(msg) {\n",
+       "        //console.log('receiving', msg['content']['data'], msg);\n",
+       "        // Pass the mpl event to the overriden (by mpl) onmessage function.\n",
+       "        ws.onmessage(msg['content']['data'])\n",
+       "    });\n",
+       "    return ws;\n",
+       "}\n",
+       "\n",
+       "mpl.mpl_figure_comm = function(comm, msg) {\n",
+       "    // This is the function which gets called when the mpl process\n",
+       "    // starts-up an IPython Comm through the \"matplotlib\" channel.\n",
+       "\n",
+       "    var id = msg.content.data.id;\n",
+       "    // Get hold of the div created by the display call when the Comm\n",
+       "    // socket was opened in Python.\n",
+       "    var element = $(\"#\" + id);\n",
+       "    var ws_proxy = comm_websocket_adapter(comm)\n",
+       "\n",
+       "    function ondownload(figure, format) {\n",
+       "        window.open(figure.imageObj.src);\n",
+       "    }\n",
+       "\n",
+       "    var fig = new mpl.figure(id, ws_proxy,\n",
+       "                           ondownload,\n",
+       "                           element.get(0));\n",
+       "\n",
+       "    // Call onopen now - mpl needs it, as it is assuming we've passed it a real\n",
+       "    // web socket which is closed, not our websocket->open comm proxy.\n",
+       "    ws_proxy.onopen();\n",
+       "\n",
+       "    fig.parent_element = element.get(0);\n",
+       "    fig.cell_info = mpl.find_output_cell(\"<div id='\" + id + \"'></div>\");\n",
+       "    if (!fig.cell_info) {\n",
+       "        console.error(\"Failed to find cell for figure\", id, fig);\n",
+       "        return;\n",
+       "    }\n",
+       "\n",
+       "    var output_index = fig.cell_info[2]\n",
+       "    var cell = fig.cell_info[0];\n",
+       "\n",
+       "};\n",
+       "\n",
+       "mpl.figure.prototype.handle_close = function(fig, msg) {\n",
+       "    // Update the output cell to use the data from the current canvas.\n",
+       "    fig.push_to_output();\n",
+       "    var dataURL = fig.canvas.toDataURL();\n",
+       "    // Re-enable the keyboard manager in IPython - without this line, in FF,\n",
+       "    // the notebook keyboard shortcuts fail.\n",
+       "    IPython.keyboard_manager.enable()\n",
+       "    $(fig.parent_element).html('<img src=\"' + dataURL + '\">');\n",
+       "    fig.send_message('closing', {});\n",
+       "    fig.ws.close()\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.push_to_output = function(remove_interactive) {\n",
+       "    // Turn the data on the canvas into data in the output cell.\n",
+       "    var dataURL = this.canvas.toDataURL();\n",
+       "    this.cell_info[1]['text/html'] = '<img src=\"' + dataURL + '\">';\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.updated_canvas_event = function() {\n",
+       "    // Tell IPython that the notebook contents must change.\n",
+       "    IPython.notebook.set_dirty(true);\n",
+       "    this.send_message(\"ack\", {});\n",
+       "    var fig = this;\n",
+       "    // Wait a second, then push the new image to the DOM so\n",
+       "    // that it is saved nicely (might be nice to debounce this).\n",
+       "    setTimeout(function () { fig.push_to_output() }, 1000);\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype._init_toolbar = function() {\n",
+       "    var fig = this;\n",
+       "\n",
+       "    var nav_element = $('<div/>')\n",
+       "    nav_element.attr('style', 'width: 100%');\n",
+       "    this.root.append(nav_element);\n",
+       "\n",
+       "    // Define a callback function for later on.\n",
+       "    function toolbar_event(event) {\n",
+       "        return fig.toolbar_button_onclick(event['data']);\n",
+       "    }\n",
+       "    function toolbar_mouse_event(event) {\n",
+       "        return fig.toolbar_button_onmouseover(event['data']);\n",
+       "    }\n",
+       "\n",
+       "    for(var toolbar_ind in mpl.toolbar_items){\n",
+       "        var name = mpl.toolbar_items[toolbar_ind][0];\n",
+       "        var tooltip = mpl.toolbar_items[toolbar_ind][1];\n",
+       "        var image = mpl.toolbar_items[toolbar_ind][2];\n",
+       "        var method_name = mpl.toolbar_items[toolbar_ind][3];\n",
+       "\n",
+       "        if (!name) { continue; };\n",
+       "\n",
+       "        var button = $('<button class=\"btn btn-default\" href=\"#\" title=\"' + name + '\"><i class=\"fa ' + image + ' fa-lg\"></i></button>');\n",
+       "        button.click(method_name, toolbar_event);\n",
+       "        button.mouseover(tooltip, toolbar_mouse_event);\n",
+       "        nav_element.append(button);\n",
+       "    }\n",
+       "\n",
+       "    // Add the status bar.\n",
+       "    var status_bar = $('<span class=\"mpl-message\" style=\"text-align:right; float: right;\"/>');\n",
+       "    nav_element.append(status_bar);\n",
+       "    this.message = status_bar[0];\n",
+       "\n",
+       "    // Add the close button to the window.\n",
+       "    var buttongrp = $('<div class=\"btn-group inline pull-right\"></div>');\n",
+       "    var button = $('<button class=\"btn btn-mini btn-danger\" href=\"#\" title=\"Close figure\"><i class=\"fa fa-times icon-remove icon-large\"></i></button>');\n",
+       "    button.click(function (evt) { fig.handle_close(fig, {}); } );\n",
+       "    button.mouseover('Close figure', toolbar_mouse_event);\n",
+       "    buttongrp.append(button);\n",
+       "    var titlebar = this.root.find($('.ui-dialog-titlebar'));\n",
+       "    titlebar.prepend(buttongrp);\n",
+       "}\n",
+       "\n",
+       "\n",
+       "mpl.figure.prototype._canvas_extra_style = function(el){\n",
+       "    // this is important to make the div 'focusable\n",
+       "    el.attr('tabindex', 0)\n",
+       "    // reach out to IPython and tell the keyboard manager to turn it's self\n",
+       "    // off when our div gets focus\n",
+       "\n",
+       "    // location in version 3\n",
+       "    if (IPython.notebook.keyboard_manager) {\n",
+       "        IPython.notebook.keyboard_manager.register_events(el);\n",
+       "    }\n",
+       "    else {\n",
+       "        // location in version 2\n",
+       "        IPython.keyboard_manager.register_events(el);\n",
+       "    }\n",
+       "\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype._key_event_extra = function(event, name) {\n",
+       "    var manager = IPython.notebook.keyboard_manager;\n",
+       "    if (!manager)\n",
+       "        manager = IPython.keyboard_manager;\n",
+       "\n",
+       "    // Check for shift+enter\n",
+       "    if (event.shiftKey && event.which == 13) {\n",
+       "        this.canvas_div.blur();\n",
+       "        event.shiftKey = false;\n",
+       "        // Send a \"J\" for go to next cell\n",
+       "        event.which = 74;\n",
+       "        event.keyCode = 74;\n",
+       "        manager.command_mode();\n",
+       "        manager.handle_keydown(event);\n",
+       "    }\n",
+       "}\n",
+       "\n",
+       "mpl.figure.prototype.handle_save = function(fig, msg) {\n",
+       "    fig.ondownload(fig, null);\n",
+       "}\n",
+       "\n",
+       "\n",
+       "mpl.find_output_cell = function(html_output) {\n",
+       "    // Return the cell and output element which can be found *uniquely* in the notebook.\n",
+       "    // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n",
+       "    // IPython event is triggered only after the cells have been serialised, which for\n",
+       "    // our purposes (turning an active figure into a static one), is too late.\n",
+       "    var cells = IPython.notebook.get_cells();\n",
+       "    var ncells = cells.length;\n",
+       "    for (var i=0; i<ncells; i++) {\n",
+       "        var cell = cells[i];\n",
+       "        if (cell.cell_type === 'code'){\n",
+       "            for (var j=0; j<cell.output_area.outputs.length; j++) {\n",
+       "                var data = cell.output_area.outputs[j];\n",
+       "                if (data.data) {\n",
+       "                    // IPython >= 3 moved mimebundle to data attribute of output\n",
+       "                    data = data.data;\n",
+       "                }\n",
+       "                if (data['text/html'] == html_output) {\n",
+       "                    return [cell, data, j];\n",
+       "                }\n",
+       "            }\n",
+       "        }\n",
+       "    }\n",
+       "}\n",
+       "\n",
+       "// Register the function which deals with the matplotlib target/channel.\n",
+       "// The kernel may be null if the page has been refreshed.\n",
+       "if (IPython.notebook.kernel != null) {\n",
+       "    IPython.notebook.kernel.comm_manager.register_target('matplotlib', mpl.mpl_figure_comm);\n",
+       "}\n"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Javascript object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<img src=\"\">"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "%matplotlib nbagg\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# the plot says a an effective kM = 0.8 is reasonable\n",
+    "k = np.arange(0, 2.**10)\n",
+    "kM = k[-1]\n",
+    "fig = plt.figure(figsize=(4,3))\n",
+    "a = fig.add_subplot(111)\n",
+    "a.plot(k/kM, np.exp(-36*(k / kM)**32))\n",
+    "fig.tight_layout()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.4.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/setup.py b/setup.py
index 76de4ad3f5e3122ad46e3bd3c6702aeb5a47f83a..3fdf80d57f41f6620de62d62ce25c00f6603e3b4 100644
--- a/setup.py
+++ b/setup.py
@@ -31,26 +31,48 @@ import pickle
 AUTHOR = 'Cristian C Lalescu'
 AUTHOR_EMAIL = 'Cristian.Lalescu@ds.mpg.de'
 
+import os
 import datetime
 import subprocess
 from subprocess import CalledProcessError
 
 now = datetime.datetime.now()
-date_name = '{0:0>4}{1:0>2}{2:0>2}'.format(now.year, now.month, now.day)
 
 try:
+    git_branch = subprocess.check_output(['git',
+                                          'rev-parse',
+                                          '--abbrev-ref',
+                                          'HEAD']).strip().split()[-1].decode()
     git_revision = subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()
+    git_date = datetime.datetime.fromtimestamp(int(subprocess.check_output(['git', 'log', '-1', '--format=%ct']).strip()))
 except:
     git_revision = ''
-
-VERSION = date_name
+    git_branch = ''
+    git_date = now
+
+if git_branch == '':
+    # there's no git available or something
+    VERSION = '{0:0>4}{1:0>2}{2:0>2}.{3:0>2}{4:0>2}{5:0>2}'.format(
+                git_date.year, git_date.month, git_date.day,
+                git_date.hour, git_date.minute, git_date.second)
+else:
+    if (('develop' in git_branch) or
+        ('feature' in git_branch) or
+        ('bugfix'  in git_branch)):
+        VERSION = subprocess.check_output(['git', 'describe', '--tags']).strip().decode()
+    else:
+        VERSION = subprocess.check_output(['git', 'describe', '--tags']).strip().decode().split('-')[0]
+
+print(VERSION)
 
 src_file_list = ['field_descriptor',
+                 'fluid_solver_base',
+                 'fluid_solver',
                  'interpolator',
+                 'rFFTW_interpolator',
                  'particles',
+                 'rFFTW_particles',
                  'fftw_tools',
-                 'fluid_solver_base',
-                 'fluid_solver',
                  'spline_n1',
                  'spline_n2',
                  'spline_n3',
@@ -61,10 +83,9 @@ src_file_list = ['field_descriptor',
 
 header_list = ['cpp/base.hpp'] + ['cpp/' + fname + '.hpp' for fname in src_file_list]
 
-# not sure we need the MANIFEST.in file, but I might as well
-#with open('MANIFEST.in', 'w') as manifest_in_file:
-#    for fname in ['bfps/cpp/' + fname + '.cpp' for fname in src_file_list] + header_list:
-#        manifest_in_file.write('include {0}\n'.format(fname))
+with open('MANIFEST.in', 'w') as manifest_in_file:
+    for fname in ['bfps/cpp/' + fname + '.cpp' for fname in src_file_list] + header_list:
+        manifest_in_file.write('include {0}\n'.format(fname))
 
 libraries = ['fftw3_mpi',
              'fftw3',
@@ -83,22 +104,65 @@ pickle.dump(
         open('bfps/install_info.pickle', 'wb'),
         protocol = 2)
 
-from setuptools import setup, Extension
-
-libbfps = Extension(
-        'libbfps',
-        sources = ['bfps/cpp/' + fname + '.cpp' for fname in src_file_list],
-        include_dirs = include_dirs,
-        libraries = libraries,
-        extra_compile_args = extra_compile_args,
-        library_dirs = library_dirs)
+def compile_bfps_library():
+    if not os.path.isdir('obj'):
+        os.makedirs('obj')
+        need_to_compile = True
+    else:
+        ofile = 'bfps/libbfps.a'
+        libtime = datetime.datetime.fromtimestamp(os.path.getctime(ofile))
+        latest = libtime
+        for fname in header_list:
+            latest = max(latest,
+                         datetime.datetime.fromtimestamp(os.path.getctime('bfps/' + fname)))
+        need_to_compile = (latest > libtime)
+    for fname in src_file_list:
+        ifile = 'bfps/cpp/' + fname + '.cpp'
+        ofile = 'obj/' + fname + '.o'
+        if not os.path.exists(ofile):
+            need_to_compile_file = True
+        else:
+            need_to_compile_file = (need_to_compile or
+                                    (datetime.datetime.fromtimestamp(os.path.getctime(ofile)) <
+                                     datetime.datetime.fromtimestamp(os.path.getctime(ifile))))
+        if need_to_compile_file:
+            command_strings = ['g++', '-c']
+            command_strings += ['bfps/cpp/' + fname + '.cpp']
+            command_strings += ['-o', 'obj/' + fname + '.o']
+            command_strings += extra_compile_args
+            command_strings += ['-I' + idir for idir in include_dirs]
+            command_strings.append('-Ibfps/cpp/')
+            print(' '.join(command_strings))
+            assert(subprocess.call(command_strings) == 0)
+    command_strings = ['ar', 'rvs', 'bfps/libbfps.a']
+    command_strings += ['obj/' + fname + '.o' for fname in src_file_list]
+    print(' '.join(command_strings))
+    assert(subprocess.call(command_strings) == 0)
+    return None
+
+from distutils.command.build import build as DistutilsBuild
+from distutils.command.install import install as DistutilsInstall
+
+class CustomBuild(DistutilsBuild):
+    def run(self):
+        compile_bfps_library()
+        DistutilsBuild.run(self)
+
+# this custom install leads to a broken installation. no idea why...
+class CustomInstall(DistutilsInstall):
+    def run(self):
+        compile_bfps_library()
+        DistutilsInstall.run(self)
+
+from setuptools import setup
 
 setup(
         name = 'bfps',
         packages = ['bfps'],
         install_requires = ['numpy>=1.8', 'h5py>=2.2.1'],
-        ext_modules = [libbfps],
+        cmdclass={'build' : CustomBuild},
         package_data = {'bfps': header_list + ['../machine_settings.py',
+                                               'libbfps.a',
                                                'install_info.pickle']},
 ########################################################################
 # useless stuff folows
diff --git a/tests/test_base.py b/tests/test_base.py
index 97950f010d5ab5e39f538a4aedf1b53c5a2d5c47..3be7d2f5dfdbcca79f0615fde141f340e8bd5be8 100755
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -40,6 +40,11 @@ parser.add_argument('--neighbours',
         type = int, dest = 'neighbours', default = 3)
 parser.add_argument('--smoothness',
         type = int, dest = 'smoothness', default = 2)
+parser.add_argument(
+        '--kMeta',
+        type = float,
+        dest = 'kMeta',
+        default = 2.0)
 
 def double(opt):
     old_simname = 'N{0:0>3x}'.format(opt.n)
@@ -77,23 +82,26 @@ def launch(
             fluid_precision = opt.precision,
             frozen_fields = opt.frozen,
             use_fftw_wisdom = False)
+    if code_class == bfps.NavierStokes:
+        c.QR_stats_on = True
     c.pars_from_namespace(opt)
     c.parameters['nx'] = opt.n
     c.parameters['ny'] = opt.n
     c.parameters['nz'] = opt.n
-    if type(nu) == type(None):
-        c.parameters['nu'] = 5.5*opt.n**(-4./3)
-    else:
-        c.parameters['nu'] = nu
+    c.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3)
     if type(dt) == type(None):
-        c.parameters['dt'] = .4 / opt.n
+        c.parameters['dt'] = (0.5 / opt.n)
     else:
         c.parameters['dt'] = dt
     c.parameters['niter_out'] = c.parameters['niter_todo']
     c.parameters['niter_part'] = 1
     c.parameters['famplitude'] = 0.2
+    c.fill_up_fluid_code()
     if c.parameters['nparticles'] > 0:
-        c.add_particle_fields(name = 'regular', neighbours = opt.neighbours, smoothness = opt.smoothness)
+        c.add_particle_fields(
+                name = 'regular',
+                neighbours = opt.neighbours,
+                smoothness = opt.smoothness)
         c.add_particle_fields(kcut = 'fs->kM/2', name = 'filtered', neighbours = opt.neighbours)
         c.add_particles(
                 kcut = 'fs->kM/2',
@@ -105,16 +113,14 @@ def launch(
         #            neighbours = opt.neighbours,
         #            smoothness = opt.smoothness,
         #            fields_name = 'regular')
-        for info in [(2, 'Heun'),
-                     (2, 'AdamsBashforth'),
-                     (4, 'cRK4'),
+        for info in [(2, 'AdamsBashforth'),
+                     (3, 'AdamsBashforth'),
                      (4, 'AdamsBashforth'),
                      (6, 'AdamsBashforth')]:
             c.add_particles(
                     integration_steps = info[0],
                     integration_method = info[1],
                     fields_name = 'regular')
-    c.fill_up_fluid_code()
     c.finalize_code()
     c.write_src()
     c.write_par()
@@ -122,7 +128,9 @@ def launch(
     if opt.run:
         if opt.iteration == 0 and opt.initialize:
             if type(vorticity_field) == type(None):
-                c.generate_vector_field(write_to_file = True, spectra_slope = 1.5)
+                c.generate_vector_field(write_to_file = True,
+                                        spectra_slope = 2.0,
+                                        amplitude = 0.25)
             else:
                 vorticity_field.tofile(
                         os.path.join(c.work_dir,
diff --git a/tests/test_convergence.py b/tests/test_convergence.py
index 0b27504d10c275c79fabb4aa5df8fb715758a1b9..636dea9bc9487241b62166d35ac7367272e513b3 100755
--- a/tests/test_convergence.py
+++ b/tests/test_convergence.py
@@ -71,7 +71,9 @@ def convergence_test(
             code_class = code_class,
             tracer_state_file = h5py.File(os.path.join(c0.work_dir, c0.simname + '.h5'), 'r'))
     # get real space fields
-    converter = bfps.fluid_converter(fluid_precision = opt.precision)
+    converter = bfps.fluid_converter(
+            fluid_precision = opt.precision,
+            use_fftw_wisdom = False)
     converter.write_src()
     converter.set_host_info({'type' : 'pc'})
     for c in [c0, c1, c2]:
diff --git a/tests/test_plain.py b/tests/test_plain.py
index 596eb0e9323c81e4d0c8c504b093aa2b1c95b0f9..7437f1f61c9694c0e8828b4946c54e840a22461e 100755
--- a/tests/test_plain.py
+++ b/tests/test_plain.py
@@ -38,7 +38,11 @@ def plain(opt):
     opt.work_dir = wd + '/N{0:0>3x}_1'.format(opt.n)
     c0 = launch(opt, dt = 0.2/opt.n)
     c0.compute_statistics()
-    df = c0.get_data_file()
+    print ('Re = {0:.0f}'.format(c0.statistics['Re']))
+    print ('Rlambda = {0:.0f}'.format(c0.statistics['Rlambda']))
+    print ('Lint = {0:.4e}, etaK = {1:.4e}'.format(c0.statistics['Lint'], c0.statistics['etaK']))
+    print ('Tint = {0:.4e}, tauK = {1:.4e}'.format(c0.statistics['Tint'], c0.statistics['tauK']))
+    print ('kMetaK = {0:.4e}'.format(c0.statistics['kMeta']))
     for s in range(c0.particle_species):
         acceleration_test(c0, species = s, m = 1)
     if not opt.multiplejob:
@@ -46,12 +50,12 @@ def plain(opt):
     assert(opt.niter_todo % 3 == 0)
     opt.work_dir = wd + '/N{0:0>3x}_2'.format(opt.n)
     opt.njobs *= 2
-    opt.niter_todo /= 2
+    opt.niter_todo = opt.niter_todo//2
     c1 = launch(opt, dt = c0.parameters['dt'])
     c1.compute_statistics()
     opt.work_dir = wd + '/N{0:0>3x}_3'.format(opt.n)
-    opt.njobs = 3*opt.njobs/2
-    opt.niter_todo = 2*opt.niter_todo/3
+    opt.njobs = 3*opt.njobs//2
+    opt.niter_todo = 2*opt.niter_todo//3
     c2 = launch(opt, dt = c0.parameters['dt'])
     c2.compute_statistics()
     # plot energy and enstrophy
diff --git a/todo.txt b/todo.txt
new file mode 100644
index 0000000000000000000000000000000000000000..71c2677e5fa6af740a768f8fbee676c3357e1a75
--- /dev/null
+++ b/todo.txt
@@ -0,0 +1,16 @@
+(B) FFTW interpolator doesn't need its own field            @optimization +v1.0
+(B) compute z polynomials only when needed                  @optimization +v1.0
+(B) get rid of temporal interpolation                       @optimization +v1.0
+(B) move get_grid coords to interpolator                    @optimization +v1.0
+(B) use less memory                                         @optimization
+(C) clean up machine_settings mess                          @design @documentation +v2.0
+(C) code overview                                           @documentation
+(C) move stat I/O to cpp lib                                @design @HDF5
+(C) set up mechanism for adding in new PDEs                 @design +v2.0 +alternate_algorithms
+(C) test involving hydrodynamic similarity                  @tests
+(C) use HDF5 io for fields                                  @design @HDF5 +I/O
+(D) test anisotropic grids                                  @tests
+(D) test non-cubic domains                                  @tests
+(E) add u-equation algorithm for testing purposes           @tests +alternate_algorithms
+(E) pure python DNS addon: pros and cons                    @tests +alternate_algorithms
+(F) add switch to turn off simulation
diff --git a/tox_convergence.ini b/tox_convergence.ini
index a0123d4a011508ae6a25e5a7cd22d77e97241f48..39a368ff0d1954cb841cbaab41e5a8d98bb68031 100644
--- a/tox_convergence.ini
+++ b/tox_convergence.ini
@@ -1,6 +1,7 @@
 [tox]
-envlist = py27
+envlist = py34
 [testenv]
+deps = matplotlib
 whitelist_externals =
     echo
     cp
@@ -14,7 +15,7 @@ commands =
         --run \
         --initialize \
         --ncpu 2 \
-        --nparticles 32 \
+        --nparticles 1 \
         --niter_todo 16 \
         --precision single \
         --wd "data/single"
@@ -23,7 +24,7 @@ commands =
         --run \
         --initialize \
         --ncpu 2 \
-        --nparticles 32 \
+        --nparticles 1 \
         --niter_todo 16 \
         --precision double \
         --wd "data/double"
diff --git a/tox_io.ini b/tox_io.ini
index 994d35525286ef31df4a896a341f796287c3ba2e..de8c2fc5d0bb5fa7498527ecb4b96b32ca000c69 100644
--- a/tox_io.ini
+++ b/tox_io.ini
@@ -1,6 +1,7 @@
 [tox]
-envlist = py27
+envlist = py27, py34
 [testenv]
+deps = matplotlib
 whitelist_externals =
     echo
     cp
diff --git a/tox_plain.ini b/tox_plain.ini
index f01ef6218ab4ae9937535a55134439dffb4975ee..1aad555519ed22f2e3c678b96639089120dc7f8c 100644
--- a/tox_plain.ini
+++ b/tox_plain.ini
@@ -1,20 +1,31 @@
 [tox]
 envlist = py27
 [testenv]
+sitepackages = True
+#deps = matplotlib
 whitelist_externals =
     echo
     cp
+    g++
 passenv = LD_LIBRARY_PATH
 changedir =
     {envtmpdir}
 commands =
     cp -r {toxinidir}/tests {envtmpdir}
     #python tests/test_plain.py -n 256 --run --initialize --ncpu 8 --niter_todo 8 --precision single --wd "data/single"
-    python tests/test_plain.py \
-        -n 32 --run --initialize --ncpu 4 \
-        --nparticles 10 --niter_todo 24 \
-        --precision single --wd "data/single" #\
-        #--multiplejob
-    #python tests/test_plain.py -n 32 --run --initialize --multiplejob --ncpu 2 --nparticles 16 --niter_todo 64 --precision single --wd "data/single"
-    #python tests/test_plain.py -n 32 --run --initialize --multiplejob --ncpu 2 --nparticles 16 --niter_todo 64 --precision double --wd "data/double"
+    #python tests/test_plain.py \
+    #    -n 128 --run --initialize --ncpu 2 \
+    #    --nparticles 100 --niter_todo 24 \
+    #    --precision single --wd "data/single" #\
+    #    #--multiplejob
+    python tests/test_plain.py -n 32 \
+        --run --initialize --multiplejob \
+        --ncpu 2 --nparticles 16 \
+        --niter_todo 48 \
+        --precision single --wd "data/single"
+    python tests/test_plain.py -n 32 \
+        --run --initialize --multiplejob \
+        --ncpu 2 --nparticles 16 \
+        --niter_todo 48 \
+        --precision double --wd "data/double"