diff --git a/bfps/FluidConvert.py b/bfps/FluidConvert.py
index 14be9b985139fabf3b7e1cda1b5f9ee9618a8307..ca462ed3c3700455f5e3f6591003224f5c1b2452 100644
--- a/bfps/FluidConvert.py
+++ b/bfps/FluidConvert.py
@@ -43,7 +43,7 @@ class FluidConvert(_fluid_particle_base):
             work_dir = './',
             simname = 'test',
             fluid_precision = 'single',
-            use_fftw_wisdom = True):
+            use_fftw_wisdom = False):
         _fluid_particle_base.__init__(
                 self,
                 name = name + '-' + fluid_precision,
@@ -109,11 +109,13 @@ class FluidConvert(_fluid_particle_base):
                 """
         self.fluid_end += 'delete fs;\n'
         return None
-    def add_parser_arguments(
+    def specific_parser_arguments(
             self,
             parser):
-        _fluid_particle_base.add_parser_arguments(self, parser)
-        self.parameters_to_parser_arguments(parser, parameters = self.spec_parameters)
+        _fluid_particle_base.specific_parser_arguments(self, parser)
+        self.parameters_to_parser_arguments(
+                parser,
+                parameters = self.spec_parameters)
         return None
     def launch(
             self,
@@ -125,13 +127,13 @@ class FluidConvert(_fluid_particle_base):
         self.pars_from_namespace(
                 opt,
                 parameters = self.spec_parameters)
-        self.set_host_info(bfps.host_info)
         self.rewrite_par(
                 group = 'conversion_parameters',
                 parameters = self.spec_parameters)
-        self.run(
-                ncpu = opt.ncpu,
-                err_file = 'err_convert',
-                out_file = 'out_convert')
+        self.run(ncpu = opt.ncpu,
+                 hours = opt.minutes // 60,
+                 minutes = opt.minutes % 60,
+                 err_file = 'err_convert',
+                 out_file = 'out_convert')
         return None
 
diff --git a/bfps/NSVorticityEquation.py b/bfps/NSVorticityEquation.py
new file mode 100644
index 0000000000000000000000000000000000000000..36ccfbc349a84986b33b84c7fa50cb42d4937b4b
--- /dev/null
+++ b/bfps/NSVorticityEquation.py
@@ -0,0 +1,610 @@
+#######################################################################
+#                                                                     #
+#  Copyright 2015 Max Planck Institute                                #
+#                 for Dynamics and Self-Organization                  #
+#                                                                     #
+#  This file is part of bfps.                                         #
+#                                                                     #
+#  bfps is free software: you can redistribute it and/or modify       #
+#  it under the terms of the GNU General Public License as published  #
+#  by the Free Software Foundation, either version 3 of the License,  #
+#  or (at your option) any later version.                             #
+#                                                                     #
+#  bfps is distributed in the hope that it will be useful,            #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
+#  GNU General Public License for more details.                       #
+#                                                                     #
+#  You should have received a copy of the GNU General Public License  #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
+#                                                                     #
+# Contact: Cristian.Lalescu@ds.mpg.de                                 #
+#                                                                     #
+#######################################################################
+
+
+
+import sys
+import os
+import numpy as np
+import h5py
+import argparse
+
+import bfps
+import bfps.tools
+from bfps._code import _code
+from bfps._fluid_base import _fluid_particle_base
+
+class NSVorticityEquation(_fluid_particle_base):
+    def __init__(
+            self,
+            name = 'NSVE-v' + bfps.__version__,
+            work_dir = './',
+            simname = 'test',
+            fluid_precision = 'single',
+            fftw_plan_rigor = 'FFTW_MEASURE',
+            use_fftw_wisdom = True):
+        self.fftw_plan_rigor = fftw_plan_rigor
+        _fluid_particle_base.__init__(
+                self,
+                name = name + '-' + fluid_precision,
+                work_dir = work_dir,
+                simname = simname,
+                dtype = fluid_precision,
+                use_fftw_wisdom = use_fftw_wisdom)
+        self.parameters['nu'] = 0.1
+        self.parameters['fmode'] = 1
+        self.parameters['famplitude'] = 0.5
+        self.parameters['fk0'] = 2.0
+        self.parameters['fk1'] = 4.0
+        self.parameters['forcing_type'] = 'linear'
+        self.parameters['histogram_bins'] = 256
+        self.parameters['max_velocity_estimate'] = 1.0
+        self.parameters['max_vorticity_estimate'] = 1.0
+        self.file_datasets_grow = """
+                //begincpp
+                hid_t group;
+                group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT);
+                H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL);
+                H5Gclose(group);
+                //endcpp
+                """
+        self.style = {}
+        self.statistics = {}
+        self.fluid_output = """
+                {
+                    char file_name[512];
+                    fs->fill_up_filename("cvorticity", file_name);
+                    fs->cvorticity->io(
+                        file_name,
+                        "vorticity",
+                        fs->iteration,
+                        false);
+                }
+                """
+        # vorticity_equation specific things
+        self.includes += '#include "vorticity_equation.hpp"\n'
+        self.store_kspace = """
+                //begincpp
+                if (myrank == 0 && iteration == 0)
+                {
+                    DEBUG_MSG("about to store kspace\\n");
+                    TIMEZONE("fluid_base::store_kspace");
+                    hsize_t dims[4];
+                    hid_t space, dset;
+                    // store kspace information
+                    hid_t parameter_file = stat_file;
+                    //char fname[256];
+                    //sprintf(fname, "%s.h5", simname);
+                    //parameter_file = H5Fopen(fname, H5F_ACC_RDWR, H5P_DEFAULT);
+                    dset = H5Dopen(parameter_file, "/kspace/kshell", H5P_DEFAULT);
+                    space = H5Dget_space(dset);
+                    H5Sget_simple_extent_dims(space, dims, NULL);
+                    H5Sclose(space);
+                    if (fs->kk->nshells != dims[0])
+                    {
+                        DEBUG_MSG(
+                            "ERROR: computed nshells %d not equal to data file nshells %d\\n",
+                            fs->kk->nshells, dims[0]);
+                    }
+                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kshell.front());
+                    H5Dclose(dset);
+                    dset = H5Dopen(parameter_file, "/kspace/nshell", H5P_DEFAULT);
+                    H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->nshell.front());
+                    H5Dclose(dset);
+                    dset = H5Dopen(parameter_file, "/kspace/kM", H5P_DEFAULT);
+                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kM);
+                    H5Dclose(dset);
+                    dset = H5Dopen(parameter_file, "/kspace/dk", H5P_DEFAULT);
+                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->dk);
+                    H5Dclose(dset);
+                    //H5Fclose(parameter_file);
+                    DEBUG_MSG("finished storing kspace\\n");
+                }
+                //endcpp
+                """
+        return None
+    def create_stat_output(
+            self,
+            dset_name,
+            data_buffer,
+            data_type = 'H5T_NATIVE_DOUBLE',
+            size_setup = None,
+            close_spaces = True):
+        new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name)
+        if not type(size_setup) == type(None):
+            new_stat_output_txt += (
+                    size_setup +
+                    'wspace = H5Dget_space(Cdset);\n' +
+                    'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' +
+                    'mspace = H5Screate_simple(ndims, count, NULL);\n' +
+                    'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n')
+        new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' +
+                                'H5Dclose(Cdset);\n').format(data_type, data_buffer)
+        if close_spaces:
+            new_stat_output_txt += ('H5Sclose(mspace);\n' +
+                                    'H5Sclose(wspace);\n')
+        return new_stat_output_txt
+    def write_fluid_stats(self):
+        self.fluid_includes += '#include <cmath>\n'
+        self.fluid_includes += '#include "fftw_tools.hpp"\n'
+        self.stat_src += """
+                //begincpp
+                DEBUG_MSG("inside stat src\\n");
+                hid_t stat_group;
+                if (myrank == 0)
+                    stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT);
+                fs->compute_velocity(fs->cvorticity);
+                *tmp_vec_field = fs->cvelocity->get_cdata();
+                tmp_vec_field->compute_stats(
+                    fs->kk,
+                    stat_group,
+                    "velocity",
+                    fs->iteration / niter_stat,
+                    max_velocity_estimate/sqrt(3));
+                DEBUG_MSG("after stats for %d\\n", fs->iteration);
+                //endcpp
+                """
+        self.stat_src += """
+                //begincpp
+                *tmp_vec_field = fs->cvorticity->get_cdata();
+                DEBUG_MSG("%g %g\\n",
+                    tmp_vec_field->cval(tmp_vec_field->get_cindex(2, 1, 1), 0, 0),
+                    tmp_vec_field->cval(tmp_vec_field->get_cindex(2, 1, 1), 0, 1));
+                tmp_vec_field->compute_stats(
+                    fs->kk,
+                    stat_group,
+                    "vorticity",
+                    fs->iteration / niter_stat,
+                    max_vorticity_estimate/sqrt(3));
+                DEBUG_MSG("after vort stats for %d\\n", fs->iteration);
+                //endcpp
+                """
+        self.stat_src += """
+                //begincpp
+                if (myrank == 0)
+                    H5Gclose(stat_group);
+                if (myrank == 0)
+                {{
+                    hid_t Cdset, wspace, mspace;
+                    int ndims;
+                    hsize_t count[4], offset[4], dims[4];
+                    offset[0] = fs->iteration/niter_stat;
+                    offset[1] = 0;
+                    offset[2] = 0;
+                    offset[3] = 0;
+                //endcpp
+                """.format(self.C_dtype)
+        if self.dtype == np.float32:
+            field_H5T = 'H5T_NATIVE_FLOAT'
+        elif self.dtype == np.float64:
+            field_H5T = 'H5T_NATIVE_DOUBLE'
+        self.stat_src += self.create_stat_output(
+                '/statistics/xlines/velocity',
+                'fs->rvelocity->get_rdata()',
+                data_type = field_H5T,
+                size_setup = """
+                    count[0] = 1;
+                    count[1] = nx;
+                    count[2] = 3;
+                    """,
+                close_spaces = False)
+        self.stat_src += self.create_stat_output(
+                '/statistics/xlines/vorticity',
+                'fs->rvorticity->get_rdata()',
+                data_type = field_H5T)
+        self.stat_src += '}\n'
+        return None
+    def fill_up_fluid_code(self):
+        self.fluid_includes += '#include <cstring>\n'
+        self.fluid_variables += (
+                'vorticity_equation<{0}, FFTW> *fs;\n'.format(self.C_dtype) +
+                'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) +
+                'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype))
+        self.fluid_definitions += """
+                    typedef struct {{
+                        {0} re;
+                        {0} im;
+                    }} tmp_complex_type;
+                    """.format(self.C_dtype)
+        self.write_fluid_stats()
+        if self.dtype == np.float32:
+            field_H5T = 'H5T_NATIVE_FLOAT'
+        elif self.dtype == np.float64:
+            field_H5T = 'H5T_NATIVE_DOUBLE'
+        self.fluid_start += """
+                //begincpp
+                char fname[512];
+                fs = new vorticity_equation<{0}, FFTW>(
+                        simname,
+                        nx, ny, nz,
+                        dkx, dky, dkz,
+                        {1});
+                tmp_vec_field = new field<{0}, FFTW, THREE>(
+                        nx, ny, nz,
+                        MPI_COMM_WORLD,
+                        {1});
+                tmp_scal_field = new field<{0}, FFTW, ONE>(
+                        nx, ny, nz,
+                        MPI_COMM_WORLD,
+                        {1});
+                fs->nu = nu;
+                fs->fmode = fmode;
+                fs->famplitude = famplitude;
+                fs->fk0 = fk0;
+                fs->fk1 = fk1;
+                strncpy(fs->forcing_type, forcing_type, 128);
+                fs->iteration = iteration;
+                {{
+                    char file_name[512];
+                    fs->fill_up_filename("cvorticity", file_name);
+                    fs->cvorticity->real_space_representation = false;
+                    fs->cvorticity->io(
+                        file_name,
+                        "vorticity",
+                        fs->iteration,
+                        true);
+                    fs->kk->template low_pass<{0}, THREE>(fs->cvorticity->get_cdata(), fs->kk->kM);
+                    fs->kk->template force_divfree<{0}>(fs->cvorticity->get_cdata());
+                }}
+                //endcpp
+                """.format(self.C_dtype, self.fftw_plan_rigor, field_H5T)
+        self.fluid_start += self.store_kspace
+        self.fluid_loop = ('fs->step(dt);\n' +
+                           'DEBUG_MSG("this is step %d\\n", fs->iteration);\n')
+        self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' +
+                            self.fluid_output + '\n}\n')
+        self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' +
+                          self.fluid_output + '\n}\n' +
+                          'delete fs;\n' +
+                          'delete tmp_vec_field;\n' +
+                          'delete tmp_scal_field;\n')
+        return None
+    def get_postprocess_file_name(self):
+        return os.path.join(self.work_dir, self.simname + '_postprocess.h5')
+    def get_postprocess_file(self):
+        return h5py.File(self.get_postprocess_file_name(), 'r')
+    def compute_statistics(self, iter0 = 0, iter1 = None):
+        """Run basic postprocessing on raw data.
+        The energy spectrum :math:`E(t, k)` and the enstrophy spectrum
+        :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the
+
+        .. math::
+
+            \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm
+            \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^*
+
+        tensors, and the enstrophy spectrum is also used to
+        compute the dissipation :math:`\\varepsilon(t)`.
+        These basic quantities are stored in a newly created HDF5 file,
+        ``simname_postprocess.h5``.
+        """
+        if len(list(self.statistics.keys())) > 0:
+            return None
+        self.read_parameters()
+        with self.get_data_file() as data_file:
+            if 'moments' not in data_file['statistics'].keys():
+                return None
+            iter0 = min((data_file['statistics/moments/velocity'].shape[0] *
+                         self.parameters['niter_stat']-1),
+                        iter0)
+            if type(iter1) == type(None):
+                iter1 = data_file['iteration'].value
+            else:
+                iter1 = min(data_file['iteration'].value, iter1)
+            ii0 = iter0 // self.parameters['niter_stat']
+            ii1 = iter1 // self.parameters['niter_stat']
+            self.statistics['kshell'] = data_file['kspace/kshell'].value
+            self.statistics['kM'] = data_file['kspace/kM'].value
+            self.statistics['dk'] = data_file['kspace/dk'].value
+            computation_needed = True
+            pp_file = h5py.File(self.get_postprocess_file_name(), 'a')
+            if 'ii0' in pp_file.keys():
+                computation_needed =  not (ii0 == pp_file['ii0'].value and
+                                           ii1 == pp_file['ii1'].value)
+                if computation_needed:
+                    for k in pp_file.keys():
+                        del pp_file[k]
+            if computation_needed:
+                pp_file['iter0'] = iter0
+                pp_file['iter1'] = iter1
+                pp_file['ii0'] = ii0
+                pp_file['ii1'] = ii1
+                pp_file['t'] = (self.parameters['dt']*
+                                self.parameters['niter_stat']*
+                                (np.arange(ii0, ii1+1).astype(np.float)))
+                pp_file['energy(t, k)'] = (
+                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] +
+                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] +
+                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2
+                pp_file['enstrophy(t, k)'] = (
+                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] +
+                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] +
+                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2
+                pp_file['vel_max(t)'] = data_file['statistics/moments/velocity']  [ii0:ii1+1, 9, 3]
+                pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2
+            for k in ['t',
+                      'energy(t, k)',
+                      'enstrophy(t, k)',
+                      'vel_max(t)',
+                      'renergy(t)']:
+                if k in pp_file.keys():
+                    self.statistics[k] = pp_file[k].value
+            self.compute_time_averages()
+        return None
+    def compute_time_averages(self):
+        """Compute easy stats.
+
+        Further computation of statistics based on the contents of
+        ``simname_postprocess.h5``.
+        Standard quantities are as follows
+        (consistent with [Ishihara]_):
+
+        .. math::
+
+            U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm
+            L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm
+            T_{\\textrm{int}}(t) =
+            \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)}
+
+            \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm
+            \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm
+            \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}}
+
+            Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip
+            .5cm
+            R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu}
+
+        .. [Ishihara] T. Ishihara et al,
+                      *Small-scale statistics in high-resolution direct numerical
+                      simulation of turbulence: Reynolds number dependence of
+                      one-point velocity gradient statistics*.
+                      J. Fluid Mech.,
+                      **592**, 335-366, 2007
+        """
+        for key in ['energy', 'enstrophy']:
+            self.statistics[key + '(t)'] = (self.statistics['dk'] *
+                                            np.sum(self.statistics[key + '(t, k)'], axis = 1))
+        self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3)
+        self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi /
+                                       (2*self.statistics['Uint(t)']**2)) *
+                                      np.nansum(self.statistics['energy(t, k)'] /
+                                                self.statistics['kshell'][None, :], axis = 1))
+        for key in ['energy',
+                    'enstrophy',
+                    'vel_max',
+                    'Uint',
+                    'Lint']:
+            if key + '(t)' in self.statistics.keys():
+                self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0)
+        for suffix in ['', '(t)']:
+            self.statistics['diss'    + suffix] = (self.parameters['nu'] *
+                                                   self.statistics['enstrophy' + suffix]*2)
+            self.statistics['etaK'    + suffix] = (self.parameters['nu']**3 /
+                                                   self.statistics['diss' + suffix])**.25
+            self.statistics['tauK'    + suffix] =  (self.parameters['nu'] /
+                                                    self.statistics['diss' + suffix])**.5
+            self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] *
+                                              self.statistics['Lint' + suffix] /
+                                              self.parameters['nu'])
+            self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] *
+                                                  self.statistics['Uint' + suffix]**2 /
+                                                  self.statistics['diss' + suffix])**.5
+            self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] *
+                                                   self.statistics['lambda' + suffix] /
+                                                   self.parameters['nu'])
+            self.statistics['kMeta' + suffix] = (self.statistics['kM'] *
+                                                 self.statistics['etaK' + suffix])
+            if self.parameters['dealias_type'] == 1:
+                self.statistics['kMeta' + suffix] *= 0.8
+        self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint']
+        self.statistics['Taylor_microscale'] = self.statistics['lambda']
+        return None
+    def set_plt_style(
+            self,
+            style = {'dashes' : (None, None)}):
+        self.style.update(style)
+        return None
+    def read_cfield(
+            self,
+            field_name = 'vorticity',
+            iteration = 0):
+        """read the Fourier representation of a vector field.
+
+        Read the binary file containing iteration ``iteration`` of the
+        field ``field_name``, and return it as a properly shaped
+        ``numpy.memmap`` object.
+        """
+        return np.memmap(
+                os.path.join(self.work_dir,
+                             self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)),
+                dtype = self.ctype,
+                mode = 'r',
+                shape = (self.parameters['ny'],
+                         self.parameters['nz'],
+                         self.parameters['nx']//2+1,
+                         3))
+    def write_par(
+            self,
+            iter0 = 0):
+        _fluid_particle_base.write_par(self, iter0 = iter0)
+        with h5py.File(self.get_data_file_name(), 'r+') as ofile:
+            kspace = self.get_kspace()
+            nshells = kspace['nshell'].shape[0]
+            vec_stat_datasets = ['velocity', 'vorticity']
+            scal_stat_datasets = []
+            for k in vec_stat_datasets:
+                time_chunk = 2**20//(8*3*self.parameters['nx']) # FIXME: use proper size of self.dtype
+                time_chunk = max(time_chunk, 1)
+                ofile.create_dataset('statistics/xlines/' + k,
+                                     (1, self.parameters['nx'], 3),
+                                     chunks = (time_chunk, self.parameters['nx'], 3),
+                                     maxshape = (None, self.parameters['nx'], 3),
+                                     dtype = self.dtype)
+            for k in vec_stat_datasets:
+                time_chunk = 2**20//(8*3*3*nshells)
+                time_chunk = max(time_chunk, 1)
+                ofile.create_dataset('statistics/spectra/' + k + '_' + k,
+                                     (1, nshells, 3, 3),
+                                     chunks = (time_chunk, nshells, 3, 3),
+                                     maxshape = (None, nshells, 3, 3),
+                                     dtype = np.float64)
+                time_chunk = 2**20//(8*4*10)
+                time_chunk = max(time_chunk, 1)
+                a = ofile.create_dataset('statistics/moments/' + k,
+                                     (1, 10, 4),
+                                     chunks = (time_chunk, 10, 4),
+                                     maxshape = (None, 10, 4),
+                                     dtype = np.float64)
+                time_chunk = 2**20//(8*4*self.parameters['histogram_bins'])
+                time_chunk = max(time_chunk, 1)
+                ofile.create_dataset('statistics/histograms/' + k,
+                                     (1,
+                                      self.parameters['histogram_bins'],
+                                      4),
+                                     chunks = (time_chunk,
+                                               self.parameters['histogram_bins'],
+                                               4),
+                                     maxshape = (None,
+                                                 self.parameters['histogram_bins'],
+                                                 4),
+                                     dtype = np.int64)
+        return None
+    def specific_parser_arguments(
+            self,
+            parser):
+        _fluid_particle_base.specific_parser_arguments(self, parser)
+        parser.add_argument(
+                '--src-wd',
+                type = str,
+                dest = 'src_work_dir',
+                default = '')
+        parser.add_argument(
+                '--src-simname',
+                type = str,
+                dest = 'src_simname',
+                default = '')
+        parser.add_argument(
+                '--src-iteration',
+                type = int,
+                dest = 'src_iteration',
+                default = 0)
+        parser.add_argument(
+               '--njobs',
+               type = int, dest = 'njobs',
+               default = 1)
+        parser.add_argument(
+               '--kMeta',
+               type = float,
+               dest = 'kMeta',
+               default = 2.0)
+        parser.add_argument(
+               '--dtfactor',
+               type = float,
+               dest = 'dtfactor',
+               default = 0.5,
+               help = 'dt is computed as DTFACTOR / N')
+        return None
+    def prepare_launch(
+            self,
+            args = []):
+        """Set up reasonable parameters.
+
+        With the default Lundgren forcing applied in the band [2, 4],
+        we can estimate the dissipation, therefore we can estimate
+        :math:`k_M \\eta_K` and constrain the viscosity.
+
+        In brief, the command line parameter :math:`k_M \\eta_K` is
+        used in the following formula for :math:`\\nu` (:math:`N` is the
+        number of real space grid points per coordinate):
+
+        .. math::
+
+            \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3}
+
+        With this choice, the average dissipation :math:`\\varepsilon`
+        will be close to 0.4, and the integral scale velocity will be
+        close to 0.77, yielding the approximate value for the Taylor
+        microscale and corresponding Reynolds number:
+
+        .. math::
+
+            \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in
+            R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6}
+
+        """
+        opt = _code.prepare_launch(self, args = args)
+        self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3)
+        self.parameters['dt'] = (opt.dtfactor / opt.n)
+        # custom famplitude for 288 and 576
+        if opt.n == 288:
+            self.parameters['famplitude'] = 0.45
+        elif opt.n == 576:
+            self.parameters['famplitude'] = 0.47
+        if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0):
+            self.parameters['niter_out'] = self.parameters['niter_todo']
+        if len(opt.src_work_dir) == 0:
+            opt.src_work_dir = os.path.realpath(opt.work_dir)
+        self.pars_from_namespace(opt)
+        return opt
+    def launch(
+            self,
+            args = [],
+            **kwargs):
+        opt = self.prepare_launch(args = args)
+        self.fill_up_fluid_code()
+        self.finalize_code()
+        self.launch_jobs(opt = opt)
+        return None
+    def launch_jobs(
+            self,
+            opt = None):
+        if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')):
+            self.write_par()
+            init_condition_file = os.path.join(
+                    self.work_dir,
+                    self.simname + '_cvorticity_i{0:0>5x}.h5'.format(0))
+            if not os.path.exists(init_condition_file):
+                if len(opt.src_simname) > 0:
+                    src_file = os.path.join(
+                            os.path.realpath(opt.src_work_dir),
+                            opt.src_simname + '_cvorticity_i{0:0>5x}.h5'.format(opt.src_iteration))
+                    os.symlink(src_file, init_condition_file)
+                else:
+                    data = self.generate_vector_field(
+                           write_to_file = False,
+                           spectra_slope = 2.0,
+                           amplitude = 0.05)
+                    f = h5py.File(init_condition_file, 'w')
+                    f['vorticity/complex/{0}'.format(0)] = data
+                    f.close()
+        self.run(
+                ncpu = opt.ncpu,
+                njobs = opt.njobs,
+                hours = opt.minutes // 60,
+                minutes = opt.minutes % 60)
+        return None
+
+if __name__ == '__main__':
+    pass
+
diff --git a/bfps/NavierStokes.py b/bfps/NavierStokes.py
index d3eaea031d53dce4ac8394c8d9a3026c7147c925..0c9ebcb9ab3454a9c35f0a746d88b13aefd63eb6 100644
--- a/bfps/NavierStokes.py
+++ b/bfps/NavierStokes.py
@@ -898,7 +898,6 @@ class NavierStokes(_fluid_particle_base):
                     chunks = (time_chunk, 1, 1) + dims[3:]
                 else:
                     chunks = (time_chunk, 1) + dims[2:]
-                chunks = (time_chunk, 1, 1) + dims[3:]
                 bfps.tools.create_alloc_early_dataset(
                         ofile,
                         '/tracers{0}/rhs'.format(s),
@@ -913,6 +912,8 @@ class NavierStokes(_fluid_particle_base):
                         (1,) + pbase_shape + (3,),
                         (h5py.h5s.UNLIMITED,) + pbase_shape + (3,),
                         chunks)
+                # "velocity" is sampled, single precision is enough
+                # for the results we are interested in.
                 bfps.tools.create_alloc_early_dataset(
                         ofile,
                         '/tracers{0}/velocity'.format(s),
diff --git a/bfps/__init__.py b/bfps/__init__.py
index 4a90f95268cffe3b0c2e1d68d7f4763a4c142e84..09663e1da56539eb51d257032444b38ba7096bc9 100644
--- a/bfps/__init__.py
+++ b/bfps/__init__.py
@@ -49,4 +49,5 @@ from host_information import host_info
 from .FluidConvert import FluidConvert
 from .FluidResize import FluidResize
 from .NavierStokes import NavierStokes
+from .NSVorticityEquation import NSVorticityEquation
 
diff --git a/bfps/__main__.py b/bfps/__main__.py
index a26d84d0e918cebe1a9351ca20b5249418d6a3c6..9db5e350340e67dfe99c5a40e3027b489399a42e 100644
--- a/bfps/__main__.py
+++ b/bfps/__main__.py
@@ -29,6 +29,7 @@ import argparse
 
 import bfps
 from .NavierStokes import NavierStokes
+from .NSVorticityEquation import NSVorticityEquation
 from .FluidResize import FluidResize
 from .FluidConvert import FluidConvert
 from .NSManyParticles import NSManyParticles
@@ -45,6 +46,12 @@ def main():
                  'NS',
                  'NS-single',
                  'NS-double']
+    NSVEoptions = ['NSVorticityEquation',
+                 'NSVorticityEquation-single',
+                 'NSVorticityEquation-double',
+                 'NSVE',
+                 'NSVE-single',
+                 'NSVE-double']
     FRoptions = ['FluidResize',
                  'FluidResize-single',
                  'FluidResize-double',
@@ -57,7 +64,7 @@ def main():
                'NSManyParticles-double']
     parser.add_argument(
             'base_class',
-            choices = NSoptions + FRoptions + FCoptions + NSMPopt,
+            choices = NSoptions + NSVEoptions + FRoptions + FCoptions + NSMPopt,
             type = str)
     # first option is the choice of base class or -h or -v
     # all other options are passed on to the base_class instance
@@ -70,6 +77,8 @@ def main():
         precision = 'single'
     if opt.base_class in NSoptions:
         base_class = NavierStokes
+    if opt.base_class in NSVEoptions:
+        base_class = NSVorticityEquation
     elif opt.base_class in FRoptions:
         base_class = FluidResize
     elif opt.base_class in FCoptions:
diff --git a/bfps/_base.py b/bfps/_base.py
index 71b1fcac23984da5b7c73d672ef44e6ab3127c4f..939c8766fc32c2c063e83a2e90758d4078de94a0 100644
--- a/bfps/_base.py
+++ b/bfps/_base.py
@@ -83,8 +83,7 @@ class _base(object):
                    'hid_t dset, memtype, space;\n' +
                    'char fname[256];\n' +
                    'hsize_t dims[1];\n' +
-                   'char *string_data;\n' +
-                   #'char string_data[512];\n' +
+                   'char string_data[512];\n' +
                    'sprintf(fname, "%s.h5", simname);\n' +
                    'parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);\n')
         for i in range(len(key)):
@@ -95,14 +94,8 @@ class _base(object):
             elif type(parameters[key[i]]) == str:
                 src_txt += ('space = H5Dget_space(dset);\n' +
                             'memtype = H5Dget_type(dset);\n' +
-                            #'H5Sget_simple_extent_dims(space, dims, NULL);\n' +
-                            #'DEBUG_MSG("dims[0] = %ld\\n", dims[0]);\n' +
-                            'string_data = (char*)malloc(512*sizeof(char));\n' +
                             'H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);\n' +
                             'sprintf({0}, "%s", string_data);\n'.format(key[i]) +
-                            #'DEBUG_MSG("{0} = %s\\n", {0});\n'.format(key[i]) +
-                            #'delete[] string_data;\n' +
-                            'free(string_data);\n'
                             'H5Sclose(space);\n' +
                             'H5Tclose(memtype);\n')
             elif type(parameters[key[i]]) == np.ndarray:
diff --git a/bfps/cpp/distributed_particles.cpp b/bfps/cpp/distributed_particles.cpp
index c6ae8a282dc0b695354506ccaec873930f30572d..73fd0275d8138d41bb4ee7fbc28e2d41e8017661 100644
--- a/bfps/cpp/distributed_particles.cpp
+++ b/bfps/cpp/distributed_particles.cpp
@@ -45,17 +45,17 @@ template <particle_types particle_type, class rnumber, int interp_neighbours>
 distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_particles(
         const char *NAME,
         const hid_t data_file_id,
-        interpolator<rnumber, interp_neighbours> *FIELD,
+        interpolator<rnumber, interp_neighbours> *VEL,
         const int TRAJ_SKIP,
         const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
             NAME,
             TRAJ_SKIP,
             data_file_id,
-            FIELD->descriptor->comm)
+            VEL->descriptor->comm)
 {
     assert((INTEGRATION_STEPS <= 6) &&
            (INTEGRATION_STEPS >= 1));
-    this->vel = FIELD;
+    this->vel = VEL;
     this->rhs.resize(INTEGRATION_STEPS);
     this->integration_steps = INTEGRATION_STEPS;
     this->state.reserve(2*this->nparticles / this->nprocs);
diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp
index 62941fc4290dacdd49bca0ba4cf088459607bad3..5cc2e7a32a06a239714e22c169463b0eaed07b1c 100644
--- a/bfps/cpp/field.cpp
+++ b/bfps/cpp/field.cpp
@@ -23,6 +23,7 @@
 **********************************************************************/
 
 
+#include <sys/stat.h>
 #include <cmath>
 #include <cstdlib>
 #include <algorithm>
@@ -31,83 +32,7 @@
 #include "scope_timer.hpp"
 #include "shared_array.hpp"
 
-template <field_components fc>
-field_layout<fc>::field_layout(
-        const hsize_t *SIZES,
-        const hsize_t *SUBSIZES,
-        const hsize_t *STARTS,
-        const MPI_Comm COMM_TO_USE)
-{
-    TIMEZONE("field_layout::field_layout");
-    this->comm = COMM_TO_USE;
-    MPI_Comm_rank(this->comm, &this->myrank);
-    MPI_Comm_size(this->comm, &this->nprocs);
 
-    std::copy(SIZES, SIZES + 3, this->sizes);
-    std::copy(SUBSIZES, SUBSIZES + 3, this->subsizes);
-    std::copy(STARTS, STARTS + 3, this->starts);
-    if (fc == THREE || fc == THREExTHREE)
-    {
-        this->sizes[3] = 3;
-        this->subsizes[3] = 3;
-        this->starts[3] = 0;
-    }
-    if (fc == THREExTHREE)
-    {
-        this->sizes[4] = 3;
-        this->subsizes[4] = 3;
-        this->starts[4] = 0;
-    }
-    this->local_size = 1;
-    this->full_size = 1;
-    for (unsigned int i=0; i<ndim(fc); i++)
-    {
-        this->local_size *= this->subsizes[i];
-        this->full_size *= this->sizes[i];
-    }
-
-    /*field will at most be distributed in 2D*/
-    this->rank.resize(2);
-    this->all_start.resize(2);
-    this->all_size.resize(2);
-    for (int i=0; i<2; i++)
-    {
-        this->rank[i].resize(this->sizes[i]);
-        std::vector<int> local_rank;
-        local_rank.resize(this->sizes[i], 0);
-        for (unsigned int ii=this->starts[i]; ii<this->starts[i]+this->subsizes[i]; ii++)
-            local_rank[ii] = this->myrank;
-        MPI_Allreduce(
-                &local_rank.front(),
-                &this->rank[i].front(),
-                this->sizes[i],
-                MPI_INT,
-                MPI_SUM,
-                this->comm);
-        this->all_start[i].resize(this->nprocs);
-        std::vector<int> local_start;
-        local_start.resize(this->nprocs, 0);
-        local_start[this->myrank] = this->starts[i];
-        MPI_Allreduce(
-                &local_start.front(),
-                &this->all_start[i].front(),
-                this->nprocs,
-                MPI_INT,
-                MPI_SUM,
-                this->comm);
-        this->all_size[i].resize(this->nprocs);
-        std::vector<int> local_subsize;
-        local_subsize.resize(this->nprocs, 0);
-        local_subsize[this->myrank] = this->subsizes[i];
-        MPI_Allreduce(
-                &local_subsize.front(),
-                &this->all_size[i].front(),
-                this->nprocs,
-                MPI_INT,
-                MPI_SUM,
-                this->comm);
-    }
-}
 
 template <typename rnumber,
           field_backend be,
@@ -169,47 +94,27 @@ field<rnumber, be, fc>::field(
             starts[0] = local_0_start; starts[1] = 0; starts[2] = 0;
             this->rmemlayout = new field_layout<fc>(
                     sizes, subsizes, starts, this->comm);
-            sizes[0] = nz; sizes[1] = ny; sizes[2] = nx/2+1;
-            subsizes[0] = local_n1; subsizes[1] = ny; subsizes[2] = nx/2+1;
+            sizes[0] = ny; sizes[1] = nz; sizes[2] = nx/2+1;
+            subsizes[0] = local_n1; subsizes[1] = nz; subsizes[2] = nx/2+1;
             starts[0] = local_1_start; starts[1] = 0; starts[2] = 0;
             this->clayout = new field_layout<fc>(
                     sizes, subsizes, starts, this->comm);
-            this->data = (rnumber*)fftw_malloc(
-                    sizeof(rnumber)*this->rmemlayout->local_size);
-            if(typeid(rnumber) == typeid(float))
-            {
-                this->c2r_plan = new fftwf_plan;
-                this->r2c_plan = new fftwf_plan;
-                *((fftwf_plan*)this->c2r_plan) = fftwf_mpi_plan_many_dft_c2r(
-                        3, nfftw, ncomp(fc),
-                        FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                        (fftwf_complex*)this->data, (float*)this->data,
-                        this->comm,
-                        this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-                *((fftwf_plan*)this->r2c_plan) = fftwf_mpi_plan_many_dft_r2c(
-                        3, nfftw, ncomp(fc),
-                        FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                        (float*)this->data, (fftwf_complex*)this->data,
-                        this->comm,
-                        this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
-            }
-            if (typeid(rnumber) == typeid(double))
-            {
-                this->c2r_plan = new fftw_plan;
-                this->r2c_plan = new fftw_plan;
-                *((fftw_plan*)this->c2r_plan) = fftw_mpi_plan_many_dft_c2r(
-                        3, nfftw, ncomp(fc),
-                        FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                        (fftw_complex*)this->data, (double*)this->data,
-                        this->comm,
-                        this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-                *((fftw_plan*)this->r2c_plan) = fftw_mpi_plan_many_dft_r2c(
-                        3, nfftw, ncomp(fc),
-                        FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                        (double*)this->data, (fftw_complex*)this->data,
-                        this->comm,
-                        this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
-            }
+            this->data = fftw_interface<rnumber>::alloc_real(
+                    this->rmemlayout->local_size);
+            this->c2r_plan = fftw_interface<rnumber>::mpi_plan_many_dft_c2r(
+                    3, nfftw, ncomp(fc),
+                    FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
+                    (typename fftw_interface<rnumber>::complex*)this->data,
+                    this->data,
+                    this->comm,
+                    this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
+            this->r2c_plan = fftw_interface<rnumber>::mpi_plan_many_dft_r2c(
+                    3, nfftw, ncomp(fc),
+                    FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
+                    this->data,
+                    (typename fftw_interface<rnumber>::complex*)this->data,
+                    this->comm,
+                    this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
             break;
     }
 }
@@ -228,21 +133,9 @@ field<rnumber, be, fc>::~field()
             delete this->rlayout;
             delete this->rmemlayout;
             delete this->clayout;
-            fftw_free(this->data);
-            if (typeid(rnumber) == typeid(float))
-            {
-                fftwf_destroy_plan(*(fftwf_plan*)this->c2r_plan);
-                delete (fftwf_plan*)this->c2r_plan;
-                fftwf_destroy_plan(*(fftwf_plan*)this->r2c_plan);
-                delete (fftwf_plan*)this->r2c_plan;
-            }
-            else if (typeid(rnumber) == typeid(double))
-            {
-                fftw_destroy_plan(*(fftw_plan*)this->c2r_plan);
-                delete (fftw_plan*)this->c2r_plan;
-                fftw_destroy_plan(*(fftw_plan*)this->r2c_plan);
-                delete (fftw_plan*)this->r2c_plan;
-            }
+            fftw_interface<rnumber>::free(this->data);
+            fftw_interface<rnumber>::destroy_plan(this->c2r_plan);
+            fftw_interface<rnumber>::destroy_plan(this->r2c_plan);
             break;
     }
 }
@@ -253,10 +146,7 @@ template <typename rnumber,
 void field<rnumber, be, fc>::ift()
 {
     TIMEZONE("field::ift");
-    if (typeid(rnumber) == typeid(float))
-        fftwf_execute(*((fftwf_plan*)this->c2r_plan));
-    else if (typeid(rnumber) == typeid(double))
-        fftw_execute(*((fftw_plan*)this->c2r_plan));
+    fftw_interface<rnumber>::execute(this->c2r_plan);
     this->real_space_representation = true;
 }
 
@@ -265,10 +155,8 @@ template <typename rnumber,
           field_components fc>
 void field<rnumber, be, fc>::dft()
 {
-    if (typeid(rnumber) == typeid(float))
-        fftwf_execute(*((fftwf_plan*)this->r2c_plan));
-    else if (typeid(rnumber) == typeid(double))
-        fftw_execute(*((fftw_plan*)this->r2c_plan));
+    TIMEZONE("field::dft");
+    fftw_interface<rnumber>::execute(this->r2c_plan);
     this->real_space_representation = false;
 }
 
@@ -277,60 +165,335 @@ template <typename rnumber,
           field_components fc>
 int field<rnumber, be, fc>::io(
         const std::string fname,
-        const std::string dset_name,
-        const int toffset,
+        const std::string field_name,
+        const int iteration,
         const bool read)
 {
+    /* file dataset has same dimensions as field */
     TIMEZONE("field::io");
     hid_t file_id, dset_id, plist_id;
-    hid_t dset_type;
-    bool io_for_real = false;
+    std::string representation = std::string(
+            this->real_space_representation ?
+                "real" : "complex");
+    std::string dset_name = (
+            "/" + field_name +
+            "/" + representation +
+            "/" + std::to_string(iteration));
+
+    /* open/create file */
+    plist_id = H5Pcreate(H5P_FILE_ACCESS);
+    H5Pset_fapl_mpio(plist_id, this->comm, MPI_INFO_NULL);
+    bool file_exists = false;
+    {
+        struct stat file_buffer;
+        file_exists = (stat(fname.c_str(), &file_buffer) == 0);
+    }
+    if (read)
+    {
+        assert(file_exists);
+        file_id = H5Fopen(fname.c_str(), H5F_ACC_RDONLY, plist_id);
+    }
+    else
+    {
+        if (file_exists)
+            file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id);
+        else
+            file_id = H5Fcreate(fname.c_str(), H5F_ACC_EXCL, H5P_DEFAULT, plist_id);
+    }
+    H5Pclose(plist_id);
+
+    /* check what kind of representation is being used */
+    if (read)
+    {
+        dset_id = H5Dopen(
+                file_id,
+                dset_name.c_str(),
+                H5P_DEFAULT);
+        hid_t dset_type = H5Dget_type(dset_id);
+        bool io_for_real = (
+                H5Tequal(dset_type, H5T_IEEE_F32BE) ||
+                H5Tequal(dset_type, H5T_IEEE_F32LE) ||
+                H5Tequal(dset_type, H5T_INTEL_F32) ||
+                H5Tequal(dset_type, H5T_NATIVE_FLOAT) ||
+                H5Tequal(dset_type, H5T_IEEE_F64BE) ||
+                H5Tequal(dset_type, H5T_IEEE_F64LE) ||
+                H5Tequal(dset_type, H5T_INTEL_F64) ||
+                H5Tequal(dset_type, H5T_NATIVE_DOUBLE));
+        H5Tclose(dset_type);
+        assert(this->real_space_representation == io_for_real);
+    }
+
+    /* generic space initialization */
+    hid_t fspace, mspace;
+    hsize_t count[ndim(fc)], offset[ndim(fc)], dims[ndim(fc)];
+    hsize_t memoffset[ndim(fc)], memshape[ndim(fc)];
+
+    if (this->real_space_representation)
+    {
+        for (unsigned int i=0; i<ndim(fc); i++)
+        {
+            count[i] = this->rlayout->subsizes[i];
+            offset[i] = this->rlayout->starts[i];
+            dims[i] = this->rlayout->sizes[i];
+            memshape[i] = this->rmemlayout->subsizes[i];
+            memoffset[i] = 0;
+        }
+    }
+    else
+    {
+        for (unsigned int i=0; i<ndim(fc); i++)
+        {
+            count [i] = this->clayout->subsizes[i];
+            offset[i] = this->clayout->starts[i];
+            dims  [i] = this->clayout->sizes[i];
+            memshape [i] = count[i];
+            memoffset[i] = 0;
+        }
+    }
+    mspace = H5Screate_simple(ndim(fc), memshape, NULL);
+    H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL);
+
+    /* open/create data set */
+    if (read)
+        fspace = H5Dget_space(dset_id);
+    else
+    {
+        if (!H5Lexists(file_id, field_name.c_str(), H5P_DEFAULT))
+        {
+            hid_t gid_tmp = H5Gcreate(
+                    file_id, field_name.c_str(),
+                    H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+            H5Gclose(gid_tmp);
+        }
+
+        if (!H5Lexists(file_id, (field_name + "/" + representation).c_str(), H5P_DEFAULT))
+        {
+            hid_t gid_tmp = H5Gcreate(
+                    file_id, ("/" + field_name + "/" + representation).c_str(),
+                    H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+            H5Gclose(gid_tmp);
+        }
+        if (H5Lexists(file_id, dset_name.c_str(), H5P_DEFAULT))
+        {
+            dset_id = H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT);
+            fspace = H5Dget_space(dset_id);
+        }
+        else
+        {
+            fspace = H5Screate_simple(
+                    ndim(fc),
+                    dims,
+                    NULL);
+            /* chunking needs to go in here */
+            dset_id = H5Dcreate(
+                    file_id,
+                    dset_name.c_str(),
+                    (this->real_space_representation ? this->rnumber_H5T : this->cnumber_H5T),
+                    fspace,
+                    H5P_DEFAULT,
+                    H5P_DEFAULT,
+                    H5P_DEFAULT);
+        }
+    }
+    /* both dset_id and fspace should now have sane values */
+
+    /* check file space */
+    int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL);
+    assert(ndims_fspace == ndim(fc));
+    if (this->real_space_representation)
+    {
+        for (unsigned int i=0; i<ndim(fc); i++)
+        {
+            offset[i] = this->rlayout->starts[i];
+            assert(dims[i] == this->rlayout->sizes[i]);
+        }
+        H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        if (read)
+        {
+            std::fill_n(this->data, this->rmemlayout->local_size, 0);
+            H5Dread(dset_id, this->rnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data);
+        }
+        else
+        {
+            assert(this->real_space_representation);
+            H5Dwrite(dset_id, this->rnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data);
+        }
+        H5Sclose(mspace);
+    }
+    else
+    {
+        for (unsigned int i=0; i<ndim(fc); i++)
+        {
+            offset[i] = this->clayout->starts[i];
+            assert(dims[i] == this->clayout->sizes[i]);
+        }
+        H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        if (read)
+        {
+            std::fill_n(this->data, this->clayout->local_size*2, 0);
+            H5Dread(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data);
+            this->symmetrize();
+        }
+        else
+        {
+            assert(!this->real_space_representation);
+            H5Dwrite(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data);
+        }
+        H5Sclose(mspace);
+    }
 
-    /* open file */
+    H5Sclose(fspace);
+    /* close data set */
+    H5Dclose(dset_id);
+    /* close file */
+    H5Fclose(file_id);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber,
+          field_backend be,
+          field_components fc>
+int field<rnumber, be, fc>::io_database(
+        const std::string fname,
+        const std::string field_name,
+        const int toffset,
+        const bool read)
+{
+    /* file dataset is has a time dimension as well */
+    TIMEZONE("field::io_database");
+    hid_t file_id, dset_id, plist_id;
+    std::string representation = std::string(
+            this->real_space_representation ?
+                "real" : "complex");
+    std::string dset_name = (
+            "/" + field_name +
+            "/" + representation);
+
+    /* open/create file */
     plist_id = H5Pcreate(H5P_FILE_ACCESS);
     H5Pset_fapl_mpio(plist_id, this->comm, MPI_INFO_NULL);
+    bool file_exists = false;
+    {
+        struct stat file_buffer;
+        file_exists = (stat(fname.c_str(), &file_buffer) == 0);
+    }
     if (read)
+    {
+        assert(file_exists);
         file_id = H5Fopen(fname.c_str(), H5F_ACC_RDONLY, plist_id);
+    }
     else
-        file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id);
+    {
+        if (file_exists)
+            file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id);
+        else
+            file_id = H5Fcreate(fname.c_str(), H5F_ACC_EXCL, H5P_DEFAULT, plist_id);
+    }
     H5Pclose(plist_id);
 
-    /* open data set */
-    dset_id = H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT);
-    dset_type = H5Dget_type(dset_id);
-    io_for_real = (
-            H5Tequal(dset_type, H5T_IEEE_F32BE) ||
-            H5Tequal(dset_type, H5T_IEEE_F32LE) ||
-            H5Tequal(dset_type, H5T_INTEL_F32) ||
-            H5Tequal(dset_type, H5T_NATIVE_FLOAT) ||
-            H5Tequal(dset_type, H5T_IEEE_F64BE) ||
-            H5Tequal(dset_type, H5T_IEEE_F64LE) ||
-            H5Tequal(dset_type, H5T_INTEL_F64) ||
-            H5Tequal(dset_type, H5T_NATIVE_DOUBLE));
+    /* check what kind of representation is being used */
+    if (read)
+    {
+        dset_id = H5Dopen(
+                file_id,
+                dset_name.c_str(),
+                H5P_DEFAULT);
+        hid_t dset_type = H5Dget_type(dset_id);
+        bool io_for_real = (
+                H5Tequal(dset_type, H5T_IEEE_F32BE) ||
+                H5Tequal(dset_type, H5T_IEEE_F32LE) ||
+                H5Tequal(dset_type, H5T_INTEL_F32) ||
+                H5Tequal(dset_type, H5T_NATIVE_FLOAT) ||
+                H5Tequal(dset_type, H5T_IEEE_F64BE) ||
+                H5Tequal(dset_type, H5T_IEEE_F64LE) ||
+                H5Tequal(dset_type, H5T_INTEL_F64) ||
+                H5Tequal(dset_type, H5T_NATIVE_DOUBLE));
+        H5Tclose(dset_type);
+        assert(this->real_space_representation == io_for_real);
+    }
 
     /* generic space initialization */
     hid_t fspace, mspace;
-    fspace = H5Dget_space(dset_id);
     hsize_t count[ndim(fc)+1], offset[ndim(fc)+1], dims[ndim(fc)+1];
     hsize_t memoffset[ndim(fc)+1], memshape[ndim(fc)+1];
-    H5Sget_simple_extent_dims(fspace, dims, NULL);
+
+    int dim_counter_offset = 1;
+    dim_counter_offset = 1;
     count[0] = 1;
-    offset[0] = toffset;
     memshape[0] = 1;
     memoffset[0] = 0;
-    if (io_for_real)
+    if (this->real_space_representation)
     {
         for (unsigned int i=0; i<ndim(fc); i++)
         {
-            count[i+1] = this->rlayout->subsizes[i];
-            offset[i+1] = this->rlayout->starts[i];
-            assert(dims[i+1] == this->rlayout->sizes[i]);
-            memshape[i+1] = this->rmemlayout->subsizes[i];
-            memoffset[i+1] = 0;
+            count[i+dim_counter_offset] = this->rlayout->subsizes[i];
+            offset[i+dim_counter_offset] = this->rlayout->starts[i];
+            dims[i+dim_counter_offset] = this->rlayout->sizes[i];
+            memshape[i+dim_counter_offset] = this->rmemlayout->subsizes[i];
+            memoffset[i+dim_counter_offset] = 0;
         }
-        mspace = H5Screate_simple(ndim(fc)+1, memshape, NULL);
-        H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        mspace = H5Screate_simple(dim_counter_offset + ndim(fc), memshape, NULL);
+        H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL);
+    }
+    else
+    {
+        for (unsigned int i=0; i<ndim(fc); i++)
+        {
+            count[i+dim_counter_offset] = this->clayout->subsizes[i];
+            offset[i+dim_counter_offset] = this->clayout->starts[i];
+            dims[i+dim_counter_offset] = this->clayout->sizes[i];
+            memshape[i+dim_counter_offset] = count[i+dim_counter_offset];
+            memoffset[i+dim_counter_offset] = 0;
+        }
+        mspace = H5Screate_simple(dim_counter_offset + ndim(fc), memshape, NULL);
         H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL);
+    }
+
+    /* open/create data set */
+    if (read)
+        fspace = H5Dget_space(dset_id);
+    else
+    {
+        if (!H5Lexists(file_id, field_name.c_str(), H5P_DEFAULT))
+            H5Gcreate(
+                    file_id, field_name.c_str(),
+                    H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        if (H5Lexists(file_id, dset_name.c_str(), H5P_DEFAULT))
+        {
+            dset_id = H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT);
+            fspace = H5Dget_space(dset_id);
+        }
+        else
+        {
+            fspace = H5Screate_simple(
+                    ndim(fc),
+                    dims,
+                    NULL);
+            /* chunking needs to go in here */
+            dset_id = H5Dcreate(
+                    file_id,
+                    dset_name.c_str(),
+                    (this->real_space_representation ? this->rnumber_H5T : this->cnumber_H5T),
+                    fspace,
+                    H5P_DEFAULT,
+                    H5P_DEFAULT,
+                    H5P_DEFAULT);
+        }
+    }
+    /* both dset_id and fspace should now have sane values */
+
+    /* check file space */
+    int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL);
+    assert(ndims_fspace == ndim(fc) + 1);
+    offset[0] = toffset;
+    if (this->real_space_representation)
+    {
+        for (unsigned int i=0; i<ndim(fc); i++)
+        {
+            offset[i+dim_counter_offset] = this->rlayout->starts[i];
+            assert(dims[i+dim_counter_offset] == this->rlayout->sizes[i]);
+        }
+        H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL);
         if (read)
         {
             std::fill_n(this->data, this->rmemlayout->local_size, 0);
@@ -339,13 +502,8 @@ int field<rnumber, be, fc>::io(
         }
         else
         {
+            assert(this->real_space_representation);
             H5Dwrite(dset_id, this->rnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data);
-            if (!this->real_space_representation)
-                /* in principle we could do an inverse Fourier transform in here,
-                 * however that would be unsafe since we wouldn't know whether we'd need to
-                 * normalize or not.
-                 * */
-                DEBUG_MSG("I just wrote complex field into real space dataset. It's probably nonsense.\n");
         }
         H5Sclose(mspace);
     }
@@ -353,30 +511,24 @@ int field<rnumber, be, fc>::io(
     {
         for (unsigned int i=0; i<ndim(fc); i++)
         {
-            count[i+1] = this->clayout->subsizes[i];
-            offset[i+1] = this->clayout->starts[i];
-            assert(dims[i+1] == this->clayout->sizes[i]);
-            memshape[i+1] = count[i+1];
-            memoffset[i+1] = 0;
+            offset[i+dim_counter_offset] = this->clayout->starts[i];
+            assert(dims[i+dim_counter_offset] == this->clayout->sizes[i]);
         }
-        mspace = H5Screate_simple(ndim(fc)+1, memshape, NULL);
         H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-        H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL);
         if (read)
         {
             H5Dread(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data);
             this->real_space_representation = false;
+            this->symmetrize();
         }
         else
         {
+            assert(!this->real_space_representation);
             H5Dwrite(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data);
-            if (this->real_space_representation)
-                DEBUG_MSG("I just wrote real space field into complex dataset. It's probably nonsense.\n");
         }
         H5Sclose(mspace);
     }
 
-    H5Tclose(dset_type);
     H5Sclose(fspace);
     /* close data set */
     H5Dclose(dset_id);
@@ -405,8 +557,11 @@ void field<rnumber, be, fc>::compute_rspace_xincrement_stats(
             this->rlayout->sizes[0],
             this->rlayout->comm);
     tmp_field->real_space_representation = true;
-    FIELD_RLOOP<be>(
-            this,[&](hsize_t zindex, hsize_t yindex, ptrdiff_t rindex, hsize_t xindex){
+    this->RLOOP(
+                [&](ptrdiff_t rindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
             hsize_t rrindex = (xindex + xcells)%this->rlayout->sizes[2] + (
                 zindex * this->rlayout->subsizes[1] + yindex)*(
                     this->rmemlayout->subsizes[2]);
@@ -414,7 +569,7 @@ void field<rnumber, be, fc>::compute_rspace_xincrement_stats(
                 tmp_field->data[rindex*ncomp(fc) + component] =
                     this->data[rrindex*ncomp(fc) + component] -
                     this->data[rindex*ncomp(fc) + component];
-    });
+                    });
     tmp_field->compute_rspace_stats(
             group,
             dset_name,
@@ -499,9 +654,9 @@ void field<rnumber, be, fc>::compute_rspace_stats(
     });
 
     {
-        TIMEZONE("FIELD_RLOOP");
-        FIELD_RLOOP<be>(
-            this,[&](hsize_t /*zindex*/, hsize_t /*yindex*/, ptrdiff_t rindex, hsize_t /*xindex*/){
+        TIMEZONE("field::RLOOP");
+        this->RLOOP(
+            [&](hsize_t /*zindex*/, hsize_t /*yindex*/, ptrdiff_t rindex, hsize_t /*xindex*/){
             double *local_moments = local_moments_threaded.getMine();
             double *val_tmp = val_tmp_threaded.getMine();
             ptrdiff_t *local_hist = local_hist_threaded.getMine();
@@ -628,6 +783,86 @@ void field<rnumber, be, fc>::normalize()
             this->data[tmp_index] /= this->npoints;
 }
 
+template <typename rnumber,
+          field_backend be,
+          field_components fc>
+void field<rnumber, be, fc>::symmetrize()
+{
+    TIMEZONE("field::symmetrize");
+    assert(!this->real_space_representation);
+    ptrdiff_t ii, cc;
+    typename fftw_interface<rnumber>::complex *data = this->get_cdata();
+    MPI_Status *mpistatus = new MPI_Status;
+    if (this->myrank == this->clayout->rank[0][0])
+    {
+        for (cc = 0; cc < ncomp(fc); cc++)
+            data[cc][1] = 0.0;
+        for (ii = 1; ii < this->clayout->sizes[1]/2; ii++)
+            for (cc = 0; cc < ncomp(fc); cc++) {
+                ( *(data + cc + ncomp(fc)*(this->clayout->sizes[1] - ii)*this->clayout->sizes[2]))[0] =
+                 (*(data + cc + ncomp(fc)*(                          ii)*this->clayout->sizes[2]))[0];
+                ( *(data + cc + ncomp(fc)*(this->clayout->sizes[1] - ii)*this->clayout->sizes[2]))[1] =
+                -(*(data + cc + ncomp(fc)*(                          ii)*this->clayout->sizes[2]))[1];
+            }
+    }
+    typename fftw_interface<rnumber>::complex *buffer;
+    buffer = fftw_interface<rnumber>::alloc_complex(ncomp(fc)*this->clayout->sizes[1]);
+    ptrdiff_t yy;
+    /*ptrdiff_t tindex;*/
+    int ranksrc, rankdst;
+    for (yy = 1; yy < this->clayout->sizes[0]/2; yy++) {
+        ranksrc = this->clayout->rank[0][yy];
+        rankdst = this->clayout->rank[0][this->clayout->sizes[0] - yy];
+        if (this->clayout->myrank == ranksrc)
+            for (ii = 0; ii < this->clayout->sizes[1]; ii++)
+                for (cc = 0; cc < ncomp(fc); cc++)
+                    for (int imag_comp=0; imag_comp<2; imag_comp++)
+                        (*(buffer + ncomp(fc)*ii+cc))[imag_comp] =
+                            (*(data + ncomp(fc)*((yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[imag_comp];
+        if (ranksrc != rankdst)
+        {
+            if (this->clayout->myrank == ranksrc)
+                MPI_Send((void*)buffer,
+                         ncomp(fc)*this->clayout->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy,
+                        this->clayout->comm);
+            if (this->clayout->myrank == rankdst)
+                MPI_Recv((void*)buffer,
+                         ncomp(fc)*this->clayout->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy,
+                        this->clayout->comm, mpistatus);
+        }
+        if (this->clayout->myrank == rankdst)
+        {
+            for (ii = 1; ii < this->clayout->sizes[1]; ii++)
+                for (cc = 0; cc < ncomp(fc); cc++)
+                {
+                    (*(data + ncomp(fc)*((this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[0] =
+                            (*(buffer + ncomp(fc)*(this->clayout->sizes[1]-ii)+cc))[0];
+                    (*(data + ncomp(fc)*((this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[1] =
+                            -(*(buffer + ncomp(fc)*(this->clayout->sizes[1]-ii)+cc))[1];
+                }
+            for (cc = 0; cc < ncomp(fc); cc++)
+            {
+                (*((data + cc + ncomp(fc)*(this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2])))[0] =  (*(buffer + cc))[0];
+                (*((data + cc + ncomp(fc)*(this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2])))[1] = -(*(buffer + cc))[1];
+            }
+        }
+    }
+    fftw_interface<rnumber>::free(buffer);
+    delete mpistatus;
+    /* put asymmetric data to 0 */
+    /*if (this->clayout->myrank == this->clayout->rank[0][this->clayout->sizes[0]/2])
+    {
+        tindex = ncomp(fc)*(this->clayout->sizes[0]/2 - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2];
+        for (ii = 0; ii < this->clayout->sizes[1]; ii++)
+        {
+            std::fill_n((rnumber*)(data + tindex), ncomp(fc)*2*this->clayout->sizes[2], 0.0);
+            tindex += ncomp(fc)*this->clayout->sizes[2];
+        }
+    }
+    tindex = ncomp(fc)*();
+    std::fill_n((rnumber*)(data + tindex), ncomp(fc)*2, 0.0);*/
+}
+
 template <typename rnumber,
           field_backend be,
           field_components fc>
@@ -673,8 +908,8 @@ void field<rnumber, be, fc>::compute_stats(
     // what follows gave me a headache until I found this link:
     // http://stackoverflow.com/questions/8256636/expected-primary-expression-error-on-template-method-using
     kk->template cospectrum<rnumber, fc>(
-            (cnumber*)this->data,
-            (cnumber*)this->data,
+            (typename fftw_interface<rnumber>::complex*)this->data,
+            (typename fftw_interface<rnumber>::complex*)this->data,
             group,
             dset_name + "_" + dset_name,
             toffset);
@@ -690,253 +925,6 @@ void field<rnumber, be, fc>::compute_stats(
     }
 }
 
-template <field_backend be,
-          kspace_dealias_type dt>
-template <field_components fc>
-kspace<be, dt>::kspace(
-        const field_layout<fc> *source_layout,
-        const double DKX,
-        const double DKY,
-        const double DKZ)
-{
-    TIMEZONE("field::kspace");
-    /* get layout */
-    this->layout = new field_layout<ONE>(
-            source_layout->sizes,
-            source_layout->subsizes,
-            source_layout->starts,
-            source_layout->comm);
-
-    /* store dk values */
-    this->dkx = DKX;
-    this->dky = DKY;
-    this->dkz = DKZ;
-
-    /* compute kx, ky, kz and compute kM values */
-    switch(be)
-    {
-        case FFTW:
-            this->kx.resize(this->layout->sizes[2]);
-            this->ky.resize(this->layout->subsizes[0]);
-            this->kz.resize(this->layout->sizes[1]);
-            int i, ii;
-            for (i = 0; i<int(this->layout->sizes[2]); i++)
-                this->kx[i] = i*this->dkx;
-            for (i = 0; i<int(this->layout->subsizes[0]); i++)
-            {
-                ii = i + this->layout->starts[0];
-                if (ii <= int(this->layout->sizes[1]/2))
-                    this->ky[i] = this->dky*ii;
-                else
-                    this->ky[i] = this->dky*(ii - int(this->layout->sizes[1]));
-            }
-            for (i = 0; i<int(this->layout->sizes[1]); i++)
-            {
-                if (i <= int(this->layout->sizes[0]/2))
-                    this->kz[i] = this->dkz*i;
-                else
-                    this->kz[i] = this->dkz*(i - int(this->layout->sizes[0]));
-            }
-            switch(dt)
-            {
-                case TWO_THIRDS:
-                    this->kMx = this->dkx*(int(2*(int(this->layout->sizes[2])-1)/3)-1);
-                    this->kMy = this->dky*(int(this->layout->sizes[0] / 3)-1);
-                    this->kMz = this->dkz*(int(this->layout->sizes[1] / 3)-1);
-                    break;
-                case SMOOTH:
-                    this->kMx = this->dkx*(int(this->layout->sizes[2])-2);
-                    this->kMy = this->dky*(int(this->layout->sizes[0] / 2)-1);
-                    this->kMz = this->dkz*(int(this->layout->sizes[1] / 2)-1);
-                    break;
-            }
-            break;
-    }
-
-    /* get global kM and dk */
-    this->kM = this->kMx;
-    if (this->kM < this->kMy) this->kM = this->kMy;
-    if (this->kM < this->kMz) this->kM = this->kMz;
-    this->kM2 = this->kM * this->kM;
-    this->dk = this->dkx;
-    if (this->dk > this->dky) this->dk = this->dky;
-    if (this->dk > this->dkz) this->dk = this->dkz;
-    this->dk2 = this->dk*this->dk;
-
-    /* spectra stuff */
-    this->nshells = int(this->kM / this->dk) + 2;
-    this->kshell.resize(this->nshells, 0);
-    this->nshell.resize(this->nshells, 0);
-
-    shared_array<double> kshell_local_threaded(this->nshells, [&](double* kshell_local){
-        std::fill_n(kshell_local, this->nshells, 0);
-    });
-
-    shared_array<int64_t> nshell_local_threaded(this->nshells, [&](int64_t* nshell_local){
-        std::fill_n(nshell_local, this->nshells, 0);
-    });
-
-    std::vector<std::unordered_map<int, double>> dealias_filter_threaded(omp_get_max_threads());
-
-    KSPACE_CLOOP_K2_NXMODES(
-            this,[&](ptrdiff_t /*cindex*/, hsize_t /*yindex*/, hsize_t /*zindex*/, int nxmodes, hsize_t /*xindex*/, double k2){
-                if (k2 < this->kM2)
-                {
-                    double knorm = sqrt(k2);
-                    nshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes;
-                    kshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes*knorm;
-                }
-                if (dt == TWO_THIRDS){
-                    // Should not be any race condition here it is a "write"
-                    dealias_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));
-                }
-            });
-
-    for(int idxMerge = 0 ; idxMerge < int(dealias_filter_threaded.size()) ; ++idxMerge){
-        for(const auto kv : dealias_filter_threaded[idxMerge]){
-            this->dealias_filter[kv.first] = kv.second;
-        }
-    }
-
-    nshell_local_threaded.mergeParallel();
-    kshell_local_threaded.mergeParallel();
-
-    MPI_Allreduce(
-            nshell_local_threaded.getMasterData(),
-            &this->nshell.front(),
-            this->nshells,
-            MPI_INT64_T, MPI_SUM, this->layout->comm);
-    MPI_Allreduce(
-            kshell_local_threaded.getMasterData(),
-            &this->kshell.front(),
-            this->nshells,
-            MPI_DOUBLE, MPI_SUM, this->layout->comm);
-    for (int n=0; n<this->nshells; n++)
-        this->kshell[n] /= this->nshell[n];
-}
-
-template <field_backend be,
-          kspace_dealias_type dt>
-kspace<be, dt>::~kspace()
-{
-    delete this->layout;
-}
-
-template <field_backend be,
-          kspace_dealias_type dt>
-template <typename rnumber,
-          field_components fc>
-void kspace<be, dt>::low_pass(rnumber *__restrict__ a, const double kmax)
-{
-    const double km2 = kmax*kmax;
-    KSPACE_CLOOP_K2(
-            this,[&](ptrdiff_t cindex, hsize_t /*yindex*/, hsize_t /*zindex*/, hsize_t /*xindex*/, double k2){
-            // There is not race condition because it is write
-            if (k2 >= km2){
-                std::fill_n(a + 2*ncomp(fc)*cindex, 2*ncomp(fc), 0);
-            }
-    });
-}
-
-template <field_backend be,
-          kspace_dealias_type dt>
-template <typename rnumber,
-          field_components fc>
-void kspace<be, dt>::dealias(rnumber *__restrict__ a)
-{
-    switch(be)
-    {
-        case TWO_THIRDS:
-            this->low_pass<rnumber, fc>(a, this->kM);
-            break;
-        case SMOOTH:
-            KSPACE_CLOOP_K2(
-                    this,[&](ptrdiff_t cindex, hsize_t /*yindex*/, hsize_t /*zindex*/, hsize_t /*xindex*/, double k2){
-                    double tval = this->dealias_filter[int(round(k2 / this->dk2))];
-                    // There is no overlap between threads here because cindex is thread-safe index
-                    // and push for a jump of 2*ncomp(fc)
-                    for (int tcounter=0; tcounter<2*ncomp(fc); tcounter++){
-                        a[2*ncomp(fc)*cindex + tcounter] *= tval;
-                    }
-            });
-            break;
-    }
-}
-
-template <field_backend be,
-          kspace_dealias_type dt>
-template <typename rnumber,
-          field_components fc>
-void kspace<be, dt>::cospectrum(
-        const rnumber(* __restrict a)[2],
-        const rnumber(* __restrict b)[2],
-        const hid_t group,
-        const std::string dset_name,
-        const hsize_t toffset)
-{
-    TIMEZONE("field::cospectrum");
-    std::vector<double> spec;
-    spec.resize(this->nshells*ncomp(fc)*ncomp(fc), 0);
-
-    shared_array<double> spec_local_threaded(this->nshells*ncomp(fc)*ncomp(fc), [&](double* spec_local){
-        std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0);
-    });
-
-    KSPACE_CLOOP_K2_NXMODES(
-            this,[&](ptrdiff_t cindex, hsize_t /*yindex*/, hsize_t /*zindex*/, int nxmodes, hsize_t /*xindex*/, double k2){
-            if (k2 <= this->kM2)
-            {
-                double* spec_local = spec_local_threaded.getMine();
-                int tmp_int = int(sqrt(k2) / this->dk)*ncomp(fc)*ncomp(fc);
-                for (hsize_t i=0; i<ncomp(fc); i++)
-                for (hsize_t j=0; j<ncomp(fc); j++)
-                    spec_local[tmp_int + i*ncomp(fc)+j] += nxmodes * (
-                    (a[ncomp(fc)*cindex + i][0] * b[ncomp(fc)*cindex + j][0]) +
-                    (a[ncomp(fc)*cindex + i][1] * b[ncomp(fc)*cindex + j][1]));
-            }
-    });
-    spec_local_threaded.mergeParallel();
-
-    MPI_Allreduce(
-            spec_local_threaded.getMasterData(),
-            &spec.front(),
-            spec.size(),
-            MPI_DOUBLE, MPI_SUM, this->layout->comm);
-    if (this->layout->myrank == 0)
-    {
-        hid_t dset, wspace, mspace;
-        hsize_t count[(ndim(fc)-2)*2], offset[(ndim(fc)-2)*2], dims[(ndim(fc)-2)*2];
-        dset = H5Dopen(group, ("spectra/" + dset_name).c_str(), H5P_DEFAULT);
-        wspace = H5Dget_space(dset);
-        H5Sget_simple_extent_dims(wspace, dims, NULL);
-        switch (fc)
-        {
-            case THREExTHREE:
-                offset[4] = 0;
-                offset[5] = 0;
-                count[4] = ncomp(fc);
-                count[5] = ncomp(fc);
-            case THREE:
-                offset[2] = 0;
-                offset[3] = 0;
-                count[2] = ncomp(fc);
-                count[3] = ncomp(fc);
-            default:
-                offset[0] = toffset;
-                offset[1] = 0;
-                count[0] = 1;
-                count[1] = this->nshells;
-        }
-        mspace = H5Screate_simple((ndim(fc)-2)*2, count, NULL);
-        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-        H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, &spec.front());
-        H5Sclose(wspace);
-        H5Sclose(mspace);
-        H5Dclose(dset);
-    }
-}
-
-
 template <typename rnumber,
           field_backend be,
           field_components fc1,
@@ -951,28 +939,48 @@ void compute_gradient(
     assert(!src->real_space_representation);
     assert((fc1 == ONE && fc2 == THREE) ||
            (fc1 == THREE && fc2 == THREExTHREE));
-    KSPACE_CLOOP_K2(
-            kk,[&](ptrdiff_t cindex, hsize_t yindex, hsize_t zindex, hsize_t xindex, double k2){
-            if (k2 < kk->kM2)
-                for (unsigned int field_component = 0;
-                     field_component < ncomp(fc1);
-                     field_component++)
-                {
-                    // There must not be any race condition because it is write
-                    dst->get_cdata()[(cindex*3+0)*ncomp(fc1)+field_component][0] =
-                        - kk->kx[xindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1];
-                    dst->get_cdata()[(cindex*3+0)*ncomp(fc1)+field_component][1] =
-                          kk->kx[xindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0];
-                    dst->get_cdata()[(cindex*3+1)*ncomp(fc1)+field_component][0] =
-                        - kk->ky[yindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1];
-                    dst->get_cdata()[(cindex*3+1)*ncomp(fc1)+field_component][1] =
-                          kk->ky[yindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0];
-                    dst->get_cdata()[(cindex*3+2)*ncomp(fc1)+field_component][0] =
-                        - kk->kz[zindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1];
-                    dst->get_cdata()[(cindex*3+2)*ncomp(fc1)+field_component][1] =
-                          kk->kz[zindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0];
-                }
-    });
+    kk->CLOOP_K2(
+            [&](ptrdiff_t cindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex,
+                double k2){
+            if (k2 < kk->kM2) switch(fc1)
+            {
+                case ONE:
+                    dst->cval(cindex, 0, 0) = -kk->kx[xindex]*src->cval(cindex, 1);
+                    dst->cval(cindex, 0, 1) =  kk->kx[xindex]*src->cval(cindex, 0);
+                    dst->cval(cindex, 1, 0) = -kk->ky[yindex]*src->cval(cindex, 1);
+                    dst->cval(cindex, 1, 1) =  kk->ky[yindex]*src->cval(cindex, 0);
+                    dst->cval(cindex, 2, 0) = -kk->kz[zindex]*src->cval(cindex, 1);
+                    dst->cval(cindex, 2, 1) =  kk->kz[zindex]*src->cval(cindex, 0);
+                    break;
+                case THREE:
+                    for (unsigned int field_component = 0;
+                         field_component < ncomp(fc1);
+                         field_component++)
+                    {
+                        dst->cval(cindex, 0, field_component, 0) = -kk->kx[xindex]*src->cval(cindex, field_component, 1);
+                        dst->cval(cindex, 0, field_component, 1) =  kk->kx[xindex]*src->cval(cindex, field_component, 0);
+                        dst->cval(cindex, 1, field_component, 0) = -kk->ky[yindex]*src->cval(cindex, field_component, 1);
+                        dst->cval(cindex, 1, field_component, 1) =  kk->ky[yindex]*src->cval(cindex, field_component, 0);
+                        dst->cval(cindex, 2, field_component, 0) = -kk->kz[zindex]*src->cval(cindex, field_component, 1);
+                        dst->cval(cindex, 2, field_component, 1) =  kk->kz[zindex]*src->cval(cindex, field_component, 0);
+                    }
+                //dst->get_cdata()[(cindex*3+0)*ncomp(fc1)+field_component][0] =
+                //    - kk->kx[xindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1];
+                //dst->get_cdata()[(cindex*3+0)*ncomp(fc1)+field_component][1] =
+                //      kk->kx[xindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0];
+                //dst->get_cdata()[(cindex*3+1)*ncomp(fc1)+field_component][0] =
+                //    - kk->ky[yindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1];
+                //dst->get_cdata()[(cindex*3+1)*ncomp(fc1)+field_component][1] =
+                //      kk->ky[yindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0];
+                //dst->get_cdata()[(cindex*3+2)*ncomp(fc1)+field_component][0] =
+                //    - kk->kz[zindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][1];
+                //dst->get_cdata()[(cindex*3+2)*ncomp(fc1)+field_component][1] =
+                //      kk->kz[zindex]*src->get_cdata()[cindex*ncomp(fc1)+field_component][0];
+            }
+            });
 }
 
 template class field<float, FFTW, ONE>;
@@ -982,49 +990,6 @@ template class field<double, FFTW, ONE>;
 template class field<double, FFTW, THREE>;
 template class field<double, FFTW, THREExTHREE>;
 
-template class kspace<FFTW, TWO_THIRDS>;
-template class kspace<FFTW, SMOOTH>;
-
-template kspace<FFTW, TWO_THIRDS>::kspace<>(
-        const field_layout<ONE> *,
-        const double, const double, const double);
-template kspace<FFTW, TWO_THIRDS>::kspace<>(
-        const field_layout<THREE> *,
-        const double, const double, const double);
-template kspace<FFTW, TWO_THIRDS>::kspace<>(
-        const field_layout<THREExTHREE> *,
-        const double, const double, const double);
-
-template kspace<FFTW, SMOOTH>::kspace<>(
-        const field_layout<ONE> *,
-        const double, const double, const double);
-template kspace<FFTW, SMOOTH>::kspace<>(
-        const field_layout<THREE> *,
-        const double, const double, const double);
-template kspace<FFTW, SMOOTH>::kspace<>(
-        const field_layout<THREExTHREE> *,
-        const double, const double, const double);
-
-template void kspace<FFTW, SMOOTH>::low_pass<float, ONE>(
-   float *__restrict__ a,
-   const double kmax);
-template void kspace<FFTW, SMOOTH>::low_pass<float, THREE>(
-   float *__restrict__ a,
-   const double kmax);
-template void kspace<FFTW, SMOOTH>::low_pass<float, THREExTHREE>(
-   float *__restrict__ a,
-   const double kmax);
-
-template void kspace<FFTW, SMOOTH>::low_pass<double, ONE>(
-   double *__restrict__ a,
-   const double kmax);
-template void kspace<FFTW, SMOOTH>::low_pass<double, THREE>(
-   double *__restrict__ a,
-   const double kmax);
-template void kspace<FFTW, SMOOTH>::low_pass<double, THREExTHREE>(
-        double *__restrict__ a,
-        const double kmax);
-
 template void field<float, FFTW, ONE>::compute_stats<TWO_THIRDS>(
         kspace<FFTW, TWO_THIRDS> *,
         const hid_t, const std::string, const hsize_t, const double);
diff --git a/bfps/cpp/field.hpp b/bfps/cpp/field.hpp
index a9790d9a6cdff4a95db134222484ed4981bb8a9b..3cb1424e1e6aea910f36e16bdcf3324ce05e518b 100644
--- a/bfps/cpp/field.hpp
+++ b/bfps/cpp/field.hpp
@@ -24,110 +24,16 @@
 
 
 
-#include <mpi.h>
 #include <hdf5.h>
-#include <fftw3-mpi.h>
 #include <unordered_map>
 #include <vector>
 #include <string>
-#include "base.hpp"
+#include "kspace.hpp"
 
-#ifndef FIELD
+#ifndef FIELD_HPP
 
-#define FIELD
+#define FIELD_HPP
 
-enum field_backend {FFTW};
-enum field_components {ONE, THREE, THREExTHREE};
-enum kspace_dealias_type {TWO_THIRDS, SMOOTH};
-
-constexpr unsigned int ncomp(
-        field_components fc)
-    /* return actual number of field components for each enum value */
-{
-    return ((fc == THREE) ? 3 : (
-            (fc == THREExTHREE) ? 9 : 1));
-}
-
-constexpr unsigned int ndim(
-        field_components fc)
-    /* return actual number of field dimensions for each enum value */
-{
-    return ((fc == THREE) ? 4 : (
-            (fc == THREExTHREE) ? 5 : 3));
-}
-
-template <field_components fc>
-class field_layout
-{
-    public:
-        /* description */
-        hsize_t sizes[ndim(fc)];
-        hsize_t subsizes[ndim(fc)];
-        hsize_t starts[ndim(fc)];
-        hsize_t local_size, full_size;
-
-        int myrank, nprocs;
-        MPI_Comm comm;
-
-        std::vector<std::vector<int>> rank;
-        std::vector<std::vector<int>> all_start;
-        std::vector<std::vector<int>> all_size;
-
-        /* methods */
-        field_layout(
-                const hsize_t *SIZES,
-                const hsize_t *SUBSIZES,
-                const hsize_t *STARTS,
-                const MPI_Comm COMM_TO_USE);
-        ~field_layout(){}
-};
-
-template <field_backend be,
-          kspace_dealias_type dt>
-class kspace
-{
-    public:
-        /* relevant field layout */
-        field_layout<ONE> *layout;
-
-        /* physical parameters */
-        double dkx, dky, dkz, dk, dk2;
-
-        /* mode and dealiasing information */
-        double kMx, kMy, kMz, kM, kM2;
-        double kMspec, kMspec2;
-        std::vector<double> kx, ky, kz;
-        std::unordered_map<int, double> dealias_filter;
-        std::vector<double> kshell;
-        std::vector<int64_t> nshell;
-        int nshells;
-
-        /* methods */
-        template <field_components fc>
-        kspace(
-                const field_layout<fc> *source_layout,
-                const double DKX = 1.0,
-                const double DKY = 1.0,
-                const double DKZ = 1.0);
-        ~kspace();
-
-        template <typename rnumber,
-                  field_components fc>
-        void low_pass(rnumber *__restrict__ a, const double kmax);
-
-        template <typename rnumber,
-                  field_components fc>
-        void dealias(rnumber *__restrict__ a);
-
-        template <typename rnumber,
-                  field_components fc>
-        void cospectrum(
-                const rnumber(* __restrict__ a)[2],
-                const rnumber(* __restrict__ b)[2],
-                const hid_t group,
-                const std::string dset_name,
-                const hsize_t toffset);
-};
 
 template <typename rnumber,
           field_backend be,
@@ -136,10 +42,9 @@ class field
 {
     private:
         /* data arrays */
-        rnumber *data;
-        typedef rnumber cnumber[2];
-        hsize_t npoints;
+        rnumber *__restrict__ data;
     public:
+        hsize_t npoints;
         bool real_space_representation;
         /* basic MPI information */
         int myrank, nprocs;
@@ -153,8 +58,8 @@ class field
         field_layout<fc> *clayout, *rlayout, *rmemlayout;
 
         /* FFT plans */
-        void *c2r_plan;
-        void *r2c_plan;
+        typename fftw_interface<rnumber>::plan c2r_plan;
+        typename fftw_interface<rnumber>::plan r2c_plan;
         unsigned fftw_plan_rigor;
 
         /* HDF5 data types for arrays */
@@ -171,14 +76,22 @@ class field
 
         int io(
                 const std::string fname,
-                const std::string dset_name,
+                const std::string field_name,
+                const int iteration,
+                const bool read = true);
+        int io_database(
+                const std::string fname,
+                const std::string field_name,
                 const int toffset,
                 const bool read = true);
 
+        /* essential FFT stuff */
         void dft();
         void ift();
         void normalize();
+        void symmetrize();
 
+        /* stats */
         void compute_rspace_xincrement_stats(
                 const int xcells,
                 const hid_t group,
@@ -191,16 +104,57 @@ class field
                 const std::string dset_name,
                 const hsize_t toffset,
                 const std::vector<double> max_estimate);
-        inline rnumber *get_rdata()
+
+        /* acess data */
+        inline rnumber *__restrict__ get_rdata()
         {
             return this->data;
         }
-        inline cnumber *get_cdata()
+
+        inline typename fftw_interface<rnumber>::complex *__restrict__ get_cdata()
+        {
+            return (typename fftw_interface<rnumber>::complex*__restrict__)this->data;
+        }
+
+        inline rnumber &rval(ptrdiff_t rindex, int component = 0)
+        {
+            assert(fc == ONE || fc == THREE);
+            assert(component >= 0 && component <ncomp(fc));
+            return *(this->data + rindex*ncomp(fc) + component);
+        }
+
+        inline rnumber &rval(ptrdiff_t rindex, int comp1, int comp0)
+        {
+            assert(fc == THREExTHREE);
+            assert(comp1 >= 0 && comp1 < 3);
+            assert(comp0 >= 0 && comp0 < 3);
+            return *(this->data + ((rindex*3 + comp1)*3 + comp0));
+        }
+
+        inline rnumber &cval(ptrdiff_t cindex, int imag)
+        {
+            assert(fc == ONE);
+            assert(imag == 0 || imag == 1);
+            return *(this->data + cindex*2 + imag);
+        }
+
+        inline rnumber &cval(ptrdiff_t cindex, int component, int imag)
         {
-            return (cnumber*)this->data;
+            assert(fc == THREE);
+            assert(imag == 0 || imag == 1);
+            return *(this->data + (cindex*ncomp(fc) + component)*2 + imag);
         }
 
-        inline field<rnumber, be, fc>& operator=(const cnumber *__restrict__ source)
+        inline rnumber &cval(ptrdiff_t cindex, int comp1, int comp0, int imag)
+        {
+            assert(fc == THREExTHREE);
+            assert(comp1 >= 0 && comp1 < 3);
+            assert(comp0 >= 0 && comp0 < 3);
+            assert(imag == 0 || imag == 1);
+            return *(this->data + ((cindex*3 + comp1)*3+comp0)*2 + imag);
+        }
+
+        inline field<rnumber, be, fc>& operator=(const typename fftw_interface<rnumber>::complex *__restrict__ source)
         {
             std::copy((rnumber*)source,
                       (rnumber*)(source + this->clayout->local_size),
@@ -217,6 +171,15 @@ class field
             this->real_space_representation = true;
             return *this;
         }
+
+        inline field<rnumber, be, fc>& operator=(const rnumber value)
+        {
+            std::fill_n(this->data,
+                        this->rmemlayout->local_size,
+                        value);
+            return *this;
+        }
+
         template <kspace_dealias_type dt>
         void compute_stats(
                 kspace<be, dt> *kk,
@@ -224,6 +187,45 @@ class field
                 const std::string dset_name,
                 const hsize_t toffset,
                 const double max_estimate);
+        inline void impose_zero_mode()
+        {
+            if (this->clayout->myrank == this->clayout->rank[0][0] &&
+                this->real_space_representation == false)
+            {
+                std::fill_n(this->data, 2*ncomp(fc), 0.0);
+            }
+        }
+        template <class func_type>
+        void RLOOP(func_type expression)
+        {
+            switch(be)
+            {
+                case FFTW:
+	                #pragma omp parallel for schedule(static)
+                    for (hsize_t zindex = 0; zindex < this->rlayout->subsizes[0]; zindex++)
+                    for (hsize_t yindex = 0; yindex < this->rlayout->subsizes[1]; yindex++)
+                    {
+                        ptrdiff_t rindex = (
+                                zindex * this->rlayout->subsizes[1] + yindex)*(
+                                    this->rmemlayout->subsizes[2]);
+                        for (hsize_t xindex = 0; xindex < this->rlayout->subsizes[2]; xindex++)
+                        {
+                            expression(rindex, xindex, yindex, zindex);
+                            rindex++;
+                        }
+                    }
+                    break;
+            }
+        }
+        ptrdiff_t get_cindex(
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex)
+        {
+            return ((yindex*this->clayout->subsizes[1] +
+                     zindex)*this->clayout->subsizes[2] +
+                    xindex);
+        }
 };
 
 template <typename rnumber,
@@ -236,79 +238,5 @@ void compute_gradient(
         field<rnumber, be, fc1> *source,
         field<rnumber, be, fc2> *destination);
 
-
-/* real space loop */
-template <field_backend be, class ObjectType, class FuncType>
-void FIELD_RLOOP(ObjectType* obj, FuncType expression)
-{
-    switch (be)
-    {
-        case FFTW:
-            #pragma omp parallel for schedule(static)
-            for (hsize_t zindex = 0; zindex < obj->rlayout->subsizes[0]; zindex++)
-            for (hsize_t yindex = 0; yindex < obj->rlayout->subsizes[1]; yindex++)
-            {
-                ptrdiff_t rindex = (
-                        zindex * obj->rlayout->subsizes[1] + yindex)*(
-                            obj->rmemlayout->subsizes[2]);
-                for (hsize_t xindex = 0; xindex < obj->rlayout->subsizes[2]; xindex++)
-                {
-                    expression(zindex, yindex, rindex, xindex);
-                    rindex++;
-                }
-            }
-            break;
-    }
-}
-
-template <class ObjectType, class FuncType>
-void KSPACE_CLOOP_K2(ObjectType* obj, FuncType expression)
-
-{
-    #pragma omp parallel for schedule(static)
-    for (hsize_t yindex = 0; yindex < obj->layout->subsizes[0]; yindex++){
-        ptrdiff_t cindex = yindex*obj->layout->subsizes[1]*obj->layout->subsizes[2];
-        for (hsize_t zindex = 0; zindex < obj->layout->subsizes[1]; zindex++)
-        for (hsize_t xindex = 0; xindex < obj->layout->subsizes[2]; xindex++)
-        {
-            double k2 = (obj->kx[xindex]*obj->kx[xindex] +
-                  obj->ky[yindex]*obj->ky[yindex] +
-                  obj->kz[zindex]*obj->kz[zindex]);
-            expression(cindex, yindex, zindex, xindex, k2);
-            cindex++;
-        }
-    }
-}
-
-template <class ObjectType, class FuncType>
-void KSPACE_CLOOP_K2_NXMODES(ObjectType* obj, FuncType expression)
-
-{
-    #pragma omp parallel for schedule(static)
-    for (hsize_t yindex = 0; yindex < obj->layout->subsizes[0]; yindex++)
-    {
-        ptrdiff_t cindex = yindex*( obj->layout->subsizes[1] * obj->layout->subsizes[2] );
-        for (hsize_t zindex = 0; zindex < obj->layout->subsizes[1]; zindex++)
-        {
-            int nxmodes = 1;
-            hsize_t xindex = 0;
-            double k2 = (obj->kx[xindex]*obj->kx[xindex] +
-                  obj->ky[yindex]*obj->ky[yindex] +
-                  obj->kz[zindex]*obj->kz[zindex]);
-            expression(cindex, yindex, zindex, nxmodes, xindex, k2);
-            cindex++;
-            nxmodes = 2;
-            for (xindex = 1; xindex < obj->layout->subsizes[2]; xindex++)
-            {
-                double k2 = (obj->kx[xindex]*obj->kx[xindex] +
-                      obj->ky[yindex]*obj->ky[yindex] +
-                      obj->kz[zindex]*obj->kz[zindex]);
-                expression(cindex, yindex, zindex, nxmodes, xindex, k2);
-                cindex++;
-            }
-        }
-    }
-}
-
-#endif//FIELD
+#endif//FIELD_HPP
 
diff --git a/bfps/cpp/field_layout.cpp b/bfps/cpp/field_layout.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..908904991d5d95b0c89ba679b402d8d5727b8c85
--- /dev/null
+++ b/bfps/cpp/field_layout.cpp
@@ -0,0 +1,111 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+#include <cassert>
+#include "field_layout.hpp"
+#include "scope_timer.hpp"
+
+template <field_components fc>
+field_layout<fc>::field_layout(
+        const hsize_t *SIZES,
+        const hsize_t *SUBSIZES,
+        const hsize_t *STARTS,
+        const MPI_Comm COMM_TO_USE)
+{
+    TIMEZONE("field_layout::field_layout");
+    this->comm = COMM_TO_USE;
+    MPI_Comm_rank(this->comm, &this->myrank);
+    MPI_Comm_size(this->comm, &this->nprocs);
+
+    std::copy(SIZES, SIZES + 3, this->sizes);
+    std::copy(SUBSIZES, SUBSIZES + 3, this->subsizes);
+    std::copy(STARTS, STARTS + 3, this->starts);
+    if (fc == THREE || fc == THREExTHREE)
+    {
+        this->sizes[3] = 3;
+        this->subsizes[3] = 3;
+        this->starts[3] = 0;
+    }
+    if (fc == THREExTHREE)
+    {
+        this->sizes[4] = 3;
+        this->subsizes[4] = 3;
+        this->starts[4] = 0;
+    }
+    this->local_size = 1;
+    this->full_size = 1;
+    for (unsigned int i=0; i<ndim(fc); i++)
+    {
+        this->local_size *= this->subsizes[i];
+        this->full_size *= this->sizes[i];
+    }
+
+    /*field will at most be distributed in 2D*/
+    this->rank.resize(2);
+    this->all_start.resize(2);
+    this->all_size.resize(2);
+    for (int i=0; i<2; i++)
+    {
+        this->rank[i].resize(this->sizes[i]);
+        std::vector<int> local_rank;
+        local_rank.resize(this->sizes[i], 0);
+        for (unsigned int ii=this->starts[i]; ii<this->starts[i]+this->subsizes[i]; ii++)
+            local_rank[ii] = this->myrank;
+        MPI_Allreduce(
+                &local_rank.front(),
+                &this->rank[i].front(),
+                this->sizes[i],
+                MPI_INT,
+                MPI_SUM,
+                this->comm);
+        this->all_start[i].resize(this->nprocs);
+        std::vector<int> local_start;
+        local_start.resize(this->nprocs, 0);
+        local_start[this->myrank] = this->starts[i];
+        MPI_Allreduce(
+                &local_start.front(),
+                &this->all_start[i].front(),
+                this->nprocs,
+                MPI_INT,
+                MPI_SUM,
+                this->comm);
+        this->all_size[i].resize(this->nprocs);
+        std::vector<int> local_subsize;
+        local_subsize.resize(this->nprocs, 0);
+        local_subsize[this->myrank] = this->subsizes[i];
+        MPI_Allreduce(
+                &local_subsize.front(),
+                &this->all_size[i].front(),
+                this->nprocs,
+                MPI_INT,
+                MPI_SUM,
+                this->comm);
+    }
+}
+
+template class field_layout<ONE>;
+template class field_layout<THREE>;
+template class field_layout<THREExTHREE>;
+
diff --git a/bfps/cpp/field_layout.hpp b/bfps/cpp/field_layout.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..770119c2dcb05017d495b62559f050646872dc84
--- /dev/null
+++ b/bfps/cpp/field_layout.hpp
@@ -0,0 +1,79 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#include <vector>
+#include "base.hpp"
+
+#ifndef FIELD_LAYOUT_HPP
+
+#define FIELD_LAYOUT_HPP
+
+enum field_components {ONE, THREE, THREExTHREE};
+
+constexpr unsigned int ncomp(
+        field_components fc)
+    /* return actual number of field components for each enum value */
+{
+    return ((fc == THREE) ? 3 : (
+            (fc == THREExTHREE) ? 9 : 1));
+}
+
+constexpr unsigned int ndim(
+        field_components fc)
+    /* return actual number of field dimensions for each enum value */
+{
+    return ((fc == THREE) ? 4 : (
+            (fc == THREExTHREE) ? 5 : 3));
+}
+
+template <field_components fc>
+class field_layout
+{
+    public:
+        /* description */
+        hsize_t sizes[ndim(fc)];
+        hsize_t subsizes[ndim(fc)];
+        hsize_t starts[ndim(fc)];
+        hsize_t local_size, full_size;
+
+        int myrank, nprocs;
+        MPI_Comm comm;
+
+        std::vector<std::vector<int>> rank;
+        std::vector<std::vector<int>> all_start;
+        std::vector<std::vector<int>> all_size;
+
+        /* methods */
+        field_layout(
+                const hsize_t *SIZES,
+                const hsize_t *SUBSIZES,
+                const hsize_t *STARTS,
+                const MPI_Comm COMM_TO_USE);
+        ~field_layout(){}
+};
+
+#endif//FIELD_LAYOUT_HPP
+
diff --git a/bfps/cpp/interpolator_base.cpp b/bfps/cpp/interpolator_base.cpp
index 58bf57cf13382f0704da4537dae9d21bb4a841da..db81dcb329070e14897e432e82a2fee95810e169 100644
--- a/bfps/cpp/interpolator_base.cpp
+++ b/bfps/cpp/interpolator_base.cpp
@@ -43,6 +43,20 @@ interpolator_base<rnumber, interp_neighbours>::interpolator_base(
     this->dz = 4*acos(0) / (fs->dkz*this->descriptor->sizes[0]);
 }
 
+template <class rnumber, int interp_neighbours>
+interpolator_base<rnumber, interp_neighbours>::interpolator_base(
+        vorticity_equation<rnumber, FFTW> *fs,
+        base_polynomial_values BETA_POLYS)
+{
+//    this->descriptor = fs->rd;
+//    this->compute_beta = BETA_POLYS;
+//
+//    // compute dx, dy, dz;
+//    this->dx = 4*acos(0) / (fs->kk->dkx*this->descriptor->sizes[2]);
+//    this->dy = 4*acos(0) / (fs->kk->dky*this->descriptor->sizes[1]);
+//    this->dz = 4*acos(0) / (fs->kk->dkz*this->descriptor->sizes[0]);
+}
+
 template <class rnumber, int interp_neighbours>
 void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates(
         const int nparticles,
diff --git a/bfps/cpp/interpolator_base.hpp b/bfps/cpp/interpolator_base.hpp
index 7dda7fb08319bf2a044bcc220e204b748d6336d6..f4b793342d9b5b38e39c717ad30ee88e106958aa 100644
--- a/bfps/cpp/interpolator_base.hpp
+++ b/bfps/cpp/interpolator_base.hpp
@@ -25,6 +25,7 @@
 
 
 #include "fluid_solver_base.hpp"
+#include "vorticity_equation.hpp"
 #include "spline_n1.hpp"
 #include "spline_n2.hpp"
 #include "spline_n3.hpp"
@@ -58,6 +59,10 @@ class interpolator_base
         interpolator_base(
                 fluid_solver_base<rnumber> *FSOLVER,
                 base_polynomial_values BETA_POLYS);
+
+        interpolator_base(
+                vorticity_equation<rnumber, FFTW> *FSOLVER,
+                base_polynomial_values BETA_POLYS);
         virtual ~interpolator_base(){}
 
         /* may not destroy input */
diff --git a/bfps/cpp/kspace.cpp b/bfps/cpp/kspace.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f98024ea50dfd709ab63f490727c274c7fe3d600
--- /dev/null
+++ b/bfps/cpp/kspace.cpp
@@ -0,0 +1,439 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+#include <cmath>
+#include <cstdlib>
+#include <algorithm>
+#include <cassert>
+#include "kspace.hpp"
+#include "scope_timer.hpp"
+
+
+
+template <field_backend be,
+          kspace_dealias_type dt>
+template <field_components fc>
+kspace<be, dt>::kspace(
+        const field_layout<fc> *source_layout,
+        const double DKX,
+        const double DKY,
+        const double DKZ)
+{
+    TIMEZONE("field::kspace");
+    /* get layout */
+    this->layout = new field_layout<ONE>(
+            source_layout->sizes,
+            source_layout->subsizes,
+            source_layout->starts,
+            source_layout->comm);
+
+    /* store dk values */
+    this->dkx = DKX;
+    this->dky = DKY;
+    this->dkz = DKZ;
+
+    /* compute kx, ky, kz and compute kM values */
+    switch(be)
+    {
+        case FFTW:
+            this->kx.resize(this->layout->sizes[2]);
+            this->ky.resize(this->layout->subsizes[0]);
+            this->kz.resize(this->layout->sizes[1]);
+            int i, ii;
+            for (i = 0; i<int(this->layout->sizes[2]); i++)
+                this->kx[i] = i*this->dkx;
+            for (i = 0; i<int(this->layout->subsizes[0]); i++)
+            {
+                ii = i + this->layout->starts[0];
+                if (ii <= int(this->layout->sizes[1]/2))
+                    this->ky[i] = this->dky*ii;
+                else
+                    this->ky[i] = this->dky*(ii - int(this->layout->sizes[1]));
+            }
+            for (i = 0; i<int(this->layout->sizes[1]); i++)
+            {
+                if (i <= int(this->layout->sizes[0]/2))
+                    this->kz[i] = this->dkz*i;
+                else
+                    this->kz[i] = this->dkz*(i - int(this->layout->sizes[0]));
+            }
+            switch(dt)
+            {
+                case TWO_THIRDS:
+                    this->kMx = this->dkx*(int(2*(int(this->layout->sizes[2])-1)/3)-1);
+                    this->kMy = this->dky*(int(this->layout->sizes[0] / 3)-1);
+                    this->kMz = this->dkz*(int(this->layout->sizes[1] / 3)-1);
+                    break;
+                case SMOOTH:
+                    this->kMx = this->dkx*(int(this->layout->sizes[2])-2);
+                    this->kMy = this->dky*(int(this->layout->sizes[0] / 2)-1);
+                    this->kMz = this->dkz*(int(this->layout->sizes[1] / 2)-1);
+                    break;
+            }
+            break;
+    }
+
+    /* get global kM and dk */
+    this->kM = this->kMx;
+    if (this->kM < this->kMy) this->kM = this->kMy;
+    if (this->kM < this->kMz) this->kM = this->kMz;
+    this->kM2 = this->kM * this->kM;
+    this->dk = this->dkx;
+    if (this->dk > this->dky) this->dk = this->dky;
+    if (this->dk > this->dkz) this->dk = this->dkz;
+    this->dk2 = this->dk*this->dk;
+
+    /* spectra stuff */
+    this->nshells = int(this->kM / this->dk) + 2;
+    this->kshell.resize(this->nshells, 0);
+    this->nshell.resize(this->nshells, 0);
+    std::vector<double> kshell_local;
+    kshell_local.resize(this->nshells, 0);
+    std::vector<int64_t> nshell_local;
+    nshell_local.resize(this->nshells, 0);
+    this->CLOOP_K2_NXMODES(
+            [&](ptrdiff_t cindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex,
+                double k2,
+                int nxmodes){
+            if (k2 < this->kM2)
+            {
+                double knorm = sqrt(k2);
+                nshell_local[int(knorm/this->dk)] += nxmodes;
+                kshell_local[int(knorm/this->dk)] += nxmodes*knorm;
+            }
+            if (dt == SMOOTH)
+                this->dealias_filter[int(round(k2 / this->dk2))] = \
+                    exp(-36.0 * pow(k2/this->kM2, 18.));
+                });
+    MPI_Allreduce(
+            &nshell_local.front(),
+            &this->nshell.front(),
+            this->nshells,
+            MPI_INT64_T, MPI_SUM, this->layout->comm);
+    MPI_Allreduce(
+            &kshell_local.front(),
+            &this->kshell.front(),
+            this->nshells,
+            MPI_DOUBLE, MPI_SUM, this->layout->comm);
+    for (int n=0; n<this->nshells; n++)
+        this->kshell[n] /= this->nshell[n];
+}
+
+template <field_backend be,
+          kspace_dealias_type dt>
+kspace<be, dt>::~kspace()
+{
+    delete this->layout;
+}
+
+template <field_backend be,
+          kspace_dealias_type dt>
+template <typename rnumber,
+          field_components fc>
+void kspace<be, dt>::low_pass(typename fftw_interface<rnumber>::complex *__restrict__ a, const double kmax)
+{
+    const double km2 = kmax*kmax;
+    this->CLOOP_K2(
+            [&](ptrdiff_t cindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex,
+                double k2){
+            if (k2 >= km2)
+                std::fill_n((rnumber*)(a + ncomp(fc)*cindex), 2*ncomp(fc), 0);
+                });
+}
+
+template <field_backend be,
+          kspace_dealias_type dt>
+template <typename rnumber,
+          field_components fc>
+void kspace<be, dt>::dealias(typename fftw_interface<rnumber>::complex *__restrict__ a)
+{
+    switch(dt)
+    {
+        case TWO_THIRDS:
+            this->low_pass<rnumber, fc>(a, this->kM);
+            break;
+        case SMOOTH:
+            this->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+                    double tval = this->dealias_filter[int(round(k2 / this->dk2))];
+                    for (int tcounter=0; tcounter<2*ncomp(fc); tcounter++)
+                        ((rnumber*)a)[2*ncomp(fc)*cindex + tcounter] *= tval;
+                        });
+            break;
+    }
+}
+
+template <field_backend be,
+          kspace_dealias_type dt>
+template <typename rnumber>
+void kspace<be, dt>::force_divfree(typename fftw_interface<rnumber>::complex *__restrict__ a)
+{
+    TIMEZONE("kspace::force_divfree");
+    typename fftw_interface<rnumber>::complex tval;
+    this->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+                if (k2 > 0)
+        {
+            tval[0] = (this->kx[xindex]*((*(a + cindex*3  ))[0]) +
+                       this->ky[yindex]*((*(a + cindex*3+1))[0]) +
+                       this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2;
+            tval[1] = (this->kx[xindex]*((*(a + cindex*3  ))[1]) +
+                       this->ky[yindex]*((*(a + cindex*3+1))[1]) +
+                       this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2;
+            for (int imag_part=0; imag_part<2; imag_part++)
+            {
+                a[cindex*3  ][imag_part] -= tval[imag_part]*this->kx[xindex];
+                a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex];
+                a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex];
+            }
+        }
+                }
+    );
+    if (this->layout->myrank == this->layout->rank[0][0])
+        std::fill_n((rnumber*)(a), 6, 0.0);
+}
+
+template <field_backend be,
+          kspace_dealias_type dt>
+template <typename rnumber,
+          field_components fc>
+void kspace<be, dt>::cospectrum(
+        const rnumber(* __restrict a)[2],
+        const rnumber(* __restrict b)[2],
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset)
+{
+    TIMEZONE("field::cospectrum");
+    std::vector<double> spec, spec_local;
+    spec.resize(this->nshells*ncomp(fc)*ncomp(fc), 0);
+    spec_local.resize(this->nshells*ncomp(fc)*ncomp(fc), 0);
+    this->CLOOP_K2_NXMODES(
+            [&](ptrdiff_t cindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex,
+                double k2,
+                int nxmodes){
+            if (k2 <= this->kM2)
+            {
+                int tmp_int = int(sqrt(k2) / this->dk)*ncomp(fc)*ncomp(fc);
+                for (hsize_t i=0; i<ncomp(fc); i++)
+                for (hsize_t j=0; j<ncomp(fc); j++)
+                    spec_local[tmp_int + i*ncomp(fc)+j] += nxmodes * (
+                    (a[ncomp(fc)*cindex + i][0] * b[ncomp(fc)*cindex + j][0]) +
+                    (a[ncomp(fc)*cindex + i][1] * b[ncomp(fc)*cindex + j][1]));
+            }
+            });
+    MPI_Allreduce(
+            &spec_local.front(),
+            &spec.front(),
+            spec.size(),
+            MPI_DOUBLE, MPI_SUM, this->layout->comm);
+    if (this->layout->myrank == 0)
+    {
+        hid_t dset, wspace, mspace;
+        hsize_t count[(ndim(fc)-2)*2], offset[(ndim(fc)-2)*2], dims[(ndim(fc)-2)*2];
+        dset = H5Dopen(group, ("spectra/" + dset_name).c_str(), H5P_DEFAULT);
+        wspace = H5Dget_space(dset);
+        H5Sget_simple_extent_dims(wspace, dims, NULL);
+        switch (fc)
+        {
+            case THREExTHREE:
+                offset[4] = 0;
+                offset[5] = 0;
+                count[4] = ncomp(fc);
+                count[5] = ncomp(fc);
+            case THREE:
+                offset[2] = 0;
+                offset[3] = 0;
+                count[2] = ncomp(fc);
+                count[3] = ncomp(fc);
+            default:
+                offset[0] = toffset;
+                offset[1] = 0;
+                count[0] = 1;
+                count[1] = this->nshells;
+        }
+        mspace = H5Screate_simple((ndim(fc)-2)*2, count, NULL);
+        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, &spec.front());
+        H5Sclose(wspace);
+        H5Sclose(mspace);
+        H5Dclose(dset);
+    }
+}
+
+
+template class kspace<FFTW, TWO_THIRDS>;
+template class kspace<FFTW, SMOOTH>;
+
+template kspace<FFTW, TWO_THIRDS>::kspace<>(
+        const field_layout<ONE> *,
+        const double, const double, const double);
+template kspace<FFTW, TWO_THIRDS>::kspace<>(
+        const field_layout<THREE> *,
+        const double, const double, const double);
+template kspace<FFTW, TWO_THIRDS>::kspace<>(
+        const field_layout<THREExTHREE> *,
+        const double, const double, const double);
+
+template kspace<FFTW, SMOOTH>::kspace<>(
+        const field_layout<ONE> *,
+        const double, const double, const double);
+template kspace<FFTW, SMOOTH>::kspace<>(
+        const field_layout<THREE> *,
+        const double, const double, const double);
+template kspace<FFTW, SMOOTH>::kspace<>(
+        const field_layout<THREExTHREE> *,
+        const double, const double, const double);
+
+template void kspace<FFTW, SMOOTH>::low_pass<float, ONE>(
+        typename fftw_interface<float>::complex *__restrict__ a,
+        const double kmax);
+template void kspace<FFTW, SMOOTH>::low_pass<float, THREE>(
+        typename fftw_interface<float>::complex *__restrict__ a,
+        const double kmax);
+template void kspace<FFTW, SMOOTH>::low_pass<float, THREExTHREE>(
+        typename fftw_interface<float>::complex *__restrict__ a,
+        const double kmax);
+
+template void kspace<FFTW, SMOOTH>::low_pass<double, ONE>(
+        typename fftw_interface<double>::complex *__restrict__ a,
+        const double kmax);
+template void kspace<FFTW, SMOOTH>::low_pass<double, THREE>(
+        typename fftw_interface<double>::complex *__restrict__ a,
+        const double kmax);
+template void kspace<FFTW, SMOOTH>::low_pass<double, THREExTHREE>(
+        typename fftw_interface<double>::complex *__restrict__ a,
+        const double kmax);
+
+template void kspace<FFTW, SMOOTH>::dealias<float, ONE>(
+        typename fftw_interface<float>::complex *__restrict__ a);
+template void kspace<FFTW, SMOOTH>::dealias<float, THREE>(
+        typename fftw_interface<float>::complex *__restrict__ a);
+template void kspace<FFTW, SMOOTH>::dealias<float, THREExTHREE>(
+        typename fftw_interface<float>::complex *__restrict__ a);
+
+template void kspace<FFTW, SMOOTH>::dealias<double, ONE>(
+        typename fftw_interface<double>::complex *__restrict__ a);
+template void kspace<FFTW, SMOOTH>::dealias<double, THREE>(
+        typename fftw_interface<double>::complex *__restrict__ a);
+template void kspace<FFTW, SMOOTH>::dealias<double, THREExTHREE>(
+        typename fftw_interface<double>::complex *__restrict__ a);
+
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, ONE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const typename fftw_interface<float>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const typename fftw_interface<float>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREExTHREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const typename fftw_interface<float>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, ONE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const typename fftw_interface<double>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const typename fftw_interface<double>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREExTHREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const typename fftw_interface<double>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+
+template void kspace<FFTW, SMOOTH>::cospectrum<float, ONE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const typename fftw_interface<float>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, SMOOTH>::cospectrum<float, THREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const typename fftw_interface<float>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, SMOOTH>::cospectrum<float, THREExTHREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const typename fftw_interface<float>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, SMOOTH>::cospectrum<double, ONE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const typename fftw_interface<double>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, SMOOTH>::cospectrum<double, THREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const typename fftw_interface<double>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, SMOOTH>::cospectrum<double, THREExTHREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const typename fftw_interface<double>::complex *__restrict__ b,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+
+template void kspace<FFTW, SMOOTH>::force_divfree<float>(
+       typename fftw_interface<float>::complex *__restrict__ a);
+template void kspace<FFTW, SMOOTH>::force_divfree<double>(
+       typename fftw_interface<double>::complex *__restrict__ a);
+
diff --git a/bfps/cpp/kspace.hpp b/bfps/cpp/kspace.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ea59d53aed80d3d97fd17d5b5ed72f710c190d4f
--- /dev/null
+++ b/bfps/cpp/kspace.hpp
@@ -0,0 +1,145 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#include <hdf5.h>
+#include <unordered_map>
+#include <vector>
+#include <string>
+#include "fftw_interface.hpp"
+#include "field_layout.hpp"
+
+#ifndef KSPACE_HPP
+
+#define KSPACE_HPP
+
+enum field_backend {FFTW};
+enum kspace_dealias_type {TWO_THIRDS, SMOOTH};
+
+
+template <field_backend be,
+          kspace_dealias_type dt>
+class kspace
+{
+    public:
+        /* relevant field layout */
+        field_layout<ONE> *layout;
+
+        /* physical parameters */
+        double dkx, dky, dkz, dk, dk2;
+
+        /* mode and dealiasing information */
+        double kMx, kMy, kMz, kM, kM2;
+        std::vector<double> kx, ky, kz;
+        std::unordered_map<int, double> dealias_filter;
+        std::vector<double> kshell;
+        std::vector<int64_t> nshell;
+        int nshells;
+
+        /* methods */
+        template <field_components fc>
+        kspace(
+                const field_layout<fc> *source_layout,
+                const double DKX = 1.0,
+                const double DKY = 1.0,
+                const double DKZ = 1.0);
+        ~kspace();
+
+        template <typename rnumber,
+                  field_components fc>
+        void low_pass(
+                typename fftw_interface<rnumber>::complex *__restrict__ a,
+                const double kmax);
+
+        template <typename rnumber,
+                  field_components fc>
+        void dealias(typename fftw_interface<rnumber>::complex *__restrict__ a);
+
+        template <typename rnumber,
+                  field_components fc>
+        void cospectrum(
+                const rnumber(* __restrict__ a)[2],
+                const rnumber(* __restrict__ b)[2],
+                const hid_t group,
+                const std::string dset_name,
+                const hsize_t toffset);
+        template <class func_type>
+        void CLOOP(func_type expression)
+        {
+            ptrdiff_t cindex = 0;
+            for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++)
+            for (hsize_t zindex = 0; zindex < this->layout->subsizes[1]; zindex++)
+            for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++)
+                {
+                    expression(cindex, xindex, yindex, zindex);
+                    cindex++;
+                }
+        }
+        template <class func_type>
+        void CLOOP_K2(func_type expression)
+        {
+            double k2;
+            ptrdiff_t cindex = 0;
+            for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++)
+            for (hsize_t zindex = 0; zindex < this->layout->subsizes[1]; zindex++)
+            for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++)
+                {
+                    k2 = (this->kx[xindex]*this->kx[xindex] +
+                          this->ky[yindex]*this->ky[yindex] +
+                          this->kz[zindex]*this->kz[zindex]);
+                    expression(cindex, xindex, yindex, zindex, k2);
+                    cindex++;
+                }
+        }
+        template <class func_type>
+        void CLOOP_K2_NXMODES(func_type expression)
+        {
+            ptrdiff_t cindex = 0;
+            for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++)
+            for (hsize_t zindex = 0; zindex < this->layout->subsizes[1]; zindex++)
+            {
+                hsize_t xindex = 0;
+                double k2 = (
+                        this->kx[xindex]*this->kx[xindex] +
+                        this->ky[yindex]*this->ky[yindex] +
+                        this->kz[zindex]*this->kz[zindex]);
+                expression(cindex, xindex, yindex, zindex, k2, 1);
+                cindex++;
+                for (xindex = 1; xindex < this->layout->subsizes[2]; xindex++)
+                {
+                    k2 = (this->kx[xindex]*this->kx[xindex] +
+                          this->ky[yindex]*this->ky[yindex] +
+                          this->kz[zindex]*this->kz[zindex]);
+                    expression(cindex, xindex, yindex, zindex, k2, 2);
+                    cindex++;
+                }
+            }
+        }
+        template <typename rnumber>
+        void force_divfree(typename fftw_interface<rnumber>::complex *__restrict__ a);
+};
+
+#endif//KSPACE_HPP
+
diff --git a/bfps/cpp/particles.cpp b/bfps/cpp/particles.cpp
index 847f065d49299b559162060876402101fe48d9d4..cdaf157cb912c3074faf84bfecf1d9b3752c78a7 100644
--- a/bfps/cpp/particles.cpp
+++ b/bfps/cpp/particles.cpp
@@ -43,17 +43,17 @@ template <particle_types particle_type, class rnumber, int interp_neighbours>
 particles<particle_type, rnumber, interp_neighbours>::particles(
         const char *NAME,
         const hid_t data_file_id,
-        interpolator_base<rnumber, interp_neighbours> *FIELD,
+        interpolator_base<rnumber, interp_neighbours> *VEL,
         const int TRAJ_SKIP,
         const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
             NAME,
             TRAJ_SKIP,
             data_file_id,
-            FIELD->descriptor->comm)
+            VEL->descriptor->comm)
 {
     assert((INTEGRATION_STEPS <= 6) &&
            (INTEGRATION_STEPS >= 1));
-    this->vel = FIELD;
+    this->vel = VEL;
     this->integration_steps = INTEGRATION_STEPS;
     this->array_size = this->nparticles * state_dimension(particle_type);
     this->state = new double[this->array_size];
diff --git a/bfps/cpp/rFFTW_distributed_particles.cpp b/bfps/cpp/rFFTW_distributed_particles.cpp
index d04e10402ba649a3aa17001481d85585eb17a841..713b4f79cc8888713a9edad7a2c344edc090289b 100644
--- a/bfps/cpp/rFFTW_distributed_particles.cpp
+++ b/bfps/cpp/rFFTW_distributed_particles.cpp
@@ -32,6 +32,8 @@
 #include <string>
 #include <sstream>
 #include <set>
+#include <algorithm>
+#include <ctime>
 
 #include "base.hpp"
 #include "rFFTW_distributed_particles.hpp"
@@ -45,13 +47,13 @@ template <particle_types particle_type, class rnumber, int interp_neighbours>
 rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_distributed_particles(
         const char *NAME,
         const hid_t data_file_id,
-        rFFTW_interpolator<rnumber, interp_neighbours> *FIELD,
+        rFFTW_interpolator<rnumber, interp_neighbours> *VEL,
         const int TRAJ_SKIP,
         const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
             NAME,
             TRAJ_SKIP,
             data_file_id,
-            FIELD->descriptor->comm)
+            VEL->descriptor->comm)
 {
     TIMEZONE("rFFTW_distributed_particles::rFFTW_distributed_particles");
     /* check that integration_steps has a valid value.
@@ -67,16 +69,16 @@ rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_di
      * therefore I prefer to just kill the code at this point,
      * no matter whether or not NDEBUG is present.
      * */
-    if (interp_neighbours*2+2 > FIELD->descriptor->subsizes[0])
+    if (interp_neighbours*2+2 > VEL->descriptor->subsizes[0])
     {
         DEBUG_MSG("parameters incompatible with rFFTW_distributed_particles.\n"
                   "interp kernel size is %d, local_z_size is %d\n",
-                  interp_neighbours*2+2, FIELD->descriptor->subsizes[0]);
-        if (FIELD->descriptor->myrank == 0)
+                  interp_neighbours*2+2, VEL->descriptor->subsizes[0]);
+        if (VEL->descriptor->myrank == 0)
             std::cerr << "parameters incompatible with rFFTW_distributed_particles." << std::endl;
         exit(0);
     }
-    this->vel = FIELD;
+    this->vel = VEL;
     this->rhs.resize(INTEGRATION_STEPS);
     this->integration_steps = INTEGRATION_STEPS;
     this->state.reserve(2*this->nparticles / this->nprocs);
@@ -187,14 +189,18 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sam
             int tindex;
             tindex = 0;
             // can this sorting be done more efficiently?
-            std::set<int> ordered_dp;
+            std::vector<int> ordered_dp;
             {
                 TIMEZONE("rFFTW_distributed_particles::sample::ordered_dp");
-                for (auto p: dp.at(domain_index))
-                    ordered_dp.insert(p);
+            ordered_dp.reserve(dp.at(domain_index).size());
+            for (auto p: dp.at(domain_index))
+                ordered_dp.push_back(p);
+            //std::set<int> ordered_dp(dp.at(domain_index));
+            std::sort(ordered_dp.begin(), ordered_dp.end());
             }
 
             for (auto p: ordered_dp)
+            //for (auto p: dp.at(domain_index))
             {
                 (*field)(x.at(p).data, yy + tindex*3);
                 tindex++;
@@ -211,6 +217,7 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sam
             }
             tindex = 0;
             for (auto p: ordered_dp)
+            //for (auto p: dp.at(domain_index))
             {
                 y[p] = yyy + tindex*3;
                 tindex++;
@@ -233,8 +240,10 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::get
         case VELOCITY_TRACER:
             this->sample(this->vel, x, dp, yy);
             y.clear();
-            for (auto &pp: x)
-                y[pp.first] = yy[pp.first].data;
+            y.reserve(yy.size());
+            y.rehash(this->nparticles);
+            for (auto &pp: yy)
+                y[pp.first] = pp.second.data;
             break;
     }
 }
@@ -301,6 +310,10 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::red
     ro[1] = 1;
     /* particles to send, particles to receive */
     std::vector<int> ps[2], pr[2];
+    for (int tcounter = 0; tcounter < 2; tcounter++)
+    {
+        ps[tcounter].reserve(newdp[dindex[tcounter]].size());
+    }
     /* number of particles to send, number of particles to receive */
     int nps[2], npr[2];
     int rsrc, rdst;
@@ -452,44 +465,51 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::Ada
         const int nsteps)
 {
     this->get_rhs(this->state, this->domain_particles, this->rhs[0]);
-    for (auto &pp: this->state)
+#define AdamsBashforth_LOOP_PREAMBLE \
+    for (auto &pp: this->state) \
         for (unsigned int i=0; i<state_dimension(particle_type); i++)
-            switch(nsteps)
-            {
-                case 1:
-                    pp.second[i] += this->dt*this->rhs[0][pp.first][i];
-                    break;
-                case 2:
-                    pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i]
-                                            -   this->rhs[1][pp.first][i])/2;
-                    break;
-                case 3:
-                    pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i]
-                                            - 16*this->rhs[1][pp.first][i]
-                                            +  5*this->rhs[2][pp.first][i])/12;
-                    break;
-                case 4:
-                    pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i]
-                                            - 59*this->rhs[1][pp.first][i]
-                                            + 37*this->rhs[2][pp.first][i]
-                                            -  9*this->rhs[3][pp.first][i])/24;
-                    break;
-                case 5:
-                    pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i]
-                                            - 2774*this->rhs[1][pp.first][i]
-                                            + 2616*this->rhs[2][pp.first][i]
-                                            - 1274*this->rhs[3][pp.first][i]
-                                            +  251*this->rhs[4][pp.first][i])/720;
-                    break;
-                case 6:
-                    pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i]
-                                            - 7923*this->rhs[1][pp.first][i]
-                                            + 9982*this->rhs[2][pp.first][i]
-                                            - 7298*this->rhs[3][pp.first][i]
-                                            + 2877*this->rhs[4][pp.first][i]
-                                            -  475*this->rhs[5][pp.first][i])/1440;
-                    break;
-            }
+    switch(nsteps)
+    {
+        case 1:
+            AdamsBashforth_LOOP_PREAMBLE
+            pp.second[i] += this->dt*this->rhs[0][pp.first][i];
+            break;
+        case 2:
+            AdamsBashforth_LOOP_PREAMBLE
+            pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i]
+                                    -   this->rhs[1][pp.first][i])/2;
+            break;
+        case 3:
+            AdamsBashforth_LOOP_PREAMBLE
+            pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i]
+                                    - 16*this->rhs[1][pp.first][i]
+                                    +  5*this->rhs[2][pp.first][i])/12;
+            break;
+        case 4:
+            AdamsBashforth_LOOP_PREAMBLE
+            pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i]
+                                    - 59*this->rhs[1][pp.first][i]
+                                    + 37*this->rhs[2][pp.first][i]
+                                    -  9*this->rhs[3][pp.first][i])/24;
+            break;
+        case 5:
+            AdamsBashforth_LOOP_PREAMBLE
+            pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i]
+                                    - 2774*this->rhs[1][pp.first][i]
+                                    + 2616*this->rhs[2][pp.first][i]
+                                    - 1274*this->rhs[3][pp.first][i]
+                                    +  251*this->rhs[4][pp.first][i])/720;
+            break;
+        case 6:
+            AdamsBashforth_LOOP_PREAMBLE
+            pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i]
+                                    - 7923*this->rhs[1][pp.first][i]
+                                    + 9982*this->rhs[2][pp.first][i]
+                                    - 7298*this->rhs[3][pp.first][i]
+                                    + 2877*this->rhs[4][pp.first][i]
+                                    -  475*this->rhs[5][pp.first][i])/1440;
+            break;
+    }
     this->redistribute(this->state, this->rhs, this->domain_particles);
     this->roll_rhs();
 }
@@ -620,20 +640,31 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::wri
     TIMEZONE("rFFTW_distributed_particles::write");
     double *data = new double[this->chunk_size*3];
     double *yy = new double[this->chunk_size*3];
-    int pindex = 0;
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
+    //int pindex = 0;
+   for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
     {
         std::fill_n(yy, this->chunk_size*3, 0);
-        for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
-        {
-            if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
-                this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
-            {
-                std::copy(y[pindex].data,
-                          y[pindex].data + 3,
-                          yy + p*3);
-            }
-        }
+        //for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
+        //{
+        //    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
+        //        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
+        //    {
+        //        std::copy(y[pindex].data,
+        //                  y[pindex].data + 3,
+        //                  yy + p*3);
+        //    }
+        //}
+        for (int s = -1; s <= 0; s++)
+             for (auto &pp: this->domain_particles[s])
+             {
+                 if (pp >= cindex*this->chunk_size &&
+                     pp < (cindex+1)*this->chunk_size)
+                {
+                    std::copy(y[pp].data,
+                              y[pp].data + 3,
+                              yy + (pp-cindex*this->chunk_size)*3);
+                }
+             }
         {
             TIMEZONE("MPI_Allreduce");
             MPI_Allreduce(
@@ -660,23 +691,34 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::wri
     TIMEZONE("rFFTW_distributed_particles::write2");
     double *temp0 = new double[this->chunk_size*state_dimension(particle_type)];
     double *temp1 = new double[this->chunk_size*state_dimension(particle_type)];
-    int pindex = 0;
+    //int pindex = 0;
     for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
     {
         //write state
         std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
-        pindex = cindex*this->chunk_size;
-        for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
-        {
-            if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
-                this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
-            {
-                TIMEZONE("std::copy");
-                std::copy(this->state[pindex].data,
-                          this->state[pindex].data + state_dimension(particle_type),
-                          temp0 + p*state_dimension(particle_type));
-            }
-        }
+        //pindex = cindex*this->chunk_size;
+        //for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
+        //{
+        //    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
+        //        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
+        //    {
+        //        TIMEZONE("std::copy");
+        //        std::copy(this->state[pindex].data,
+        //                  this->state[pindex].data + state_dimension(particle_type),
+        //                  temp0 + p*state_dimension(particle_type));
+        //    }
+        //}
+        for (int s = -1; s <= 0; s++)
+             for (auto &pp: this->domain_particles[s])
+             {
+                 if (pp >= cindex*this->chunk_size &&
+                     pp < (cindex+1)*this->chunk_size)
+                {
+                    std::copy(this->state[pp].data,
+                              this->state[pp].data + state_dimension(particle_type),
+                              temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type));
+                }
+             }
         {
             TIMEZONE("MPI_Allreduce");
             MPI_Allreduce(
@@ -697,18 +739,29 @@ void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::wri
             for (int i=0; i<this->integration_steps; i++)
             {
                 std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
-                pindex = cindex*this->chunk_size;
-                for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
-                {
-                    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
-                        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
-                    {
-                        TIMEZONE("std::copy");
-                        std::copy(this->rhs[i][pindex].data,
-                                  this->rhs[i][pindex].data + state_dimension(particle_type),
-                                  temp0 + p*state_dimension(particle_type));
-                    }
-                }
+                //pindex = cindex*this->chunk_size;
+                //for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
+                //{
+                //    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
+                //        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
+                //    {
+                //        TIMEZONE("std::copy");
+                //        std::copy(this->rhs[i][pindex].data,
+                //                  this->rhs[i][pindex].data + state_dimension(particle_type),
+                //                  temp0 + p*state_dimension(particle_type));
+                //    }
+                //}
+                for (int s = -1; s <= 0; s++)
+                     for (auto &pp: this->domain_particles[s])
+                     {
+                         if (pp >= cindex*this->chunk_size &&
+                             pp < (cindex+1)*this->chunk_size)
+                        {
+                            std::copy(this->rhs[i][pp].data,
+                                      this->rhs[i][pp].data + state_dimension(particle_type),
+                                      temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type));
+                        }
+                     }
                 {
                     TIMEZONE("MPI_Allreduce");
                     MPI_Allreduce(
diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp
index f8dc62da0a163425075a0a0f38209ab28c45ca7f..77100d1dc25878f299e76cb4dbbcd089c845dcdc 100644
--- a/bfps/cpp/rFFTW_interpolator.cpp
+++ b/bfps/cpp/rFFTW_interpolator.cpp
@@ -34,10 +34,10 @@ template <class rnumber, int interp_neighbours>
 rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator(
         fluid_solver_base<rnumber> *fs,
         base_polynomial_values BETA_POLYS,
-        rnumber *FIELD) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS)
+        rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS)
 {
     this->field_size = 2*fs->cd->local_size;
-    this->field = FIELD;
+    this->field = FIELD_POINTER;
 
 
     // generate compute array
@@ -49,6 +49,25 @@ rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator(
         this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true;
 }
 
+template <class rnumber, int interp_neighbours>
+rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator(
+        vorticity_equation<rnumber, FFTW> *fs,
+        base_polynomial_values BETA_POLYS,
+        rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS)
+{
+//    this->field_size = 2*fs->cd->local_size;
+//    this->field = FIELD_POINTER;
+//
+//
+//    // generate compute array
+//    this->compute = new bool[this->descriptor->sizes[0]];
+//    std::fill_n(this->compute, this->descriptor->sizes[0], false);
+//    for (int iz = this->descriptor->starts[0]-interp_neighbours-1;
+//            iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours;
+//            iz++)
+//        this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true;
+}
+
 template <class rnumber, int interp_neighbours>
 rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator()
 {
diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp
index 795257d2744e432d9c346b93848cadfbd8cc85dc..a8ca1abc61a883cb27dee5e9f82aa09abdd77cca 100644
--- a/bfps/cpp/rFFTW_interpolator.hpp
+++ b/bfps/cpp/rFFTW_interpolator.hpp
@@ -27,6 +27,7 @@
 #include "field_descriptor.hpp"
 #include "fftw_tools.hpp"
 #include "fluid_solver_base.hpp"
+#include "vorticity_equation.hpp"
 #include "interpolator_base.hpp"
 
 #ifndef RFFTW_INTERPOLATOR
@@ -54,6 +55,11 @@ class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours>
                 fluid_solver_base<rnumber> *FSOLVER,
                 base_polynomial_values BETA_POLYS,
                 rnumber *FIELD_DATA);
+
+        rFFTW_interpolator(
+                vorticity_equation<rnumber, FFTW> *FSOLVER,
+                base_polynomial_values BETA_POLYS,
+                rnumber *FIELD_DATA);
         ~rFFTW_interpolator();
 
         /* does not destroy input */
diff --git a/bfps/cpp/vorticity_equation.cpp b/bfps/cpp/vorticity_equation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..55573d5bcc2f5ebdb3ec2ac975997455a977215c
--- /dev/null
+++ b/bfps/cpp/vorticity_equation.cpp
@@ -0,0 +1,652 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+//#define NDEBUG
+
+#include <cassert>
+#include <cmath>
+#include <cstring>
+#include "fftw_tools.hpp"
+#include "vorticity_equation.hpp"
+#include "scope_timer.hpp"
+
+
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::impose_zero_modes()
+{
+    TIMEZONE("vorticity_equation::impose_zero_modes");
+    this->u->impose_zero_mode();
+    this->v[0]->impose_zero_mode();
+    this->v[1]->impose_zero_mode();
+    this->v[2]->impose_zero_mode();
+}
+
+template <class rnumber,
+          field_backend be>
+vorticity_equation<rnumber, be>::vorticity_equation(
+        const char *NAME,
+        int nx,
+        int ny,
+        int nz,
+        double DKX,
+        double DKY,
+        double DKZ,
+        unsigned FFTW_PLAN_RIGOR)
+{
+    TIMEZONE("vorticity_equation::vorticity_equation");
+    /* initialize name and basic stuff */
+    strncpy(this->name, NAME, 256);
+    this->name[255] = '\0';
+    this->iteration = 0;
+
+    /* initialize fields */
+    this->cvorticity = new field<rnumber, be, THREE>(
+            nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR);
+    this->rvorticity = new field<rnumber, be, THREE>(
+            nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR);
+    this->v[1] = new field<rnumber, be, THREE>(
+            nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR);
+    this->v[2] = new field<rnumber, be, THREE>(
+            nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR);
+    this->v[0] = this->cvorticity;
+    this->v[3] = this->cvorticity;
+
+    this->cvelocity = new field<rnumber, be, THREE>(
+            nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR);
+    this->rvelocity = new field<rnumber, be, THREE>(
+            nx, ny, nz, MPI_COMM_WORLD, FFTW_PLAN_RIGOR);
+    this->u = this->cvelocity;
+
+    /* initialize kspace */
+    this->kk = new kspace<be, SMOOTH>(
+            this->cvorticity->clayout, DKX, DKY, DKZ);
+
+    /* ``physical'' parameters etc, initialized here just in case */
+
+    this->nu = 0.1;
+    this->fmode = 1;
+    this->famplitude = 1.0;
+    this->fk0  = 2.0;
+    this->fk1 = 4.0;
+}
+
+template <class rnumber,
+          field_backend be>
+vorticity_equation<rnumber, be>::~vorticity_equation()
+{
+    TIMEZONE("vorticity_equation::~vorticity_equation");
+    delete this->kk;
+    delete this->cvorticity;
+    delete this->rvorticity;
+    delete this->v[1];
+    delete this->v[2];
+    delete this->cvelocity;
+    delete this->rvelocity;
+}
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::compute_vorticity()
+{
+    TIMEZONE("vorticity_equation::compute_vorticity");
+    this->cvorticity->real_space_representation = false;
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2)
+        {
+            this->cvorticity->cval(cindex,0,0) = -(this->kk->ky[yindex]*this->u->cval(cindex,2,1) - this->kk->kz[zindex]*this->u->cval(cindex,1,1));
+            this->cvorticity->cval(cindex,0,1) =  (this->kk->ky[yindex]*this->u->cval(cindex,2,0) - this->kk->kz[zindex]*this->u->cval(cindex,1,0));
+            this->cvorticity->cval(cindex,1,0) = -(this->kk->kz[zindex]*this->u->cval(cindex,0,1) - this->kk->kx[xindex]*this->u->cval(cindex,2,1));
+            this->cvorticity->cval(cindex,1,1) =  (this->kk->kz[zindex]*this->u->cval(cindex,0,0) - this->kk->kx[xindex]*this->u->cval(cindex,2,0));
+            this->cvorticity->cval(cindex,2,0) = -(this->kk->kx[xindex]*this->u->cval(cindex,1,1) - this->kk->ky[yindex]*this->u->cval(cindex,0,1));
+            this->cvorticity->cval(cindex,2,1) =  (this->kk->kx[xindex]*this->u->cval(cindex,1,0) - this->kk->ky[yindex]*this->u->cval(cindex,0,0));
+            //ptrdiff_t tindex = 3*cindex;
+            //this->cvorticity->get_cdata()[tindex+0][0] = -(this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][1]);
+            //this->cvorticity->get_cdata()[tindex+1][0] = -(this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][1]);
+            //this->cvorticity->get_cdata()[tindex+2][0] = -(this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][1]);
+            //this->cvorticity->get_cdata()[tindex+0][1] =  (this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][0]);
+            //this->cvorticity->get_cdata()[tindex+1][1] =  (this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][0]);
+            //this->cvorticity->get_cdata()[tindex+2][1] =  (this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][0]);
+        }
+        else
+            std::fill_n((rnumber*)(this->cvorticity->get_cdata()+3*cindex), 6, 0.0);
+    }
+    );
+    this->cvorticity->symmetrize();
+}
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::compute_velocity(field<rnumber, be, THREE> *vorticity)
+{
+    TIMEZONE("vorticity_equation::compute_velocity");
+    this->u->real_space_representation = false;
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2 && k2 > 0)
+        {
+            this->u->cval(cindex,0,0) = -(this->kk->ky[yindex]*vorticity->cval(cindex,2,1) - this->kk->kz[zindex]*vorticity->cval(cindex,1,1)) / k2;
+            this->u->cval(cindex,0,1) =  (this->kk->ky[yindex]*vorticity->cval(cindex,2,0) - this->kk->kz[zindex]*vorticity->cval(cindex,1,0)) / k2;
+            this->u->cval(cindex,1,0) = -(this->kk->kz[zindex]*vorticity->cval(cindex,0,1) - this->kk->kx[xindex]*vorticity->cval(cindex,2,1)) / k2;
+            this->u->cval(cindex,1,1) =  (this->kk->kz[zindex]*vorticity->cval(cindex,0,0) - this->kk->kx[xindex]*vorticity->cval(cindex,2,0)) / k2;
+            this->u->cval(cindex,2,0) = -(this->kk->kx[xindex]*vorticity->cval(cindex,1,1) - this->kk->ky[yindex]*vorticity->cval(cindex,0,1)) / k2;
+            this->u->cval(cindex,2,1) =  (this->kk->kx[xindex]*vorticity->cval(cindex,1,0) - this->kk->ky[yindex]*vorticity->cval(cindex,0,0)) / k2;
+            //ptrdiff_t tindex = 3*cindex;
+            //this->u->get_cdata()[tindex+0][0] = -(this->kk->ky[yindex]*vorticity->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*vorticity->get_cdata()[tindex+1][1]) / k2;
+            //this->u->get_cdata()[tindex+0][1] =  (this->kk->ky[yindex]*vorticity->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*vorticity->get_cdata()[tindex+1][0]) / k2;
+            //this->u->get_cdata()[tindex+1][0] = -(this->kk->kz[zindex]*vorticity->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*vorticity->get_cdata()[tindex+2][1]) / k2;
+            //this->u->get_cdata()[tindex+1][1] =  (this->kk->kz[zindex]*vorticity->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*vorticity->get_cdata()[tindex+2][0]) / k2;
+            //this->u->get_cdata()[tindex+2][0] = -(this->kk->kx[xindex]*vorticity->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*vorticity->get_cdata()[tindex+0][1]) / k2;
+            //this->u->get_cdata()[tindex+2][1] =  (this->kk->kx[xindex]*vorticity->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*vorticity->get_cdata()[tindex+0][0]) / k2;
+        }
+        else
+            std::fill_n((rnumber*)(this->u->get_cdata()+3*cindex), 6, 0.0);
+    }
+    );
+    this->u->symmetrize();
+}
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::add_forcing(
+        field<rnumber, be, THREE> *dst,
+        field<rnumber, be, THREE> *vort_field,
+        rnumber factor)
+{
+    TIMEZONE("vorticity_equation::add_forcing");
+    if (strcmp(this->forcing_type, "none") == 0)
+        return;
+    if (strcmp(this->forcing_type, "Kolmogorov") == 0)
+    {
+        ptrdiff_t cindex;
+        if (this->cvorticity->clayout->myrank == this->cvorticity->clayout->rank[0][this->fmode])
+        {
+            cindex = ((this->fmode - this->cvorticity->clayout->starts[0]) * this->cvorticity->clayout->sizes[1])*this->cvorticity->clayout->sizes[2];
+            dst->cval(cindex,2, 0) -= this->famplitude*factor/2;
+            //dst->get_cdata()[cindex*3+2][0] -= this->famplitude*factor/2;
+        }
+        if (this->cvorticity->clayout->myrank == this->cvorticity->clayout->rank[0][this->cvorticity->clayout->sizes[0] - this->fmode])
+        {
+            cindex = ((this->cvorticity->clayout->sizes[0] - this->fmode - this->cvorticity->clayout->starts[0]) * this->cvorticity->clayout->sizes[1])*this->cvorticity->clayout->sizes[2];
+            dst->cval(cindex, 2, 0) -= this->famplitude*factor/2;
+            //dst->get_cdata()[cindex*3+2][0] -= this->famplitude*factor/2;
+        }
+        return;
+    }
+    if (strcmp(this->forcing_type, "linear") == 0)
+    {
+        this->kk->CLOOP(
+                    [&](ptrdiff_t cindex,
+                        ptrdiff_t xindex,
+                        ptrdiff_t yindex,
+                        ptrdiff_t zindex){
+            double knorm = sqrt(this->kk->kx[xindex]*this->kk->kx[xindex] +
+                                this->kk->ky[yindex]*this->kk->ky[yindex] +
+                                this->kk->kz[zindex]*this->kk->kz[zindex]);
+            if ((this->fk0 <= knorm) &&
+                    (this->fk1 >= knorm))
+                for (int c=0; c<3; c++)
+                    for (int i=0; i<2; i++)
+                        dst->cval(cindex,c,i) += this->famplitude*vort_field->cval(cindex,c,i)*factor;
+                        //dst->get_cdata()[cindex*3+c][i] += this->famplitude*vort_field->get_cdata()[cindex*3+c][i]*factor;
+        }
+        );
+        return;
+    }
+}
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::omega_nonlin(
+        int src)
+{
+    DEBUG_MSG("vorticity_equation::omega_nonlin(%d)\n", src);
+    assert(src >= 0 && src < 3);
+    this->compute_velocity(this->v[src]);
+    /* get fields from Fourier space to real space */
+    this->u->ift();
+    this->rvorticity->real_space_representation = false;
+    *this->rvorticity = this->v[src]->get_cdata();
+    this->rvorticity->ift();
+    /* compute cross product $u \times \omega$, and normalize */
+    this->u->RLOOP(
+                [&](ptrdiff_t rindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+        //ptrdiff_t tindex = 3*rindex;
+        rnumber tmp[3];
+        for (int cc=0; cc<3; cc++)
+            tmp[cc] = (this->u->rval(rindex,(cc+1)%3)*this->rvorticity->rval(rindex,(cc+2)%3) -
+                       this->u->rval(rindex,(cc+2)%3)*this->rvorticity->rval(rindex,(cc+1)%3));
+            //tmp[cc][0] = (this->u->get_rdata()[tindex+(cc+1)%3]*this->rvorticity->get_rdata()[tindex+(cc+2)%3] -
+            //              this->u->get_rdata()[tindex+(cc+2)%3]*this->rvorticity->get_rdata()[tindex+(cc+1)%3]);
+        for (int cc=0; cc<3; cc++)
+            this->u->rval(rindex,cc) = tmp[cc] / this->u->npoints;
+            //this->u->get_rdata()[(3*rindex)+cc] = tmp[cc][0] / this->u->npoints;
+    }
+    );
+    /* go back to Fourier space */
+    //this->clean_up_real_space(this->ru, 3);
+    this->u->dft();
+    this->kk->template dealias<rnumber, THREE>(this->u->get_cdata());
+    /* $\imath k \times Fourier(u \times \omega)$ */
+    this->kk->CLOOP(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+        rnumber tmp[3][2];
+        {
+            tmp[0][0] = -(this->kk->ky[yindex]*this->u->cval(cindex,2,1) - this->kk->kz[zindex]*this->u->cval(cindex,1,1));
+            tmp[1][0] = -(this->kk->kz[zindex]*this->u->cval(cindex,0,1) - this->kk->kx[xindex]*this->u->cval(cindex,2,1));
+            tmp[2][0] = -(this->kk->kx[xindex]*this->u->cval(cindex,1,1) - this->kk->ky[yindex]*this->u->cval(cindex,0,1));
+            tmp[0][1] =  (this->kk->ky[yindex]*this->u->cval(cindex,2,0) - this->kk->kz[zindex]*this->u->cval(cindex,1,0));
+            tmp[1][1] =  (this->kk->kz[zindex]*this->u->cval(cindex,0,0) - this->kk->kx[xindex]*this->u->cval(cindex,2,0));
+            tmp[2][1] =  (this->kk->kx[xindex]*this->u->cval(cindex,1,0) - this->kk->ky[yindex]*this->u->cval(cindex,0,0));
+        }
+        //ptrdiff_t tindex = 3*cindex;
+        //{
+        //    tmp[0][0] = -(this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][1]);
+        //    tmp[1][0] = -(this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][1]);
+        //    tmp[2][0] = -(this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][1]);
+        //    tmp[0][1] =  (this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][0]);
+        //    tmp[1][1] =  (this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][0]);
+        //    tmp[2][1] =  (this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][0]);
+        //}
+        for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
+            this->u->cval(cindex, cc, i) = tmp[cc][i];
+            //this->u->get_cdata()[3*cindex+cc][i] = tmp[cc][i];
+    }
+    );
+    this->add_forcing(this->u, this->v[src], 1.0);
+    this->kk->template force_divfree<rnumber>(this->u->get_cdata());
+}
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::step(double dt)
+{
+    DEBUG_MSG("vorticity_equation::step\n");
+    TIMEZONE("vorticity_equation::step");
+    double factor0, factor1;
+    *this->v[1] = 0.0;
+    this->omega_nonlin(0);
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2)
+        {
+            factor0 = exp(-this->nu * k2 * dt);
+            for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
+                this->v[1]->cval(cindex,cc,i) = (
+                        this->v[0]->cval(cindex,cc,i) +
+                        dt*this->u->cval(cindex,cc,i))*factor0;
+                //this->v[1]->get_cdata()[3*cindex+cc][i] = (
+                //        this->v[0]->get_cdata()[3*cindex+cc][i] +
+                //        dt*this->u->get_cdata()[3*cindex+cc][i])*factor0;
+        }
+    }
+    );
+
+    this->omega_nonlin(1);
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2)
+        {
+            factor0 = exp(-this->nu * k2 * dt/2);
+            factor1 = exp( this->nu * k2 * dt/2);
+            for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
+                this->v[2]->cval(cindex, cc, i) = (
+                        3*this->v[0]->cval(cindex,cc,i)*factor0 +
+                        ( this->v[1]->cval(cindex,cc,i) +
+                         dt*this->u->cval(cindex,cc,i))*factor1)*0.25;
+                //this->v[2]->get_cdata()[3*cindex+cc][i] = (
+                //        3*this->v[0]->get_cdata()[3*cindex+cc][i]*factor0 +
+                //        (this->v[1]->get_cdata()[3*cindex+cc][i] +
+                //         dt*this->u->get_cdata()[3*cindex+cc][i])*factor1)*0.25;
+        }
+    }
+    );
+
+    this->omega_nonlin(2);
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2)
+        {
+            factor0 = exp(-this->nu * k2 * dt * 0.5);
+            for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
+                this->v[3]->cval(cindex,cc,i) = (
+                        this->v[0]->cval(cindex,cc,i)*factor0 +
+                        2*(this->v[2]->cval(cindex,cc,i) +
+                           dt*this->u->cval(cindex,cc,i)))*factor0/3;
+                //this->v[3]->get_cdata()[3*cindex+cc][i] = (
+                //        this->v[0]->get_cdata()[3*cindex+cc][i]*factor0 +
+                //        2*(this->v[2]->get_cdata()[3*cindex+cc][i] +
+                //           dt*this->u->get_cdata()[3*cindex+cc][i]))*factor0/3;
+        }
+    }
+    );
+
+    this->kk->template force_divfree<rnumber>(this->cvorticity->get_cdata());
+    this->cvorticity->symmetrize();
+    this->iteration++;
+}
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> *pressure)
+{
+    TIMEZONE("vorticity_equation::compute_pressure");
+    /* assume velocity is already in real space representation */
+
+    this->v[1]->real_space_representation = true;
+    /* diagonal terms 11 22 33 */
+    this->v[1]->RLOOP (
+                [&](ptrdiff_t rindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+        //ptrdiff_t tindex = 3*rindex;
+        for (int cc=0; cc<3; cc++)
+            this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,cc);
+            //this->v[1]->get_rdata()[tindex+cc] = this->u->get_rdata()[tindex+cc]*this->u->get_rdata()[tindex+cc];
+        }
+        );
+    //this->clean_up_real_space(this->rv[1], 3);
+    this->v[1]->dft();
+    this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata());
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2 && k2 > 0)
+        {
+            ptrdiff_t tindex = 3*cindex;
+            for (int i=0; i<2; i++)
+            {
+                pressure->get_cdata()[cindex][i] = \
+                    -(this->kk->kx[xindex]*this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][i] +
+                      this->kk->ky[yindex]*this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][i] +
+                      this->kk->kz[zindex]*this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][i]);
+            }
+        }
+        else
+            std::fill_n((rnumber*)(pressure->get_cdata()+cindex), 2, 0.0);
+    }
+    );
+    /* off-diagonal terms 12 23 31 */
+    this->v[1]->real_space_representation = true;
+    this->v[1]->RLOOP (
+                [&](ptrdiff_t rindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+        //ptrdiff_t tindex = 3*rindex;
+        for (int cc=0; cc<3; cc++)
+            this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,(cc+1)%3);
+            //this->v[1]->get_rdata()[tindex+cc] = this->u->get_rdata()[tindex+cc]*this->u->get_rdata()[tindex+(cc+1)%3];
+    }
+    );
+    //this->clean_up_real_space(this->rv[1], 3);
+    this->v[1]->dft();
+    this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata());
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2 && k2 > 0)
+        {
+            ptrdiff_t tindex = 3*cindex;
+            for (int i=0; i<2; i++)
+            {
+                pressure->get_cdata()[cindex][i] -= \
+                    2*(this->kk->kx[xindex]*this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+0][i] +
+                       this->kk->ky[yindex]*this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+1][i] +
+                       this->kk->kz[zindex]*this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+2][i]);
+                pressure->get_cdata()[cindex][i] /= pressure->npoints*k2;
+            }
+        }
+    }
+    );
+}
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::compute_Lagrangian_acceleration(
+        field<rnumber, be, THREE> *acceleration)
+{
+    field<rnumber, be, ONE> *pressure = new field<rnumber, be, ONE>(
+            this->cvelocity->rlayout->sizes[2],
+            this->cvelocity->rlayout->sizes[1],
+            this->cvelocity->rlayout->sizes[0],
+            this->cvelocity->rlayout->comm,
+            this->cvelocity->fftw_plan_rigor);
+    this->compute_velocity(this->cvorticity);
+    this->cvelocity->ift();
+    this->compute_pressure(pressure);
+    this->compute_velocity(this->cvorticity);
+    acceleration->real_space_representation = false;
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2)
+        {
+            ptrdiff_t tindex = 3*cindex;
+            for (int cc=0; cc<3; cc++)
+                for (int i=0; i<2; i++)
+                    acceleration->get_cdata()[tindex+cc][i] = \
+                        - this->nu*k2*this->cvelocity->get_cdata()[tindex+cc][i];
+            if (strcmp(this->forcing_type, "linear") == 0)
+            {
+                double knorm = sqrt(k2);
+                if ((this->fk0 <= knorm) &&
+                        (this->fk1 >= knorm))
+                    for (int c=0; c<3; c++)
+                        for (int i=0; i<2; i++)
+                            acceleration->get_cdata()[tindex+c][i] += \
+                                this->famplitude*this->cvelocity->get_cdata()[tindex+c][i];
+            }
+            acceleration->get_cdata()[tindex+0][0] += this->kk->kx[xindex]*pressure->get_cdata()[cindex][1];
+            acceleration->get_cdata()[tindex+1][0] += this->kk->ky[yindex]*pressure->get_cdata()[cindex][1];
+            acceleration->get_cdata()[tindex+2][0] += this->kk->kz[zindex]*pressure->get_cdata()[cindex][1];
+            acceleration->get_cdata()[tindex+0][1] -= this->kk->kx[xindex]*pressure->get_cdata()[cindex][0];
+            acceleration->get_cdata()[tindex+1][1] -= this->kk->ky[yindex]*pressure->get_cdata()[cindex][0];
+            acceleration->get_cdata()[tindex+2][1] -= this->kk->kz[zindex]*pressure->get_cdata()[cindex][0];
+        }
+        });
+    delete pressure;
+}
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::compute_Eulerian_acceleration(
+        field<rnumber, be, THREE> *acceleration)
+{
+    this->compute_velocity(this->cvorticity);
+    acceleration->real_space_representation = false;
+    /* put in linear terms */
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2)
+        {
+            ptrdiff_t tindex = 3*cindex;
+            for (int cc=0; cc<3; cc++)
+                for (int i=0; i<2; i++)
+                    acceleration->get_cdata()[tindex+cc][i] = \
+                        - this->nu*k2*this->cvelocity->get_cdata()[tindex+cc][i];
+            if (strcmp(this->forcing_type, "linear") == 0)
+            {
+                double knorm = sqrt(k2);
+                if ((this->fk0 <= knorm) &&
+                        (this->fk1 >= knorm))
+                {
+                    for (int c=0; c<3; c++)
+                        for (int i=0; i<2; i++)
+                            acceleration->get_cdata()[tindex+c][i] += \
+                                this->famplitude*this->cvelocity->get_cdata()[tindex+c][i];
+                }
+            }
+        }
+    }
+    );
+    this->cvelocity->ift();
+    /* compute uu */
+    /* 11 22 33 */
+    this->v[1]->real_space_representation = true;
+    this->cvelocity->RLOOP (
+                [&](ptrdiff_t rindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+        //ptrdiff_t tindex = 3*rindex;
+        for (int cc=0; cc<3; cc++)
+            this->v[1]->rval(rindex,cc) = \
+                this->cvelocity->rval(rindex,cc)*this->cvelocity->rval(rindex,cc) / this->cvelocity->npoints;
+            //this->v[1]->get_rdata()[tindex+cc] = this->cvelocity->get_rdata()[tindex+cc]*this->cvelocity->get_rdata()[tindex+cc] / this->cvelocity->npoints;
+    }
+    );
+    this->v[1]->dft();
+    this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata());
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2)
+        {
+            ptrdiff_t tindex = 3*cindex;
+            acceleration->get_cdata()[tindex+0][0] +=
+                    this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][1];
+            acceleration->get_cdata()[tindex+0][1] +=
+                   -this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][0];
+            acceleration->get_cdata()[tindex+1][0] +=
+                    this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][1];
+            acceleration->get_cdata()[tindex+1][1] +=
+                   -this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][0];
+            acceleration->get_cdata()[tindex+2][0] +=
+                    this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][1];
+            acceleration->get_cdata()[tindex+2][1] +=
+                   -this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][0];
+        }
+    }
+    );
+    /* 12 23 31 */
+    this->v[1]->real_space_representation = true;
+    this->cvelocity->RLOOP (
+                [&](ptrdiff_t rindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+        //ptrdiff_t tindex = 3*rindex;
+        for (int cc=0; cc<3; cc++)
+            this->v[1]->rval(rindex,cc) = \
+                this->cvelocity->rval(rindex,cc)*this->cvelocity->rval(rindex,(cc+1)%3) / this->cvelocity->npoints;
+            //this->v[1]->get_rdata()[tindex+cc] = this->cvelocity->get_rdata()[tindex+cc]*this->cvelocity->get_rdata()[tindex+(cc+1)%3] / this->cvelocity->npoints;
+    }
+    );
+    this->v[1]->dft();
+    this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata());
+    this->kk->CLOOP_K2(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex,
+                    double k2){
+        if (k2 <= this->kk->kM2)
+        {
+            ptrdiff_t tindex = 3*cindex;
+            acceleration->get_cdata()[tindex+0][0] +=
+                    (this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+0][1] +
+                     this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][1]);
+            acceleration->get_cdata()[tindex+0][1] +=
+                  - (this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+0][0] +
+                     this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+2][0]);
+            acceleration->get_cdata()[tindex+1][0] +=
+                    (this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+1][1] +
+                     this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][1]);
+            acceleration->get_cdata()[tindex+1][1] +=
+                  - (this->kk->kz[zindex]*this->v[1]->get_cdata()[tindex+1][0] +
+                     this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+0][0]);
+            acceleration->get_cdata()[tindex+2][0] +=
+                    (this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+2][1] +
+                     this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][1]);
+            acceleration->get_cdata()[tindex+2][1] +=
+                  - (this->kk->kx[xindex]*this->v[1]->get_cdata()[tindex+2][0] +
+                     this->kk->ky[yindex]*this->v[1]->get_cdata()[tindex+1][0]);
+        }
+    }
+    );
+    if (this->kk->layout->myrank == this->kk->layout->rank[0][0])
+        std::fill_n((rnumber*)(acceleration->get_cdata()), 6, 0.0);
+    this->kk->template force_divfree<rnumber>(acceleration->get_cdata());
+}
+
+
+/*****************************************************************************/
+
+
+
+
+/*****************************************************************************/
+/* finally, force generation of code for single precision                    */
+template class vorticity_equation<float, FFTW>;
+template class vorticity_equation<double, FFTW>;
+/*****************************************************************************/
+
diff --git a/bfps/cpp/vorticity_equation.hpp b/bfps/cpp/vorticity_equation.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0831362d96fc842082b6a0fb26b1486fe0bdb4e0
--- /dev/null
+++ b/bfps/cpp/vorticity_equation.hpp
@@ -0,0 +1,107 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <iostream>
+
+#include "field.hpp"
+#include "field_descriptor.hpp"
+
+#ifndef VORTICITY_EQUATION
+
+#define VORTICITY_EQUATION
+
+extern int myrank, nprocs;
+
+
+/* container for field descriptor, fields themselves, parameters, etc
+ * This particular class is only meant as a stepping stone to a proper solver
+ * that only uses the field class (and related layout and kspace classes), and
+ * HDF5 for I/O.
+ * */
+
+template <typename rnumber,
+          field_backend be>
+class vorticity_equation
+{
+    public:
+        /* name */
+        char name[256];
+
+        /* iteration */
+        int iteration;
+
+        /* fields */
+        field<rnumber, be, THREE> *cvorticity, *cvelocity;
+        field<rnumber, be, THREE> *rvorticity, *rvelocity;
+        kspace<be, SMOOTH> *kk;
+
+
+        /* short names for velocity, and 4 vorticity fields */
+        field<rnumber, be, THREE> *u, *v[4];
+
+        /* physical parameters */
+        double nu;
+        int fmode;         // for Kolmogorov flow
+        double famplitude; // both for Kflow and band forcing
+        double fk0, fk1;   // for band forcing
+        char forcing_type[128];
+
+        /* constructor, destructor */
+        vorticity_equation(
+                const char *NAME,
+                int nx,
+                int ny,
+                int nz,
+                double DKX = 1.0,
+                double DKY = 1.0,
+                double DKZ = 1.0,
+                unsigned FFTW_PLAN_RIGOR = FFTW_MEASURE);
+        ~vorticity_equation(void);
+
+        /* solver essential methods */
+        void omega_nonlin(int src);
+        void step(double dt);
+        void impose_zero_modes(void);
+        void add_forcing(field<rnumber, be, THREE> *dst,
+                         field<rnumber, be, THREE> *src_vorticity,
+                         rnumber factor);
+        void compute_vorticity(void);
+        void compute_velocity(field<rnumber, be, THREE> *vorticity);
+
+        /* binary I/O stuff */
+        inline void fill_up_filename(const char *base_name, char *full_name)
+        {
+            sprintf(full_name, "%s_%s_i%.5x.h5", this->name, base_name, this->iteration);
+        }
+
+        /* statistics and general postprocessing */
+        void compute_pressure(field<rnumber, be, ONE> *pressure);
+        void compute_Eulerian_acceleration(field<rnumber, be, THREE> *acceleration);
+        void compute_Lagrangian_acceleration(field<rnumber, be, THREE> *acceleration);
+};
+
+#endif//VORTICITY_EQUATION
+
diff --git a/setup.py b/setup.py
index 2a745190571df95c5e2d0b89138ebfe3b32c0469..8d4ae0f9dfb554229c6b7293e596646501a58bc7 100644
--- a/setup.py
+++ b/setup.py
@@ -88,7 +88,10 @@ print('This is bfps version ' + VERSION)
 
 
 ### lists of files and MANIFEST.in
-src_file_list = ['field',
+src_file_list = ['vorticity_equation',
+                 'field',
+                 'kspace',
+                 'field_layout',
                  'field_descriptor',
                  'rFFTW_distributed_particles',
                  'distributed_particles',
diff --git a/tests/test_field_class.py b/tests/test_field_class.py
index fc52f419a5ab2dd7a5231676c41b9d586d497080..110d9be685ef42d4ed231a3a3c723ac34e3d916d 100644
--- a/tests/test_field_class.py
+++ b/tests/test_field_class.py
@@ -32,32 +32,37 @@ class TestField(_fluid_particle_base):
         self.fluid_includes += '#include "fftw_tools.hpp"\n'
         self.fluid_includes += '#include "field.hpp"\n'
         self.fluid_variables += ('field<' + self.C_dtype + ', FFTW, ONE> *f;\n' +
+                                 'field<' + self.C_dtype + ', FFTW, THREE> *v;\n' +
                                  'kspace<FFTW, SMOOTH> *kk;\n')
         self.fluid_start += """
                 //begincpp
                 f = new field<{0}, FFTW, ONE>(
                         nx, ny, nz, MPI_COMM_WORLD);
+                v = new field<{0}, FFTW, THREE>(
+                        nx, ny, nz, MPI_COMM_WORLD);
                 kk = new kspace<FFTW, SMOOTH>(
                         f->clayout, 1., 1., 1.);
                 // read rdata
-                f->io("field.h5", "rdata", 0, true);
+                f->real_space_representation = true;
+                f->io("field.h5", "scal", 0, true);
                 // go to fourier space, write into cdata_tmp
                 f->dft();
-                f->io("field.h5", "cdata_tmp", 0, false);
+                f->io("field.h5", "scal_tmp", 0, false);
                 f->ift();
-                f->io("field.h5", "rdata", 0, false);
-                f->io("field.h5", "cdata", 0, true);
+                f->io("field.h5", "scal", 0, false);
+                f->real_space_representation = false;
+                f->io("field.h5", "scal", 0, true);
                 hid_t gg;
                 if (f->myrank == 0)
                     gg = H5Fopen("field.h5", H5F_ACC_RDWR, H5P_DEFAULT);
                 kk->cospectrum<float, ONE>(
-                        f->get_rdata(),
-                        f->get_rdata(),
+                        f->get_cdata(),
+                        f->get_cdata(),
                         gg,
                         "scal",
                         0);
                 f->ift();
-                f->io("field.h5", "rdata_tmp", 0, false);
+                f->io("field.h5", "scal_tmp", 0, false);
                 std::vector<double> me;
                 me.resize(1);
                 me[0] = 30;
@@ -66,11 +71,15 @@ class TestField(_fluid_particle_base):
                         0, me);
                 if (f->myrank == 0)
                     H5Fclose(gg);
+                v->real_space_representation = false;
+                v->io("field.h5", "vec", 0, true);
+                v->io("field.h5", "vec_tmp", 0, false);
                 //endcpp
                 """.format(self.C_dtype)
         self.fluid_end += """
                 //begincpp
                 delete f;
+                delete v;
                 //endcpp
                 """
         return None
@@ -92,7 +101,7 @@ class TestField(_fluid_particle_base):
         return None
 
 def main():
-    n = 128
+    n = 32
     kdata = pyfftw.n_byte_align_empty(
             (n, n, n//2 + 1),
             pyfftw.simd_alignment,
@@ -116,10 +125,10 @@ def main():
     tf.parameters['ny'] = n
     tf.parameters['nz'] = n
     f = h5py.File('field.h5', 'w')
-    f['cdata'] = cdata.reshape((1,) + cdata.shape)
-    f['cdata_tmp'] = np.zeros(shape=(1,) + cdata.shape).astype(cdata.dtype)
-    f['rdata'] = rdata.reshape((1,) + rdata.shape)
-    f['rdata_tmp'] = np.zeros(shape=(1,) + rdata.shape).astype(rdata.dtype)
+    f['scal/complex/0'] = cdata
+    f['scal/real/0'] = rdata
+    f['vec/complex/0'] = np.array([cdata, cdata, cdata]).reshape(cdata.shape + (3,))
+    f['vec/real/0'] = np.array([rdata, rdata, rdata]).reshape(rdata.shape + (3,))
     f['moments/scal'] = np.zeros(shape = (1, 10)).astype(np.float)
     f['histograms/scal'] = np.zeros(shape = (1, 64)).astype(np.float)
     kspace = tf.get_kspace()
@@ -133,35 +142,60 @@ def main():
              '--ncpu', '2'])
 
     f = h5py.File('field.h5', 'r')
-    err0 = np.max(np.abs(f['rdata_tmp'][0] - rdata)) / np.mean(np.abs(rdata))
-    err1 = np.max(np.abs(f['rdata'][0]/(n**3) - rdata)) / np.mean(np.abs(rdata))
-    err2 = np.max(np.abs(f['cdata_tmp'][0]/(n**3) - cdata)) / np.mean(np.abs(cdata))
-    print(err0, err1, err2)
-    assert(err0 < 1e-5)
-    assert(err1 < 1e-5)
-    assert(err2 < 1e-4)
-    ### compare
-    #fig = plt.figure(figsize=(12, 6))
-    #a = fig.add_subplot(121)
-    #a.set_axis_off()
-    #a.imshow(rdata[0, :, :], interpolation = 'none')
-    #a = fig.add_subplot(122)
-    #a.set_axis_off()
-    #a.imshow(f['rdata_tmp'][0, 0, :, :], interpolation = 'none')
+    #err0 = np.max(np.abs(f['scal_tmp/real/0'].value - rdata)) / np.mean(np.abs(rdata))
+    #err1 = np.max(np.abs(f['scal/real/0'].value/(n**3) - rdata)) / np.mean(np.abs(rdata))
+    #err2 = np.max(np.abs(f['scal_tmp/complex/0'].value/(n**3) - cdata)) / np.mean(np.abs(cdata))
+    #print(err0, err1, err2)
+    #assert(err0 < 1e-5)
+    #assert(err1 < 1e-5)
+    #assert(err2 < 1e-4)
+    ## compare
+    fig = plt.figure(figsize=(18, 6))
+    a = fig.add_subplot(131)
+    a.set_axis_off()
+    v0 = f['vec/complex/0'][:, :, 0, 0]
+    v1 = f['vec_tmp/complex/0'][:, :, 0, 0]
+    a.imshow(np.log(np.abs(v0 - v1)),
+             interpolation = 'none')
+    a = fig.add_subplot(132)
+    a.set_axis_off()
+    a.imshow(np.log(np.abs(v0)),
+             interpolation = 'none')
+    a = fig.add_subplot(133)
+    a.set_axis_off()
+    a.imshow(np.log(np.abs(v1)),
+             interpolation = 'none')
+    fig.tight_layout()
+    fig.savefig('tst_fields.pdf')
+    fig = plt.figure(figsize=(18, 6))
+    a = fig.add_subplot(131)
+    a.set_axis_off()
+    v0 = f['scal/complex/0'][:, :, 0]
+    v1 = f['scal_tmp/complex/0'][:, :, 0]
+    a.imshow(np.log(np.abs(v0 - v1)),
+             interpolation = 'none')
+    a = fig.add_subplot(132)
+    a.set_axis_off()
+    a.imshow(np.log(np.abs(v0)),
+             interpolation = 'none')
+    a = fig.add_subplot(133)
+    a.set_axis_off()
+    a.imshow(np.log(np.abs(v1)),
+             interpolation = 'none')
+    fig.tight_layout()
+    fig.savefig('tst_sfields.pdf')
+    # look at moments and histogram
+    #print('moments are ', f['moments/scal'][0])
+    #fig = plt.figure(figsize=(6,6))
+    #a = fig.add_subplot(211)
+    #a.plot(f['histograms/scal'][0])
+    #a.set_yscale('log')
+    #a = fig.add_subplot(212)
+    #a.plot(f['spectra/scal'][0])
+    #a.set_xscale('log')
+    #a.set_yscale('log')
     #fig.tight_layout()
     #fig.savefig('tst.pdf')
-    # look at moments and histogram
-    print('moments are ', f['moments/scal'][0])
-    fig = plt.figure(figsize=(6,6))
-    a = fig.add_subplot(211)
-    a.plot(f['histograms/scal'][0])
-    a.set_yscale('log')
-    a = fig.add_subplot(212)
-    a.plot(f['spectra/scal'][0])
-    a.set_xscale('log')
-    a.set_yscale('log')
-    fig.tight_layout()
-    fig.savefig('tst.pdf')
     return None
 
 if __name__ == '__main__':
diff --git a/tests/test_vorticity_equation.py b/tests/test_vorticity_equation.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec50531df29e82c1ff767ab3d292bef0aac66c4c
--- /dev/null
+++ b/tests/test_vorticity_equation.py
@@ -0,0 +1,101 @@
+#######################################################################
+#                                                                     #
+#  Copyright 2015 Max Planck Institute                                #
+#                 for Dynamics and Self-Organization                  #
+#                                                                     #
+#  This file is part of bfps.                                         #
+#                                                                     #
+#  bfps is free software: you can redistribute it and/or modify       #
+#  it under the terms of the GNU General Public License as published  #
+#  by the Free Software Foundation, either version 3 of the License,  #
+#  or (at your option) any later version.                             #
+#                                                                     #
+#  bfps is distributed in the hope that it will be useful,            #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
+#  GNU General Public License for more details.                       #
+#                                                                     #
+#  You should have received a copy of the GNU General Public License  #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
+#                                                                     #
+# Contact: Cristian.Lalescu@ds.mpg.de                                 #
+#                                                                     #
+#######################################################################
+
+
+
+import sys
+import os
+import numpy as np
+import h5py
+import argparse
+
+import bfps
+import bfps.tools
+
+from bfps_addons import NSReader
+import matplotlib.pyplot as plt
+
+def main():
+    c = bfps.NavierStokes()
+    c.launch(
+            ['-n', '72',
+             '--simname', 'fluid_solver',
+             '--ncpu', '4',
+             '--niter_todo', '256',
+             '--niter_out', '256',
+             '--niter_stat', '1',
+             '--wd', './'] +
+            sys.argv[1:])
+    data = c.read_cfield(iteration = 0)
+    f = h5py.File('vorticity_equation_cvorticity_i00000.h5', 'w')
+    f['vorticity/complex/0'] = data
+    f.close()
+    c = bfps.NSVorticityEquation()
+    c.launch(
+            ['-n', '72',
+             '--simname', 'vorticity_equation',
+             '--ncpu', '4',
+             '--niter_todo', '256',
+             '--niter_out', '256',
+             '--niter_stat', '1',
+             '--wd', './'] +
+            sys.argv[1:])
+    c0 = NSReader(simname = 'fluid_solver')
+    c1 = NSReader(simname = 'vorticity_equation')
+    df0 = c0.get_data_file()
+    df1 = c1.get_data_file()
+    f = plt.figure(figsize=(6,10))
+    a = f.add_subplot(211)
+    a.plot(df0['statistics/moments/vorticity'][:, 2, 3],
+           color = 'blue',
+           marker = '.')
+    a.plot(df1['statistics/moments/vorticity'][:, 2, 3],
+           color = 'red',
+           marker = '.')
+    a = f.add_subplot(212)
+    a.plot(df0['statistics/moments/velocity'][:, 2, 3],
+           color = 'blue',
+           marker = '.')
+    a.plot(df1['statistics/moments/velocity'][:, 2, 3],
+           color = 'red',
+           marker = '.')
+    f.tight_layout()
+    f.savefig('figs/moments.pdf')
+    f = plt.figure(figsize = (6, 10))
+    a = f.add_subplot(111)
+    a.plot(c0.statistics['enstrophy(t, k)'][0])
+    a.plot(c1.statistics['enstrophy(t, k)'][0])
+    a.set_yscale('log')
+    f.tight_layout()
+    f.savefig('figs/spectra.pdf')
+    f = h5py.File('vorticity_equation_cvorticity_i00000.h5', 'r')
+    #print(c0.statistics['enstrophy(t, k)'][0])
+    #print(c1.statistics['enstrophy(t, k)'][0])
+    c0.do_plots()
+    c1.do_plots()
+    return None
+
+if __name__ == '__main__':
+    main()
+