diff --git a/TurTLE/DNS.py b/TurTLE/DNS.py
index b04c8b360307632e759eddbe81a03d555473ba96..606014b09a87b490f1dda781fc6bc6e501d12eb8 100644
--- a/TurTLE/DNS.py
+++ b/TurTLE/DNS.py
@@ -93,7 +93,7 @@ class DNS(_code):
         return None
     def generate_default_parameters(self):
         # these parameters are relevant for all DNS classes
-        self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE'
+        self.parameters['fftw_plan_rigor'] = str('FFTW_ESTIMATE')
         self.parameter_description['fftw_plan_rigor'] = 'FFTW plan rigor to use. One of `FFTW_ESTIMATE`, `FFTW_MEASURE`, `FFTW_PATIENT`. Please see FFTW documentation.'
         self.parameters['dealias_type'] = int(1)
         self.parameter_description['dealias_type'] = 'Dealiasing mehtod to use, integer. Options are: two-thirds (0) or smooth (1).'
@@ -125,7 +125,7 @@ class DNS(_code):
         self.parameter_description['fk0'] = 'Forcing parameter: if forcing acts on wavenumber band, this is the smallest wavenumber where it acts (in code units).'
         self.parameters['fk1'] = float(4.0)
         self.parameter_description['fk1'] = 'Forcing parameter: if forcing acts on wavenumber band, this is the largest wavenumber where it acts (in code units).'
-        self.parameters['forcing_type'] = 'fixed_energy_injection_rate'
+        self.parameters['forcing_type'] = str('fixed_energy_injection_rate')
         self.parameter_description['forcing_type'] = 'Forcing parameter: what type of force to use.'
         self.parameters['histogram_bins'] = int(256)
         self.parameter_description['histogram_bins'] = 'During statistics, histograms of real-valued fields are computed using a number of `HISTOGRAM_BINS` bins.'
@@ -324,22 +324,39 @@ class DNS(_code):
                 '--src-wd',
                 type = str,
                 dest = 'src_work_dir',
+                help = ('Empty by default.'
+                    + ' Location of simulation from which to source initial conditions.'
+                    + ' See also `--src-simname`.'),
                 default = '')
         parser.add_argument(
                 '--src-simname',
                 type = str,
                 dest = 'src_simname',
+                help = ('Empty by default.'
+                    + ' Name of simulation from which to source initial conditions.'
+                    + ' If the parameter file of the source simulation is present, '
+                    + 'the new DNS will use the same physical parameters '
+                    + '(forcing parameters and viscosity) and the same dealiasing option.'
+                    + ' Any other parameters (e.g. "nx") have to be specified separately.'
+                    + ' See also `--overwrite-src-parameters`.'),
                 default = '')
         parser.add_argument(
                 '--src-iteration',
                 type = int,
                 dest = 'src_iteration',
+                help = ('0 by default.'
+                    + ' Iteration of source simulation to be used as initial condition.'
+                    + ' See also `--src-simname`.'),
                 default = 0)
         parser.add_argument(
                 '--overwrite-src-parameters',
                 action = 'store_true',
                 dest = 'overwrite_src_parameters',
-                help = 'False by default. If using a source simulation, do we keep its physical parameters? Note: if this option is switched on, default values are used for parameters that are not set explicitly by the command --- NOT the values of the source simulation.')
+                help = ('False by default.'
+                      + ' If using a source simulation, do we keep its physical parameters?'
+                      + ' Note: if this option is switched on, default values are used for '
+                      + 'parameters that are not set explicitly by the command --- '
+                      + 'NOT the values of the source simulation.'))
         parser.add_argument(
                '--kMeta',
                type = float,
@@ -472,7 +489,23 @@ class DNS(_code):
             self,
             args = [],
             extra_parameters = None):
-        """Set up reasonable parameters.
+        """Assign DNS parameters.
+
+        There are three ways to specify parameters:
+
+        * Directly via the command line.
+        * If the simulation takes its initial condition from another simulation
+        (via the :code:`--src-dir` and :code:`--src-simname` parameters) and
+        some physical parameter is not provided on the command line, then this
+        method will attempt to read that parameter from the parameter file of
+        the source simulation.
+        * If a parameter is not specified via the command line, and a source
+        simulation is not provided, or the source parameter file is missing, or
+        the :code:`--overwrite-src-parameters` option is turned on, this method
+        will generate a "reasonable" value for that parameter.
+        "Reasonable" refers to either some fixed values for the forcing
+        parameters, or an empirical extrapolation of the viscosity to satisfy a
+        given value of the :code:`--kMeta` command line parameter.
 
         With the default Lundgren forcing applied in the band [2, 4],
         we can estimate the dissipation, therefore we can estimate
@@ -520,10 +553,32 @@ class DNS(_code):
             for k in self.extra_parameters[self.dns_type].keys():
                 self.parameters[k] = self.extra_parameters[self.dns_type][k]
 
+        # for parameters there are three options, in order of preference:
+        # * value given from command line
+        # * value used by source simulation
+        # * default value
+
         if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0):
             self.parameters['niter_out'] = self.parameters['niter_todo']
-        if len(opt.src_work_dir) == 0:
-            opt.src_work_dir = os.path.realpath(opt.work_dir)
+
+        if len(opt.src_simname) > 0:
+            if len(opt.src_work_dir) == 0:
+                opt.src_work_dir = os.path.realpath(opt.work_dir)
+            src_file_name = os.path.join(
+                    os.path.realpath(opt.src_work_dir),
+                    opt.src_simname + '.h5')
+            if os.path.exists(src_file_name):
+                # open source file for use later
+                src_file = h5py.File(src_file_name, 'r')
+            else:
+                print('TurTLE WARNING: unable to copy parameters of source simulation.'
+                    + 'File {0} is missing.\n'.format(src_file_name)
+                    + 'DNS will use default values for physical parameters.\n')
+                src_file = None
+        else:
+            src_file = None
+        if opt.overwrite_src_parameters:
+            src_file = None
         if type(opt.dkx) == type(None):
             opt.dkx = 2. / opt.Lx
         if type(opt.dky) == type(None):
@@ -536,16 +591,60 @@ class DNS(_code):
             opt.ny = opt.n
         if type(opt.nz) == type(None):
             opt.nz = opt.n
+        if type(opt.friction_coefficient) == type(None):
+            if type(src_file) != type(None):
+                self.parameters['friction_coefficient'] = type(self.parameters['friction_coefficient'])(
+                        src_file['parameters/friction_coefficient'][()])
+            else:
+                opt.friction_coefficient = self.parameters['friction_coefficient']
+        if type(opt.fmode) == type(None):
+            if type(src_file) != type(None):
+                self.parameters['fmode'] = type(self.parameters['fmode'])(
+                        src_file['parameters/fmode'][()])
+            else:
+                opt.fmode = self.parameters['fmode']
+        if type(opt.famplitude) == type(None):
+            if type(src_file) != type(None):
+                self.parameters['famplitude'] = type(self.parameters['famplitude'])(
+                        src_file['parameters/famplitude'][()])
+            else:
+                opt.famplitude = self.parameters['famplitude']
+        if type(opt.energy) == type(None):
+            if type(src_file) != type(None):
+                self.parameters['energy'] = type(self.parameters['energy'])(
+                        src_file['parameters/energy'][()])
+            else:
+                opt.energy = self.parameters['energy']
         if type(opt.fk0) == type(None):
-            opt.fk0 = self.parameters['fk0']
+            if type(src_file) != type(None):
+                self.parameters['fk0'] = type(self.parameters['fk0'])(
+                        src_file['parameters/fk0'][()])
+            else:
+                opt.fk0 = self.parameters['fk0']
         if type(opt.fk1) == type(None):
-            opt.fk1 = self.parameters['fk1']
+            if type(src_file) != type(None):
+                self.parameters['fk1'] = type(self.parameters['fk1'])(
+                        src_file['parameters/fk1'][()])
+            else:
+                opt.fk1 = self.parameters['fk1']
         if type(opt.forcing_type) == type(None):
-            opt.forcing_type = self.parameters['forcing_type']
+            if type(src_file) != type(None):
+                self.parameters['forcing_type'] = str(
+                        src_file['parameters/forcing_type'][()].decode())
+            else:
+                opt.forcing_type = self.parameters['forcing_type']
         if type(opt.injection_rate) == type(None):
-            opt.injection_rate = self.parameters['injection_rate']
+            if type(src_file) != type(None):
+                self.parameters['injection_rate'] = type(self.parameters['injection_rate'])(
+                        src_file['parameters/injection_rate'][()])
+            else:
+                opt.injection_rate = self.parameters['injection_rate']
         if type(opt.dealias_type) == type(None):
-            opt.dealias_type = self.parameters['dealias_type']
+            if type(src_file) != type(None):
+                self.parameters['dealias_type'] = type(self.parameters['dealias_type'])(
+                        src_file['parameters/dealias_type'][()])
+            else:
+                opt.dealias_type = self.parameters['dealias_type']
         if (opt.nx > opt.n or
             opt.ny > opt.n or
             opt.nz > opt.n):
@@ -561,34 +660,33 @@ class DNS(_code):
         kM = opt.n * 0.5
         if opt.dealias_type == 1:
             kM *= 0.8
-        # tweak forcing/viscosity based on forcing type
-        if opt.forcing_type == 'linear':
-            # custom famplitude for 288 and 576
-            if opt.n == 288:
-                self.parameters['famplitude'] = 0.45
-            elif opt.n == 576:
-                self.parameters['famplitude'] = 0.47
-        elif opt.forcing_type == 'fixed_energy_injection_rate':
-            # use the fact that mean dissipation rate is equal to injection rate
-            if type(opt.nu) == type(None):
-                opt.nu = (
-                    opt.injection_rate *
-                    (opt.kMeta / kM)**4)**(1./3)
-                if opt.injection_rate == 0:
-                    opt.nu = (
-                    (opt.kMeta / kM)**(4./3) *
-                    (np.pi / 1.5)**(1./3) *
-                    (2*self.parameters['energy'] / 3)**0.5)
-        elif opt.forcing_type == 'fixed_energy':
-            kf = 1. / (1./opt.fk0 +
-                       1./opt.fk1)
-            if type(opt.nu) == type(None):
-                opt.nu = (
-                    (opt.kMeta / kM)**(4./3) *
-                    (np.pi / kf)**(1./3) *
-                    (2*self.parameters['energy'] / 3)**0.5)
+
         if type(opt.nu) == type(None):
-            opt.nu = (opt.kMeta * 2 / opt.n)**(4./3)
+            # check if we are using viscosity of source simulation
+            if type(src_file) != type(None):
+                opt.nu = type(self.parameters['nu'])(
+                        src_file['parameters/nu'][()])
+            # empirical viscosity based on forcing type
+            else:
+                if opt.forcing_type == 'fixed_energy_injection_rate':
+                    # use the fact that mean dissipation rate is equal to injection rate
+                        opt.nu = (
+                            opt.injection_rate *
+                            (opt.kMeta / kM)**4)**(1./3)
+                        if opt.injection_rate == 0:
+                            opt.nu = (
+                            (opt.kMeta / kM)**(4./3) *
+                            (np.pi / 1.5)**(1./3) *
+                            (2*self.parameters['energy'] / 3)**0.5)
+                elif opt.forcing_type == 'fixed_energy':
+                    kf = 1. / (1./opt.fk0 +
+                               1./opt.fk1)
+                    opt.nu = (
+                        (opt.kMeta / kM)**(4./3) *
+                        (np.pi / kf)**(1./3) *
+                        (2*self.parameters['energy'] / 3)**0.5)
+                else:
+                    opt.nu = (opt.kMeta * 2 / opt.n)**(4./3)
         self.parameters['nu'] = opt.nu
         if type(opt.checkpoints_per_file) == type(None):
             # hardcoded FFTW complex representation size
@@ -898,35 +996,6 @@ class DNS(_code):
                         checkpoint_field + '/complex/{0}'.format(opt.src_iteration),
                         f,
                         checkpoint_field + '/complex/{0}'.format(0))
-                if not opt.overwrite_src_parameters:
-                    print('DNS will use physical parameters from source simulation.\n'
-                          'This means `dealias_type`, `energy`, `famplitude`, `fk0`, `fk1`, `fmode`, `friction_coefficient`, `injection_rate`, `nu` and `forcing_type`.')
-                    # timestep left out on purpose
-                    src_file = os.path.join(
-                            os.path.realpath(opt.src_work_dir),
-                            opt.src_simname + '.h5')
-                    if os.path.exists(src_file):
-                        f0 = h5py.File(src_file, 'r')
-                        for kk in ['dealias_type',
-                                   'energy',
-                                   'famplitude',
-                                   'fk0',
-                                   'fk1',
-                                   'fmode',
-                                   'friction_coefficient',
-                                   'injection_rate',
-                                   'nu']:
-                            if (kk in f0['parameters'].keys()) and (kk in self.parameters.keys()):
-                                self.parameters[kk] = type(self.parameters[kk])(f0['parameters/' + kk][()])
-                        self.parameters['forcing_type'] = str(f0['parameters/forcing_type'][()], 'ascii')
-                        f0.close()
-                    else:
-                        print('TurTLE WARNING: unable to copy parameters of source simulation.'
-                            + 'File {0} is missing.\n'.format(src_file)
-                            + 'DNS will use default values for physical parameters.\n')
-                else:
-                    print('TurTLE WARNING: DNS will use default values for physical parameters.\n'
-                          'This means `dealias_type`, `energy`, `famplitude`, `fk0`, `fk1`, `fmode`, `friction_coefficient`, `injection_rate`, `nu` and `forcing_type` may differ from values used by the source DNS.')
             else:
                 if self.dns_type == 'NSVE_Stokes_particles':
                     data = self.generate_vector_field(
diff --git a/TurTLE/DNS_statistics.py b/TurTLE/DNS_statistics.py
index 3a82e55165dddc38269495f5b18da0e579aa6b62..b0f3aeb3e27544c32f98feed64c4af09bd2016ad 100644
--- a/TurTLE/DNS_statistics.py
+++ b/TurTLE/DNS_statistics.py
@@ -262,7 +262,8 @@ def compute_time_averages(self):
     self.statistics['Lint'] = ((np.pi /
                                 (2*self.statistics['Uint']**2)) *
                                np.sum(self.statistics['energy(k)'][1:-2] /
-                                      self.statistics['kshell'][1:-2]))
+                                      self.statistics['kshell'][1:-2] *
+                                      self.statistics['dk']))
     self.statistics['Re'] = (self.statistics['Uint'] *
                              self.statistics['Lint'] /
                              self.parameters['nu'])
@@ -300,6 +301,7 @@ def read_histogram(
         base_group = 'statistics',
         pp_file = None,
         iteration_range = None,
+        min_estimate = None,
         max_estimate = None,
         niter_stat = None,
         nbins = None,
@@ -325,6 +327,17 @@ def read_histogram(
     if (hh.shape[-1] == 4):
         hh = hh[..., :3]
         max_estimate /= 3**0.5
+    if type(min_estimate) == type(None):
+        if quantity[:5] == 'log2_':
+            if 'min_' + quantity[5:] + '_estimate' in self.parameters:
+                min_estimate = np.log2(self.parameters['min_' + quantity[5:] + '_estimate'])
+            else:
+                min_estimate = -max_estimate # for compatibility with old data sets
+        else:
+            if 'min_' + quantity + '_estimate' in self.parameters:
+                min_estimate = self.parameters['min_' + quantity + '_estimate']
+            else:
+                min_estimate = -max_estimate # for compatibility with old data sets
     ngrid = self.parameters['nx']*self.parameters['ny']*self.parameters['nz']
     if return_full_history:
         npoints_expected = ngrid
@@ -333,7 +346,7 @@ def read_histogram(
         npoints_expected = ngrid*hh.shape[0]
         hh = np.sum(hh, axis = 0)
         npoints = np.sum(hh, axis = 0, keepdims=True)
-    bb0 = np.linspace(-max_estimate, max_estimate, nbins+1)
+    bb0 = np.linspace(min_estimate, max_estimate, nbins+1)
     bb = (bb0[1:] + bb0[:-1])*0.5
     if (undersample_warning_on):
         if not (npoints == npoints_expected).all():
@@ -446,8 +459,14 @@ def plot_basic_stats(
     if (   (self.parameters['nx'] != self.parameters['ny'])
         or (self.parameters['nx'] != self.parameters['nz'])):
         print('Warning: basic sanity check using possibly inappropriate CFL estimate')
+    dx = ((2*np.pi/self.parameters['dkx'])/self.parameters['nx']) # Lx / nx
+    dy = ((2*np.pi/self.parameters['dky'])/self.parameters['ny'])
+    dz = ((2*np.pi/self.parameters['dkz'])/self.parameters['nz'])
     tresolution = (self.parameters['dt']*np.sqrt(3)*self.statistics['vel_max(t)'] /
-                   (2*np.pi/self.parameters['nx']))
+                   dx)
+    if ((np.round(dx, 5) != np.round(dy, 5))
+        or (np.round(dx, 5) != np.round(dz, 5))):
+        print('WARNING: CFL computation does not make sense for non-isotropic grids')
     a.plot(self.statistics['t'] / self.statistics['Tint'],
            sresolution,
            label = '$(k_M \eta_K)^{-1}$ (spatial resolution)')
@@ -461,7 +480,7 @@ def plot_basic_stats(
         if 'CFL_velocity' in data_file['statistics'].keys():
             self.statistics['CFL_velocity(t)'] = data_file['statistics/CFL_velocity'][ii0:ii1+1]
             tresolution2 = (self.parameters['dt'] * self.statistics['CFL_velocity(t)']
-                            / (2*np.pi/self.parameters['nx']))
+                            / dx)
             a.plot(self.statistics['t'] / self.statistics['Tint'],
                     tresolution2,
                     label = '$\\frac{\\Delta t}{\\Delta x} \\| u \\|_{CFL}$')
diff --git a/TurTLE/PP.py b/TurTLE/PP.py
index b90e02138e7eb88d1ddf016d88c1214e8fd6d26d..4ac50d3594ebbea8cb04bc8aa63c7e126f0f0ad6 100644
--- a/TurTLE/PP.py
+++ b/TurTLE/PP.py
@@ -104,6 +104,8 @@ class PP(_code):
             self,
             dns_type = 'joint_acc_vel_stats'):
         pars = {}
+        if dns_type == 'get_3D_correlations':
+            pars['full_snapshot_output'] = int(False)
         if dns_type == 'pressure_stats':
             pars['max_pressure_estimate'] = float(1)
             pars['histogram_bins'] = int(129)
@@ -376,11 +378,11 @@ class PP(_code):
                 'bandpass_stats',
                 help = 'get stats of bandpassed velocity')
         parser_joint_acc_vel_stats = subparsers.add_parser(
+                'joint_acc_vel_stats',
+                help = 'get joint acceleration velocity statistics')
+        parser_pressure_stats = subparsers.add_parser(
                 'pressure_stats',
                 help = 'get pressure statistics')
-        parser_pressure_stats = subparsers.add_parser(
-                'joint_acc_vel_stats',
-                help = 'get joint acceleration and velocity statistics')
         parser_resize = subparsers.add_parser(
                 'resize',
                 help = 'resize field')
@@ -653,6 +655,28 @@ class PP(_code):
             vec4_rspace_stats = []
             scal_rspace_stats = []
             scal_spectra_stats = []
+            if self.dns_type == 'get_3D_correlations':
+                group.require_group('correlations')
+                for quantity in ['velocity', 'vorticity']:
+                    time_chunk = 2**20 // (9*self.parameters['nx'])
+                    group['correlations'].create_dataset(
+                        'R^' + quantity + '_ij(0, 0, x)',
+                        (1, self.parameters['nx'], 3, 3),
+                        maxshape = (None, self.parameters['nx'], 3, 3),
+                        chunks = (time_chunk, self.parameters['nx'], 3, 3),
+                        dtype = np.float32)
+                    group['correlations'].create_dataset(
+                        'R^' + quantity + '_ij(0, y, 0)',
+                        (1, self.parameters['ny'], 3, 3),
+                        chunks = (time_chunk, self.parameters['ny'], 3, 3),
+                        maxshape = (None, self.parameters['ny'], 3, 3),
+                        dtype = np.float32)
+                    group['correlations'].create_dataset(
+                        'R^' + quantity + '_ij(z, 0, 0)',
+                        (1, self.parameters['nz'], 3, 3),
+                        chunks = (time_chunk, self.parameters['nz'], 3, 3),
+                        maxshape = (None, self.parameters['nz'], 3, 3),
+                        dtype = np.float32)
             if self.dns_type == 'bandpass_stats':
                 vec4_rspace_stats.append('velocity')
                 for kindex in range(len(self.pp_parameters['k0list'])):
@@ -778,7 +802,7 @@ class PP(_code):
                 if os.path.exists(cpf_name):
                     cpf = h5py.File(cpf_name, 'r')
                     if 'vorticity' not in cpf.keys():
-                        print('file ', cpf_name, ' does not have vorticity group')
+                        print('TurTLE warning: file ', cpf_name, ' does not have vorticity group')
                     else:
                         for iter_name in cpf['vorticity/complex'].keys():
                             if iter_name not in ff['vorticity/complex'].keys():
@@ -786,7 +810,7 @@ class PP(_code):
                                         cpf_name,
                                         'vorticity/complex/' + iter_name)
                     if 'velocity' not in cpf.keys():
-                        print('file ', cpf_name, ' does not have velocity group')
+                        print('TurTLE warning: file ', cpf_name, ' does not have velocity group')
                     else:
                         if 'complex' in cpf['velocity'].keys():
                             for iter_name in cpf['velocity/complex'].keys():
diff --git a/TurTLE/test/profiler/Kraichnan_scalar_v1.cpp b/TurTLE/test/profiler/Kraichnan_scalar_v1.cpp
index eadb159ac3be985bac30bcc6113e68ac7ad2f229..522c0081e850aebf34f3d4ed469b48a053f2b0db 100644
--- a/TurTLE/test/profiler/Kraichnan_scalar_v1.cpp
+++ b/TurTLE/test/profiler/Kraichnan_scalar_v1.cpp
@@ -142,7 +142,7 @@ int Kraichnan_scalar_v1<rnumber>::step(void)
     // ensure velocity field is solenoidal
     {
         TIMEZONE("Kraichnan_scalar_v1::divfree");
-        this->kk->CLOOP_K2(
+        this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -195,7 +195,7 @@ int Kraichnan_scalar_v1<rnumber>::step(void)
     this->kk->template dealias<rnumber, ONE>(this->uz->get_cdata());
 
     // Euler step
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
diff --git a/TurTLE/test/profiler/scalar_evolution.cpp b/TurTLE/test/profiler/scalar_evolution.cpp
index ac576f723339c0f68ecb4276444e0935d30d90a9..b5f24fdd82937e4cb8582fdea2de747d871be9c6 100644
--- a/TurTLE/test/profiler/scalar_evolution.cpp
+++ b/TurTLE/test/profiler/scalar_evolution.cpp
@@ -110,7 +110,7 @@ int scalar_evolution<rnumber>::step(void)
             this->spectrum_small_scale_const);
 
     // some nontrivial Fourier space evolution
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -148,7 +148,7 @@ int scalar_evolution<rnumber>::step(void)
     // dealias
     this->kk->template dealias<rnumber, ONE>(this->tscal0->get_cdata());
     // another nontrivial Fourier space evolution
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
diff --git a/TurTLE/test/test_tracer_integration.py b/TurTLE/test/test_tracer_integration.py
index 73bc58c821fb160941c85844a81e93736ecaaefb..a4281198198f06e3fd9aff80ef55a4c8b8fb1abb 100644
--- a/TurTLE/test/test_tracer_integration.py
+++ b/TurTLE/test/test_tracer_integration.py
@@ -1,5 +1,6 @@
 import os
 import sys
+import re
 
 import numpy as np
 import h5py
@@ -146,11 +147,19 @@ def read_diff_data(
         d1p += x[neighbours+ii]*fornberg_coefficients[1][ii+neighbours]
         d2p += x[neighbours+ii]*fornberg_coefficients[2][ii+neighbours]
         d1u += u[neighbours+ii]*fornberg_coefficients[1][ii+neighbours]
-    return {'a' : aa,
-            'u' : uu,
-            'd1p' : d1p,
-            'd2p' : d2p,
-            'd1u' : d1u}
+    result = {'a' : aa,
+              'u' : uu,
+              'd1p' : d1p,
+              'd2p' : d2p,
+              'd1u' : d1u,
+              'species_index' : species_index,
+              'species_name'  : sname,
+              'diff_neighbours' : neighbours}
+    interp_information = re.search('_n([0-9]+)_m(\d)_', sname)
+    if type(interp_information) != type(None):
+        result['neighbours'] = int(interp_information.group(1))
+        result['smoothness'] = int(interp_information.group(2))
+    return result
 
 def relative_vector_difference(vv1, vv2, reference = None):
     assert(vv1.shape[1] == 3 and vv2.shape[1] == 3)
diff --git a/cpp/field.cpp b/cpp/field.cpp
index 92f3a09d652cc690410382ea1d4dea80022a77c9..8482e5b0737326cc76895dc1f4b60e470fe445f3 100644
--- a/cpp/field.cpp
+++ b/cpp/field.cpp
@@ -992,7 +992,7 @@ void field<rnumber, be, fc>::compute_rspace_xincrement_stats(
                 this->rlayout->sizes[0],
                 this->rlayout->comm);
     tmp_field->real_space_representation = true;
-    this->RLOOP_simd(
+    this->RLOOP(
                 [&](const ptrdiff_t rindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -1036,7 +1036,7 @@ double field<rnumber, be, fc>::compute_CFL_velocity()
     shared_array<double> local_CFL_velocity(
             1,
             shared_array_zero_initializer<double, 1>);
-    this->RLOOP_simd(
+    this->RLOOP(
                 [&](const ptrdiff_t rindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -1082,6 +1082,7 @@ void field<rnumber, be, fc>::compute_rspace_stats(
                 const hid_t group,
                 const std::string dset_name,
                 const hsize_t toffset,
+                const std::vector<double> min_estimate,
                 const std::vector<double> max_estimate,
                 const unsigned int maximum_moment)
 {
@@ -1150,14 +1151,26 @@ void field<rnumber, be, fc>::compute_rspace_stats(
         MPI_Bcast(&nvals, 1, MPI_INT, 0, this->comm);
         MPI_Bcast(&nbins, 1, MPI_INT, 0, this->comm);
     }
+    assert(nvals == int(min_estimate.size()));
     assert(nvals == int(max_estimate.size()));
+    for(int i=0; i<nvals; i++)
+        assert(max_estimate[i] > min_estimate[i]);
 
     start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
     shared_array<double> local_moments_threaded(
             nmoments*nvals,
             [&](double* local_moments){
                 std::fill_n(local_moments, nmoments*nvals, 0);
-                if (nvals == 4) local_moments[3] = max_estimate[3];
+                for (int i=0; i<nvals; i++)
+                {
+                    // see https://en.cppreference.com/w/cpp/types/numeric_limits
+                    local_moments[0*nvals+i] = std::numeric_limits<double>::max();
+                    local_moments[(nmoments-1)*nvals+i] = std::numeric_limits<double>::lowest();
+                }
+                if (nvals == 4) {
+                    local_moments[0*nvals+3] = std::numeric_limits<double>::max();
+                    local_moments[(nmoments-1)*nvals+3] = std::numeric_limits<double>::min();
+                }
             });
 
     shared_array<double> val_tmp_threaded(
@@ -1176,7 +1189,7 @@ void field<rnumber, be, fc>::compute_rspace_stats(
 
     double *binsize = new double[nvals];
     for (int i=0; i<nvals; i++)
-        binsize[i] = 2*max_estimate[i] / nbins;
+        binsize[i] = (max_estimate[i] - min_estimate[i]) / nbins;
 
     {
         {
@@ -1202,27 +1215,19 @@ void field<rnumber, be, fc>::compute_rspace_stats(
             if (nvals == int(4))
             {
                 val_tmp[3] = std::sqrt(val_tmp[3]);
-                if (val_tmp[3] < local_moments[0*nvals+3])
-                    local_moments[0*nvals+3] = val_tmp[3];
-                if (val_tmp[3] > local_moments[9*nvals+3])
-                    local_moments[9*nvals+3] = val_tmp[3];
-                const int bin = int(std::floor(val_tmp[3]*2/binsize[3]));
-                if (bin >= 0 && bin < nbins){
-                    local_hist[bin*nvals+3]++;
-                }
             }
-            for (unsigned int i=0; i<ncomp(fc); i++)
+            for (int i=0; i<nvals; i++)
             {
                 if (val_tmp[i] < local_moments[0*nvals+i])
                     local_moments[0*nvals+i] = val_tmp[i];
                 if (val_tmp[i] > local_moments[(nmoments-1)*nvals+i])
                     local_moments[(nmoments-1)*nvals+i] = val_tmp[i];
-                const int bin = int(std::floor((val_tmp[i] + max_estimate[i]) / binsize[i]));
+                const int bin = int(std::floor((val_tmp[i] - min_estimate[i]) / binsize[i]));
                 if (bin >= 0 && bin < nbins)
                     local_hist[bin*nvals+i]++;
             }
             for (unsigned int n=1; n < maximum_moment; n++){
-                for (auto i=0; i<nvals; i++){
+                for (int i=0; i<nvals; i++){
                     local_moments[n*nvals + i] += (pow_tmp[i] = val_tmp[i]*pow_tmp[i]);
                 }
             }
@@ -1235,16 +1240,10 @@ void field<rnumber, be, fc>::compute_rspace_stats(
         {
         TIMEZONE("FIELD_RLOOP::Merge");
         local_moments_threaded.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double {
-            if(nvals == int(4) && idx == 0*nvals+3){
+            if(idx < int(nvals)){
                 return std::min(v1, v2);
             }
-            if(nvals == int(4) && idx == 9*nvals+3){
-                return std::max(v1, v2);
-            }
-            if(idx < int(ncomp(fc))){
-                return std::min(v1, v2);
-            }
-            if(int(nmoments-1)*nvals <= idx && idx < int(int(nmoments-1)*nvals+ncomp(fc))){
+            if(int(nmoments-1)*nvals <= idx){
                 return std::max(v1, v2);
             }
             return v1 + v2;
@@ -2352,7 +2351,7 @@ int compute_gradient(
     switch(fc1)
             {
                 case ONE:
-    kk->CLOOP_K2(
+    kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -2369,7 +2368,7 @@ int compute_gradient(
                     }});
                     break;
                 case THREE:
-    kk->CLOOP_K2(
+    kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -2406,7 +2405,7 @@ int compute_divergence(
     assert(!src->real_space_representation);
     *dst = 0.0;
     dst->real_space_representation = false;
-    kk->CLOOP_K2(
+    kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -2434,7 +2433,7 @@ int compute_curl(
     assert(!src->real_space_representation);
     *dst = 0.0;
     dst->real_space_representation = false;
-    kk->CLOOP_K2(
+    kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -2471,7 +2470,7 @@ int invert_curl(
     assert(!src->real_space_representation);
     *dst = 0.0;
     dst->real_space_representation = false;
-    kk->CLOOP_K2(
+    kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -2489,8 +2488,7 @@ int invert_curl(
         }
         else
             std::fill_n((rnumber*)(dst->get_cdata()+3*cindex), 6, 0.0);
-    }
-    );
+    });
     dst->symmetrize();
     return EXIT_SUCCESS;
 }
@@ -3271,7 +3269,7 @@ int make_gaussian_random_field(
     *output_field = 0.0;
     //DEBUG_MSG("slope: %g\n", slope);
     // inside loop use only thread-local random number generator
-    kk->CLOOP_K2([&](
+    kk->CLOOP([&](
                 const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -3351,7 +3349,7 @@ int generate_random_phase_field(
     }
 
     // inside loop use only thread-local random number generator
-    kk->CLOOP_K2([&](
+    kk->CLOOP([&](
                 const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -3374,7 +3372,7 @@ int generate_random_phase_field(
     output_field->symmetrize();
     // ensure absolute value is 1 for all modes
     {
-    kk->CLOOP_K2([&](
+    kk->CLOOP([&](
                 const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -3424,7 +3422,7 @@ int apply_phase_field_shift(
     assert(phase_field->real_space_representation == false);
 
     // inside loop use only thread-local random number generator
-    kk->CLOOP_K2([&](
+    kk->CLOOP([&](
                 const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
diff --git a/cpp/field.hpp b/cpp/field.hpp
index 6759af2683592c92a15e77d773a5d998efdd27d8..c136761e780eac73c1eada16e5ecece5c78f4e53 100644
--- a/cpp/field.hpp
+++ b/cpp/field.hpp
@@ -128,13 +128,36 @@ class field
                 const hsize_t toffset,
                 const std::vector<double> max_estimate,
                 field<rnumber, be, fc> *tmp_field = NULL);
+    
+        void compute_rspace_stats(
+                const hid_t group,
+                const std::string dset_name,
+                const hsize_t toffset,
+                const std::vector<double> max_estimate,
+                const unsigned int maximum_moment = 9) {
+            std::vector<double> min_estimate;
+            min_estimate.resize(max_estimate.size());
+            for (std::size_t ii = 0; ii < max_estimate.size(); ii++)
+                min_estimate[ii] = -max_estimate[ii];
+            if (max_estimate.size() == 4)
+                min_estimate[3] = 0;
+            this->compute_rspace_stats(
+                    group,
+                    dset_name,
+                    toffset,
+                    min_estimate,
+                    max_estimate,
+                    maximum_moment);
+        }
 
         void compute_rspace_stats(
                 const hid_t group,
                 const std::string dset_name,
                 const hsize_t toffset,
+                const std::vector<double> min_estimate,
                 const std::vector<double> max_estimate,
                 const unsigned int maximum_moment = 9);
+ 
 
 
         double compute_CFL_velocity();
@@ -208,6 +231,13 @@ class field
             return *(this->data + cindex*2 + imag);
         }
 
+        inline rnumber &cval(ptrdiff_t cindex, int imag) const
+        {
+            assert(fc == ONE);
+            assert(imag == 0 || imag == 1);
+            return *(this->data + cindex*2 + imag);
+        }
+
         inline rnumber &cval(ptrdiff_t cindex, int component, int imag)
         {
             assert(fc == THREE);
@@ -215,6 +245,13 @@ class field
             return *(this->data + (cindex*ncomp(fc) + component)*2 + imag);
         }
 
+        inline rnumber &cval(ptrdiff_t cindex, int component, int imag) const
+        {
+            assert(fc == THREE);
+            assert(imag == 0 || imag == 1);
+            return *(this->data + (cindex*ncomp(fc) + component)*2 + imag);
+        }
+
         inline rnumber &cval(ptrdiff_t cindex, int comp1, int comp0, int imag)
         {
             assert(fc == THREExTHREE);
@@ -247,8 +284,10 @@ class field
                 std::fill_n(this->data, 2*ncomp(fc), 0.0);
             }
         }
-        template <class func_type>
-        void RLOOP_simd(func_type expression)
+        void RLOOP(std::function<void(const ptrdiff_t,
+                                      const ptrdiff_t,
+                                      const ptrdiff_t,
+                                      const ptrdiff_t)> expression)
         {
             start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
             switch(be)
@@ -256,17 +295,21 @@ class field
                 case FFTW:
                     #pragma omp parallel
                     {
-                        const hsize_t start = OmpUtils::ForIntervalStart(this->rlayout->subsizes[1]);
-                        const hsize_t end = OmpUtils::ForIntervalEnd(this->rlayout->subsizes[1]);
+                        const ptrdiff_t start = OmpUtils::ForIntervalStart(this->rlayout->subsizes[1]);
+                        const ptrdiff_t end = OmpUtils::ForIntervalEnd(this->rlayout->subsizes[1]);
 
-                        #pragma omp simd
-                        for (hsize_t zindex = 0; zindex < this->rlayout->subsizes[0]; zindex++)
-                        for (hsize_t yindex = start; yindex < end; yindex++)
+                        for (ptrdiff_t zindex = 0;
+                             zindex < ptrdiff_t(this->rlayout->subsizes[0]);
+                             zindex++)
+                            //#pragma omp simd
+                        for (ptrdiff_t yindex = start; yindex < end; yindex++)
                         {
                             const ptrdiff_t rindex = (
                                     zindex * this->rlayout->subsizes[1] + yindex)*(
                                         this->rmemlayout->subsizes[2]);
-                            for (hsize_t xindex = 0; xindex < this->rlayout->subsizes[2]; xindex++)
+                            for (ptrdiff_t xindex = 0;
+                                 xindex < ptrdiff_t(this->rlayout->subsizes[2]);
+                                 xindex++)
                             {
                                 expression(rindex + xindex, xindex, yindex, zindex);
                             }
@@ -276,8 +319,7 @@ class field
             }
             finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
         }
-        template <class func_type>
-        void RLOOP(func_type expression)
+        void RLOOP(std::function<void(const ptrdiff_t)> expression)
         {
             start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
             switch(be)
@@ -285,19 +327,23 @@ class field
                 case FFTW:
                     #pragma omp parallel
                     {
-                        const hsize_t start = OmpUtils::ForIntervalStart(this->rlayout->subsizes[1]);
-                        const hsize_t end = OmpUtils::ForIntervalEnd(this->rlayout->subsizes[1]);
+                        const ptrdiff_t start = OmpUtils::ForIntervalStart(this->rlayout->subsizes[1]);
+                        const ptrdiff_t end = OmpUtils::ForIntervalEnd(this->rlayout->subsizes[1]);
 
-                        for (hsize_t zindex = 0; zindex < this->rlayout->subsizes[0]; zindex++)
+                        for (ptrdiff_t zindex = 0;
+                             zindex < ptrdiff_t(this->rlayout->subsizes[0]);
+                             zindex++)
                             //#pragma omp simd
-                        for (hsize_t yindex = start; yindex < end; yindex++)
+                        for (ptrdiff_t yindex = start; yindex < end; yindex++)
                         {
                             const ptrdiff_t rindex = (
                                     zindex * this->rlayout->subsizes[1] + yindex)*(
                                         this->rmemlayout->subsizes[2]);
-                            for (hsize_t xindex = 0; xindex < this->rlayout->subsizes[2]; xindex++)
+                            for (ptrdiff_t xindex = 0;
+                                 xindex < ptrdiff_t(this->rlayout->subsizes[2]);
+                                 xindex++)
                             {
-                                expression(rindex + xindex, xindex, yindex, zindex);
+                                expression(rindex + xindex);
                             }
                         }
                     }
@@ -336,9 +382,9 @@ class field
             finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
         }
         ptrdiff_t get_cindex(
-                ptrdiff_t xindex,
-                ptrdiff_t yindex,
-                ptrdiff_t zindex)
+                const ptrdiff_t xindex,
+                const ptrdiff_t yindex,
+                const ptrdiff_t zindex)
         {
             return ((yindex*this->clayout->subsizes[1] +
                      zindex)*this->clayout->subsizes[2] +
@@ -346,9 +392,9 @@ class field
         }
 
         ptrdiff_t get_rindex(
-                ptrdiff_t xindex,
-                ptrdiff_t yindex,
-                ptrdiff_t zindex) const
+                const ptrdiff_t xindex,
+                const ptrdiff_t yindex,
+                const ptrdiff_t zindex) const
         {
             return ((zindex*this->rmemlayout->subsizes[1] +
                      yindex)*this->rmemlayout->subsizes[2] +
diff --git a/cpp/full_code/NSE.cpp b/cpp/full_code/NSE.cpp
index 830f0dae28f6bb80621526e199e0e05cabdf3cea..950883094c9af0c9301a645de241b6828a8c34b0 100644
--- a/cpp/full_code/NSE.cpp
+++ b/cpp/full_code/NSE.cpp
@@ -197,7 +197,7 @@ int NSE<rnumber>::add_forcing_term(
                 1,
                 shared_array_zero_initializer<double, 1>);
         double energy_in_shell = 0;
-        this->kk->CLOOP_K2_NXMODES(
+        this->kk->CLOOP(
                     [&](const ptrdiff_t cindex,
                         const ptrdiff_t xindex,
                         const ptrdiff_t yindex,
@@ -401,7 +401,7 @@ int NSE<rnumber>::Euler_step(void)
     this->kk->template force_divfree<rnumber>(this->velocity->get_cdata());
 
     // apply diffusion exponential
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -441,7 +441,7 @@ int NSE<rnumber>::Heun_step(void)
             this->comm,
             fftw_planner_string_to_flag[this->fftw_plan_rigor]);
 
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -495,7 +495,7 @@ int NSE<rnumber>::Heun_step(void)
             }
             });
     this->kk->template force_divfree<rnumber>(this->velocity->get_cdata());
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -558,7 +558,7 @@ int NSE<rnumber>::ShuOsher(void)
     this->add_forcing_term(this->velocity, acc);
     *(this->tmp_velocity1) = rnumber(0.0);
     this->tmp_velocity1->real_space_representation = false;
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -582,7 +582,7 @@ int NSE<rnumber>::ShuOsher(void)
     this->add_forcing_term(this->tmp_velocity1, acc);
     *(this->tmp_velocity2) = rnumber(0.0);
     this->tmp_velocity2->real_space_representation = false;
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -606,7 +606,7 @@ int NSE<rnumber>::ShuOsher(void)
             this->tmp_velocity2,
             acc);
     this->add_forcing_term(this->tmp_velocity2, acc);
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -799,7 +799,7 @@ int NSE<rnumber>::get_cpressure(field<rnumber, FFTW, ONE> *destination)
     // 00
     this->store_nonlinear_product(temp_scalar, rvelocity, 0, 0);
     temp_scalar->dft();
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -815,7 +815,7 @@ int NSE<rnumber>::get_cpressure(field<rnumber, FFTW, ONE> *destination)
     // 01
     this->store_nonlinear_product(temp_scalar, rvelocity, 0, 1);
     temp_scalar->dft();
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -831,7 +831,7 @@ int NSE<rnumber>::get_cpressure(field<rnumber, FFTW, ONE> *destination)
     // 02
     this->store_nonlinear_product(temp_scalar, rvelocity, 0, 2);
     temp_scalar->dft();
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -847,7 +847,7 @@ int NSE<rnumber>::get_cpressure(field<rnumber, FFTW, ONE> *destination)
     // 11
     this->store_nonlinear_product(temp_scalar, rvelocity, 1, 1);
     temp_scalar->dft();
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -863,7 +863,7 @@ int NSE<rnumber>::get_cpressure(field<rnumber, FFTW, ONE> *destination)
     // 12
     this->store_nonlinear_product(temp_scalar, rvelocity, 1, 2);
     temp_scalar->dft();
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -879,7 +879,7 @@ int NSE<rnumber>::get_cpressure(field<rnumber, FFTW, ONE> *destination)
     // 22
     this->store_nonlinear_product(temp_scalar, rvelocity, 2, 2);
     temp_scalar->dft();
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -915,7 +915,7 @@ int NSE<rnumber>::get_cacceleration(field<rnumber, FFTW, THREE> *destination)
         this->comm,
         this->velocity->fftw_plan_rigor);
     this->get_cpressure(pressure);
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
diff --git a/cpp/full_code/NSE.hpp b/cpp/full_code/NSE.hpp
index ccfc1e0fddb8744ac86e6e481e9b094621e08e2d..e2a5f17da54e656a6eac8db7fd3965698afba9b2 100644
--- a/cpp/full_code/NSE.hpp
+++ b/cpp/full_code/NSE.hpp
@@ -28,8 +28,8 @@
 
 
 
-#include "vorticity_equation.hpp"
 #include "full_code/direct_numerical_simulation.hpp"
+#include "field.hpp"
 
 /** \brief Navier-Stokes solver.
  *
diff --git a/cpp/full_code/NSE_alt_dealias.cpp b/cpp/full_code/NSE_alt_dealias.cpp
index 44a325fb9e6306e179fe15903771321a2f80bb01..c066164d59e9e14b5e06528e993182194f1e4bee 100644
--- a/cpp/full_code/NSE_alt_dealias.cpp
+++ b/cpp/full_code/NSE_alt_dealias.cpp
@@ -171,7 +171,7 @@ int NSE_alt_dealias<rnumber>::add_forcing_term(
         // first, compute energy in shell
         shared_array<double> local_energy_in_shell(1);
         double energy_in_shell = 0;
-        this->kk->CLOOP_K2_NXMODES(
+        this->kk->CLOOP(
                     [&](const ptrdiff_t cindex,
                         const ptrdiff_t xindex,
                         const ptrdiff_t yindex,
@@ -374,7 +374,7 @@ int NSE_alt_dealias<rnumber>::Euler_step(void)
     this->kk->template force_divfree<rnumber>(this->velocity->get_cdata());
 
     // apply diffusion exponential
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -425,7 +425,7 @@ int NSE_alt_dealias<rnumber>::ShuOsher(void)
     *this->tmp_velocity1 = rnumber(0);
     this->add_nonlinear_term(this->velocity, this->tmp_velocity1);
     this->add_forcing_term(this->velocity, this->tmp_velocity1);
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -445,7 +445,7 @@ int NSE_alt_dealias<rnumber>::ShuOsher(void)
     *this->tmp_velocity2 = rnumber(0);
     this->add_nonlinear_term(this->tmp_velocity1, this->tmp_velocity2);
     this->add_forcing_term(this->tmp_velocity1, this->tmp_velocity2);
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -467,7 +467,7 @@ int NSE_alt_dealias<rnumber>::ShuOsher(void)
     *this->tmp_velocity1 = rnumber(0);
     this->add_nonlinear_term(this->tmp_velocity2, this->tmp_velocity1);
     this->add_forcing_term(this->tmp_velocity2, this->tmp_velocity1);
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
diff --git a/cpp/full_code/NSVE_field_stats.cpp b/cpp/full_code/NSVE_field_stats.cpp
index 9fbb3a4165a5b95cb964bca0b46010f58b820432..2930c39b67ccbdd2a8cac751f3d39464e70ee222 100644
--- a/cpp/full_code/NSVE_field_stats.cpp
+++ b/cpp/full_code/NSVE_field_stats.cpp
@@ -31,7 +31,16 @@
 template <typename rnumber>
 int NSVE_field_stats<rnumber>::read_parameters(void)
 {
+    TIMEZONE("NSVE_field_stats::read_parameters");
     this->postprocess::read_parameters();
+    hid_t parameter_file = H5Fopen(
+            (this->simname + std::string(".h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    this->niter_out = hdf5_tools::read_value<int>(
+            parameter_file, "/parameters/niter_out");
+    H5Fclose(parameter_file);
+    MPI_Barrier(this->comm);
     return EXIT_SUCCESS;
 }
 
@@ -76,6 +85,7 @@ int NSVE_field_stats<rnumber>::initialize(void)
     }
     this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor");
     H5Fclose(parameter_file);
+    MPI_Barrier(this->comm);
     return EXIT_SUCCESS;
 }
 
diff --git a/cpp/full_code/NSVE_field_stats.hpp b/cpp/full_code/NSVE_field_stats.hpp
index 34292ef3bf44e6179c321833e4e48442c79bd17c..40116190acdb3f792fcb2f22c865de89a40593db 100644
--- a/cpp/full_code/NSVE_field_stats.hpp
+++ b/cpp/full_code/NSVE_field_stats.hpp
@@ -35,6 +35,8 @@ class NSVE_field_stats: public postprocess
     private:
         field_binary_IO<rnumber, COMPLEX, THREE> *bin_IO;
     public:
+        int niter_out;
+
         std::string fftw_plan_rigor;
 
         field<rnumber, FFTW, THREE> *vorticity;
diff --git a/cpp/full_code/bandpass_stats.cpp b/cpp/full_code/bandpass_stats.cpp
index 45b319330f8a5cb7b08e06de22511e6136a452f7..10d0dea9fbf4f07c5100b27ed8a4f8fa2a3e0f90 100644
--- a/cpp/full_code/bandpass_stats.cpp
+++ b/cpp/full_code/bandpass_stats.cpp
@@ -42,7 +42,6 @@ int bandpass_stats<rnumber>::initialize(void)
             (this->simname + std::string(".h5")).c_str(),
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
-    this->niter_out = hdf5_tools::read_value<int>(parameter_file, "/parameters/niter_out");
     this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "/parameters/checkpoints_per_file");
     if (this->checkpoints_per_file == INT_MAX) // value returned if dataset does not exist
         this->checkpoints_per_file = 1;
diff --git a/cpp/full_code/bandpass_stats.hpp b/cpp/full_code/bandpass_stats.hpp
index 9435333e6dae6994018d3d82ad87da3f2b3a82aa..3d4b13df9855c358545c9136279e4aa9d65c5ce6 100644
--- a/cpp/full_code/bandpass_stats.hpp
+++ b/cpp/full_code/bandpass_stats.hpp
@@ -46,7 +46,6 @@ class bandpass_stats: public NSVE_field_stats<rnumber>
 {
     public:
         int checkpoints_per_file;
-        int niter_out;
         double max_velocity_estimate;
         kspace<FFTW, SMOOTH> *kk;
         /* parameters that are read in read_parameters */
diff --git a/cpp/full_code/check_divergence.cpp b/cpp/full_code/check_divergence.cpp
index aaacf2c2bd91f5df82f61911d395ba1dcea5d0f5..79d33f1bca2cc90488ac03df9b65f60a4d259fea 100644
--- a/cpp/full_code/check_divergence.cpp
+++ b/cpp/full_code/check_divergence.cpp
@@ -49,7 +49,6 @@ int check_divergence<rnumber>::initialize(void)
             (this->simname + std::string(".h5")).c_str(),
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
-    this->niter_out = hdf5_tools::read_value<int>(parameter_file, "/parameters/niter_out");
     this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "/parameters/checkpoints_per_file");
     if (this->checkpoints_per_file == INT_MAX) // value returned if dataset does not exist
         this->checkpoints_per_file = 1;
diff --git a/cpp/full_code/check_divergence.hpp b/cpp/full_code/check_divergence.hpp
index 5f2308f1a5e34c3a6ef5e06beeb0028a3674dc8c..2f2395fcbdbaa80bc3e8fc80ebb32236ac47290a 100644
--- a/cpp/full_code/check_divergence.hpp
+++ b/cpp/full_code/check_divergence.hpp
@@ -33,7 +33,6 @@ class check_divergence: public NSVE_field_stats<rnumber>
 {
     public:
         int checkpoints_per_file;
-        int niter_out;
         kspace<FFTW, SMOOTH>* kk;
         field<rnumber, FFTW, ONE>* div_field;
 
diff --git a/cpp/full_code/field_single_to_double.cpp b/cpp/full_code/field_single_to_double.cpp
index 9e4731c7447b6c10ea9d545ea3a5996955e48921..c1ca04b3f8bdd1cd718b79ed8be80b29c4078bb1 100644
--- a/cpp/full_code/field_single_to_double.cpp
+++ b/cpp/full_code/field_single_to_double.cpp
@@ -46,12 +46,9 @@ int field_single_to_double<rnumber>::initialize(void)
             (this->simname + std::string(".h5")).c_str(),
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
-    hid_t dset = H5Dopen(parameter_file, "/parameters/niter_out", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->niter_out);
-    H5Dclose(dset);
     if (H5Lexists(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT))
     {
-        dset = H5Dopen(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT);
+        hid_t dset = H5Dopen(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT);
         H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->checkpoints_per_file);
         H5Dclose(dset);
     }
diff --git a/cpp/full_code/field_single_to_double.hpp b/cpp/full_code/field_single_to_double.hpp
index 1cb0fdf67059b98c33611d0bb924f83c10a749f9..abfb70f6c90061c7b662b45f27bf8a26300171cd 100644
--- a/cpp/full_code/field_single_to_double.hpp
+++ b/cpp/full_code/field_single_to_double.hpp
@@ -33,7 +33,6 @@ class field_single_to_double: public NSVE_field_stats<rnumber>
 {
     public:
         int checkpoints_per_file;
-        int niter_out;
         kspace<FFTW, SMOOTH> *kk;
 
         field<double, FFTW, THREE> *vec_field_double;
diff --git a/cpp/full_code/get_3D_correlations.cpp b/cpp/full_code/get_3D_correlations.cpp
index 7f7c5f6d26760ea2d9d46c05fba9c339f6b1de38..afc53ea0d7a174722897d35eb50cab311b3e6987 100644
--- a/cpp/full_code/get_3D_correlations.cpp
+++ b/cpp/full_code/get_3D_correlations.cpp
@@ -34,7 +34,6 @@ int get_3D_correlations<rnumber>::initialize(void)
 {
     TIMEZONE("get_3D_correlations::initialize");
     this->NSVE_field_stats<rnumber>::initialize();
-    DEBUG_MSG("after NSVE_field_stats::initialize\n");
 
     // allocate kspace
     this->kk = new kspace<FFTW, SMOOTH>(
@@ -57,7 +56,6 @@ int get_3D_correlations<rnumber>::initialize(void)
             (this->simname + std::string(".h5")).c_str(),
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
-    this->niter_out = hdf5_tools::read_value<int>(parameter_file, "/parameters/niter_out");
     this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "/parameters/checkpoints_per_file");
     if (this->checkpoints_per_file == INT_MAX) // value returned if dataset does not exist
         this->checkpoints_per_file = 1;
@@ -71,62 +69,223 @@ int get_3D_correlations<rnumber>::initialize(void)
     this->iteration_list = hdf5_tools::read_vector<int>(
             parameter_file,
             "/get_3D_correlations/parameters/iteration_list");
+    this->full_snapshot_output = bool(
+            hdf5_tools::read_value<int>(
+                parameter_file,
+                "/get_3D_correlations/parameters/full_snapshot_output"));
     H5Fclose(parameter_file);
     // the following ensures the file is free for rank 0 to open in read/write mode
     MPI_Barrier(this->comm);
+    if (this->myrank == 0) {
+        // set caching parameters
+        hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
+        herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0);
+        variable_used_only_in_assert(cache_err);
+        DEBUG_MSG("when setting stat_file cache I got %d\n", cache_err);
+        this->stat_file = H5Fopen(
+                (this->simname + "_post.h5").c_str(),
+                H5F_ACC_RDWR,
+                fapl);
+    } else {
+        this->stat_file = 0;
+        }
+    int data_file_problem;
+    if (this->myrank == 0)
+        data_file_problem = hdf5_tools::require_size_file_datasets(
+                this->stat_file,
+                "get_3D_correlations",
+                (this->iteration_list.back() / this->niter_out) + 1);
+    MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, this->comm);
+    if (data_file_problem > 0)
+    {
+        std::cerr <<
+            data_file_problem <<
+            " problems setting sizes of file datasets.\ntrying to exit now." <<
+            std::endl;
+        return EXIT_FAILURE;
+    }
     return EXIT_SUCCESS;
 }
 
 template <typename rnumber>
-int get_3D_correlations<rnumber>::work_on_current_iteration(void)
+int compute_correlation(
+        kspace<FFTW, SMOOTH> *kk,
+        field<rnumber, FFTW, THREE> *vel,
+        field<rnumber, FFTW, THREExTHREE> *Rij,
+        hid_t group,
+        const int toffset,
+        const std::string label = "velocity",
+        const std::string field_fname = "",
+        const int iteration = 0)
 {
-    TIMEZONE("get_3D_correlations::work_on_current_iteration");
-    this->read_current_cvorticity();
-
-    invert_curl(
-        this->kk,
-        this->vorticity,
-        this->vel);
-    *(this->Rij) = 0.0;
-    this->Rij->real_space_representation = false;
+    *(Rij) = 0.0;
+    Rij->real_space_representation = false;
 
-
-    const double dkvolume = this->kk->dkx*this->kk->dky*this->kk->dkz;
+    const double dkvolume = kk->dkx*kk->dky*kk->dkz;
     // compute Rij
-    this->kk->CLOOP_K2(
+    // the computation is taken from Pope
+    // this corresponds to $Rij(\mathbf{r}) = \langle u_i(\textbf{x}) u_j(\textbf{x} + \textbf{r})\rangle$
+    // (note that "r" appears in "u_j")
+    kk->CLOOP(
             [&](ptrdiff_t cindex,
                 ptrdiff_t xindex,
                 ptrdiff_t yindex,
                 ptrdiff_t zindex,
                 const double k2){
-            // compute v_i[k] v_j[-k]
+            // compute v_i[-k] v_j[k]
             for (unsigned int i=0; i<3; i++)
             for (unsigned int j=0; j<3; j++)
             {
-                this->Rij->cval(cindex, i, j, 0) = (
-                    this->vel->cval(cindex, i, 0)*this->vel->cval(cindex, j, 0) +
-                    this->vel->cval(cindex, i, 1)*this->vel->cval(cindex, j, 1)) / dkvolume;
-                this->Rij->cval(cindex, i, j, 1) = (
-                    this->vel->cval(cindex, i, 0)*this->vel->cval(cindex, j, 1) -
-                    this->vel->cval(cindex, i, 1)*this->vel->cval(cindex, j, 0)) / dkvolume;
+                Rij->cval(cindex, i, j, 0) = (
+                    vel->cval(cindex, i, 0)*vel->cval(cindex, j, 0) +
+                    vel->cval(cindex, i, 1)*vel->cval(cindex, j, 1)) / dkvolume;
+                Rij->cval(cindex, i, j, 1) = (
+                    vel->cval(cindex, i, 0)*vel->cval(cindex, j, 1) -
+                    vel->cval(cindex, i, 1)*vel->cval(cindex, j, 0)) / dkvolume;
             }
             });
 
     // go to real space
-    this->Rij->ift();
-
-    const std::string fname = (
-            this->simname +
-            std::string("_checkpoint_Rij_") +
-            std::to_string(this->iteration / (this->niter_out*this->checkpoints_per_file)) +
-            std::string(".h5"));
+    Rij->ift();
 
     /// output Rij field
-    this->Rij->io(
+    if (field_fname.size() > 0) {
+        std::cerr << "fname size is " << field_fname.size()
+            << ", and fname is " << field_fname << std::endl;
+        Rij->io(
+                field_fname,
+                "Rij",
+                iteration,
+                false);
+    }
+
+    // extract edges of domain (used for longitudinal and transversal components).
+    double Rijx[9*Rij->get_nx()];
+    double Rijy[9*Rij->get_ny()];
+    double Rijz[9*Rij->get_nz()];
+    double Rijz_local[9*Rij->get_nz()];
+    std::fill_n(Rijz_local, 9*Rij->get_nz(), 0);
+    std::fill_n(Rijz, 9*Rij->get_nz(), 0);
+    for (int i=0; i<3; i++)
+    {
+        for (int j=0; j<3; j++)
+        {
+            if (Rij->rlayout->myrank == Rij->rlayout->rank[0][0])
+            {
+                for (int ix=0; ix<Rij->get_nx(); ix++)
+                    Rijx[9*ix + (i*3+j)] = Rij->rval(
+                            ix, i, j);
+                for (int iy=0; iy<Rij->get_ny(); iy++)
+                    Rijy[9*iy + (i*3+j)] = Rij->rval(
+                            iy*(Rij->get_nx()+2), i, j);
+            }
+            for (std::size_t iiz=0; iiz<Rij->rlayout->subsizes[0]; iiz++)
+                Rijz_local[(i*3+j) + 9*(iiz + Rij->rlayout->starts[0])] =
+                    Rij->rval(iiz*(Rij->get_nx()+2)*Rij->get_ny(), i, j);
+        }
+    }
+    MPI_Allreduce(
+            Rijz_local,
+            Rijz,
+            9*Rij->get_nz(),
+            MPI_DOUBLE,
+            MPI_SUM,
+            Rij->comm);
+
+    // output edges of domain
+    if (Rij->rlayout->myrank == 0)
+    {
+        hid_t dset, mspace, wspace;
+        hsize_t count[4], offset[4];
+        offset[0] = toffset;
+        offset[1] = 0;
+        offset[2] = 0;
+        offset[3] = 0;
+        count[0] = 1;
+        count[1] = Rij->get_nx();
+        count[2] = 3;
+        count[3] = 3;
+        dset = H5Dopen(
+                group,
+                (std::string("correlations/R^") + label + std::string("_ij(0, 0, x)")).c_str(),
+                H5P_DEFAULT);
+        mspace = H5Screate_simple(4, count, NULL);
+        wspace = H5Dget_space(dset);
+        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, Rijx);
+        H5Dclose(dset);
+        H5Sclose(wspace);
+        H5Sclose(mspace);
+        dset = H5Dopen(
+                group,
+                (std::string("correlations/R^") + label + std::string("_ij(0, y, 0)")).c_str(),
+                H5P_DEFAULT);
+        count[1] = Rij->get_ny();
+        mspace = H5Screate_simple(4, count, NULL);
+        wspace = H5Dget_space(dset);
+        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, Rijy);
+        H5Dclose(dset);
+        H5Sclose(wspace);
+        H5Sclose(mspace);
+        dset = H5Dopen(
+                group,
+                (std::string("correlations/R^") + label + std::string("_ij(z, 0, 0)")).c_str(),
+                H5P_DEFAULT);
+        count[1] = Rij->get_nz();
+        mspace = H5Screate_simple(4, count, NULL);
+        wspace = H5Dget_space(dset);
+        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, Rijz);
+        H5Dclose(dset);
+        H5Sclose(wspace);
+        H5Sclose(mspace);
+    }
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int get_3D_correlations<rnumber>::work_on_current_iteration(void)
+{
+    TIMEZONE("get_3D_correlations::work_on_current_iteration");
+    this->read_current_cvorticity();
+
+    invert_curl(
+        this->kk,
+        this->vorticity,
+        this->vel);
+
+    std::string fname = "";
+    if (this->full_snapshot_output) {
+        fname = (
+             this->simname +
+             std::string("_checkpoint_Rij_") +
+             std::to_string(this->iteration / (this->niter_out*this->checkpoints_per_file)) +
+             std::string(".h5"));
+    }
+    /// initialize `stat_group`.
+    hid_t stat_group;
+    if (this->myrank == 0)
+        stat_group = H5Gopen(
+                this->stat_file,
+                "get_3D_correlations",
+                H5P_DEFAULT);
+    else
+        stat_group = 0;
+
+    compute_correlation(
+            this->kk,
+            this->vel,
+            this->Rij,
+            stat_group,
+            this->iteration / this->niter_out,
+            "velocity",
             fname,
-            "Rij",
-            this->iteration,
-            false);
+            this->iteration);
+
+    /// close stat group
+    if (this->myrank == 0)
+        H5Gclose(stat_group);
 
     return EXIT_SUCCESS;
 }
@@ -135,9 +294,11 @@ template <typename rnumber>
 int get_3D_correlations<rnumber>::finalize(void)
 {
     TIMEZONE("get_3D_correlations::finalize");
-    delete this->kk;
-    delete this->vel;
     delete this->Rij;
+    delete this->vel;
+    delete this->kk;
+    if (this->myrank == 0)
+        H5Fclose(this->stat_file);
     this->NSVE_field_stats<rnumber>::finalize();
     return EXIT_SUCCESS;
 }
diff --git a/cpp/full_code/get_3D_correlations.hpp b/cpp/full_code/get_3D_correlations.hpp
index 07ab25d144dd3a552a83848c6d3b1590f8d1613d..84187186a612f3bea7e32b80d040cb318e3bc0ad 100644
--- a/cpp/full_code/get_3D_correlations.hpp
+++ b/cpp/full_code/get_3D_correlations.hpp
@@ -32,8 +32,9 @@ template <typename rnumber>
 class get_3D_correlations: public NSVE_field_stats<rnumber>
 {
     public:
+        hid_t stat_file;
         int checkpoints_per_file;
-        int niter_out;
+        bool full_snapshot_output;
         kspace<FFTW, SMOOTH> *kk;
         field<rnumber, FFTW, THREE> *vel;
         field<rnumber, FFTW, THREExTHREE> *Rij;
diff --git a/cpp/full_code/get_rfields.cpp b/cpp/full_code/get_rfields.cpp
index 97d9d0b6944dce5fe1b40a70b66eafbfcb9b1209..9424355847b9c29ee300f08bad30dc44399de37b 100644
--- a/cpp/full_code/get_rfields.cpp
+++ b/cpp/full_code/get_rfields.cpp
@@ -59,7 +59,6 @@ int get_rfields<rnumber>::initialize(void)
             (this->simname + std::string(".h5")).c_str(),
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
-    this->niter_out = hdf5_tools::read_value<int>(parameter_file, "/parameters/niter_out");
     this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "/parameters/checkpoints_per_file");
     if (this->checkpoints_per_file == INT_MAX) // value returned if dataset does not exist
         this->checkpoints_per_file = 1;
diff --git a/cpp/full_code/get_velocity.cpp b/cpp/full_code/get_velocity.cpp
index 0411361b90b7725166bb99f6127a961ac981dca3..ac91faaff4fa47b1519c825e2acbe8a0d9533ad8 100644
--- a/cpp/full_code/get_velocity.cpp
+++ b/cpp/full_code/get_velocity.cpp
@@ -34,7 +34,6 @@ int get_velocity<rnumber>::initialize(void)
 {
     TIMEZONE("get_velocity::initialize");
     this->NSVE_field_stats<rnumber>::initialize();
-    DEBUG_MSG("after NSVE_field_stats::initialize\n");
 
     // allocate kspace
     this->kk = new kspace<FFTW, SMOOTH>(
@@ -48,7 +47,6 @@ int get_velocity<rnumber>::initialize(void)
             (this->simname + std::string(".h5")).c_str(),
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
-    this->niter_out = hdf5_tools::read_value<int>(parameter_file, "/parameters/niter_out");
     this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "/parameters/checkpoints_per_file");
     if (this->checkpoints_per_file == INT_MAX) // value returned if dataset does not exist
         this->checkpoints_per_file = 1;
diff --git a/cpp/full_code/get_velocity.hpp b/cpp/full_code/get_velocity.hpp
index 40befc377e0af2492324c04a4fee38a1f24e0e0d..ff453a9a0cc9c50e9c003f902b2cb4ab3dff0970 100644
--- a/cpp/full_code/get_velocity.hpp
+++ b/cpp/full_code/get_velocity.hpp
@@ -33,7 +33,6 @@ class get_velocity: public NSVE_field_stats<rnumber>
 {
     public:
         int checkpoints_per_file;
-        int niter_out;
         kspace<FFTW, SMOOTH> *kk;
         field<rnumber, FFTW, THREE> *vel;
 
diff --git a/cpp/full_code/joint_acc_vel_stats.cpp b/cpp/full_code/joint_acc_vel_stats.cpp
index 1cacd79eb3e9d97ad435de4e226c04855d7ab1f1..6de53c1b2bf814e71ca07817332f7e53b9631e69 100644
--- a/cpp/full_code/joint_acc_vel_stats.cpp
+++ b/cpp/full_code/joint_acc_vel_stats.cpp
@@ -51,7 +51,6 @@ int joint_acc_vel_stats<rnumber>::initialize(void)
             (this->simname + std::string(".h5")).c_str(),
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
-    this->niter_out = hdf5_tools::read_value<int>(parameter_file, "/parameters/niter_out");
     if (H5Lexists(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT))
     {
         this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "/parameters/checkpoints_per_file");
diff --git a/cpp/full_code/joint_acc_vel_stats.hpp b/cpp/full_code/joint_acc_vel_stats.hpp
index 2abf116835266efe571136f626582ce4e3b262d2..be5f42746801fbb8c9599f06467564b38f9032d2 100644
--- a/cpp/full_code/joint_acc_vel_stats.hpp
+++ b/cpp/full_code/joint_acc_vel_stats.hpp
@@ -37,7 +37,6 @@ class joint_acc_vel_stats: public NSVE_field_stats<rnumber>
         vorticity_equation<rnumber, FFTW> *ve;
 
         int checkpoints_per_file;
-        int niter_out;
         double max_acceleration_estimate;
         double max_velocity_estimate;
 
diff --git a/cpp/full_code/pressure_stats.cpp b/cpp/full_code/pressure_stats.cpp
index af891221731f98ffd53851fdcd05b8b1376f268d..9af94ef60325307bec3dc4bb494953cec245eab9 100644
--- a/cpp/full_code/pressure_stats.cpp
+++ b/cpp/full_code/pressure_stats.cpp
@@ -79,7 +79,6 @@ int pressure_stats<rnumber>::initialize(void)
             (this->simname + std::string(".h5")).c_str(),
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
-    this->niter_out = hdf5_tools::read_value<int>(parameter_file, "/parameters/niter_out");
     this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "/parameters/checkpoints_per_file");
     if (this->checkpoints_per_file == INT_MAX) // value returned if dataset does not exist
         this->checkpoints_per_file = 1;
diff --git a/cpp/full_code/pressure_stats.hpp b/cpp/full_code/pressure_stats.hpp
index 785013a7431db42511c629c07dfca6460a585600..86ae9457b7e0f9c475604c9f7cfdc859c7b07b36 100644
--- a/cpp/full_code/pressure_stats.hpp
+++ b/cpp/full_code/pressure_stats.hpp
@@ -46,7 +46,6 @@ class pressure_stats: public NSVE_field_stats<rnumber>
         field<rnumber, FFTW, ONE> *scalar;
 
         int checkpoints_per_file;
-        int niter_out;
         double max_pressure_estimate;
 
         pressure_stats(
diff --git a/cpp/full_code/resize.cpp b/cpp/full_code/resize.cpp
index 46b035f85bca1c277916df8da6caf06ca69c65a2..ffb19bdcbf5a4bdeb3d08c365a61567d9dc87369 100644
--- a/cpp/full_code/resize.cpp
+++ b/cpp/full_code/resize.cpp
@@ -40,8 +40,6 @@ int resize<rnumber>::initialize(void)
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
 
-    this->niter_out = hdf5_tools::read_value<int>(
-            parameter_file, "/parameters/niter_out");
     H5Fclose(parameter_file);
     // the following ensures the file is free for rank 0 to open in read/write mode
     MPI_Barrier(this->comm);
diff --git a/cpp/full_code/resize.hpp b/cpp/full_code/resize.hpp
index 521c04618fa34433ee43a8f474320e5afec7606c..afb51cd83c8c74b479b1b819d430365be1d647c2 100644
--- a/cpp/full_code/resize.hpp
+++ b/cpp/full_code/resize.hpp
@@ -38,8 +38,6 @@ class resize: public NSVE_field_stats<rnumber>
         int new_ny;
         int new_nz;
 
-        int niter_out;
-
         field<rnumber, FFTW, THREE> *new_field;
 
         resize(
diff --git a/cpp/full_code/symmetrize_test.cpp b/cpp/full_code/symmetrize_test.cpp
index 6403bec61a2ca8ee2f6e0bd0a4019ebf417c96c6..a93153abe61cd25a65db883b2358f4d518302cf8 100644
--- a/cpp/full_code/symmetrize_test.cpp
+++ b/cpp/full_code/symmetrize_test.cpp
@@ -149,12 +149,12 @@ int symmetrize_test<rnumber>::do_work(void)
     // now compare the two fields
     max_diff = 0;
     k_at_max_diff = 0;
-    kk->CLOOP_K2(
-            [&](ptrdiff_t cindex,
-                ptrdiff_t xindex,
-                ptrdiff_t yindex,
-                ptrdiff_t zindex,
-                double k2){
+    kk->CLOOP(
+            [&](const ptrdiff_t cindex,
+                const ptrdiff_t xindex,
+                const ptrdiff_t yindex,
+                const ptrdiff_t zindex,
+                const double k2){
             double diff_re0 = test_field0->cval(cindex, 0, 0) - test_field1->cval(cindex, 0, 0);
             double diff_re1 = test_field0->cval(cindex, 1, 0) - test_field1->cval(cindex, 1, 0);
             double diff_re2 = test_field0->cval(cindex, 2, 0) - test_field1->cval(cindex, 2, 0);
@@ -235,12 +235,12 @@ int symmetrize_test<rnumber>::do_work(void)
     // now compare the two fields
     max_diff = 0;
     k_at_max_diff = 0;
-    kk->CLOOP_K2(
-            [&](ptrdiff_t cindex,
-                ptrdiff_t xindex,
-                ptrdiff_t yindex,
-                ptrdiff_t zindex,
-                double k2){
+    kk->CLOOP(
+            [&](const ptrdiff_t cindex,
+                const ptrdiff_t xindex,
+                const ptrdiff_t yindex,
+                const ptrdiff_t zindex,
+                const double k2){
             double diff_re0 = test_field0->cval(cindex, 0, 0) - test_field1->cval(cindex, 0, 0);
             double diff_re1 = test_field0->cval(cindex, 1, 0) - test_field1->cval(cindex, 1, 0);
             double diff_re2 = test_field0->cval(cindex, 2, 0) - test_field1->cval(cindex, 2, 0);
diff --git a/cpp/full_code/write_rpressure.hpp b/cpp/full_code/write_rpressure.hpp
index 38556fb20c0f46e9e62fd8a05e6b4ad614fe9ec6..eb61ca171c972c0160b28c00943fe662e59c94ba 100644
--- a/cpp/full_code/write_rpressure.hpp
+++ b/cpp/full_code/write_rpressure.hpp
@@ -34,8 +34,6 @@ template <typename rnumber>
 class write_rpressure: public NSVE_field_stats<rnumber>
 {
     public:
-        int niter_out;
-
         vorticity_equation<rnumber, FFTW> *ve;
         field<rnumber, FFTW, ONE> *pressure;
         field_binary_IO <rnumber, REAL, ONE> *bin_IO_scalar;
diff --git a/cpp/kspace.cpp b/cpp/kspace.cpp
index d199d721a6fcd145327064a750a420693879e923..8d0da816399f54efb8aa71a670cbda635b89a04b 100644
--- a/cpp/kspace.cpp
+++ b/cpp/kspace.cpp
@@ -138,7 +138,7 @@ kspace<be, dt>::kspace(
                 std::fill_n(nshell_local, this->nshells, 0);
             });
 
-    this->CLOOP_K2_NXMODES(
+    this->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -277,7 +277,7 @@ void kspace<be, dt>::low_pass(
         const double kmax)
 {
     const double km2 = kmax*kmax;
-    this->CLOOP_K2(
+    this->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -311,7 +311,7 @@ void kspace<be, dt>::ball_filter(
         const double ell)
 {
     const double prefactor0 = double(3) / std::pow(ell/2, 3);
-    this->CLOOP_K2(
+    this->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -405,7 +405,7 @@ void kspace<be, dt>::Gauss_n_filter(
         const double sigma)
 {
     const double prefactor = -std::pow(sigma, n)/2;
-    this->CLOOP_K2(
+    this->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -610,11 +610,11 @@ void kspace<be, dt>::dealias(typename fftw_interface<rnumber>::complex *__restri
             this->low_pass<rnumber, fc>(a, this->kM);
             break;
         case SMOOTH:
-            this->CLOOP_simd(
+            this->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
-                    const ptrdiff_t zindex){
+                    const ptrdiff_t zindex) {
                     const double kk2 = (std::pow(this->kx[xindex]/this->kMx, 2) +
                                         std::pow(this->ky[yindex]/this->kMy, 2) +
                                         std::pow(this->kz[zindex]/this->kMz, 2));
@@ -626,6 +626,77 @@ void kspace<be, dt>::dealias(typename fftw_interface<rnumber>::complex *__restri
     }
 }
 
+template <field_backend be,
+          kspace_dealias_type dt>
+template <typename rnumber>
+void kspace<be, dt>::project_divfree(
+        typename fftw_interface<rnumber>::complex *__restrict__ xa,
+        typename fftw_interface<rnumber>::complex *__restrict__ ya,
+        typename fftw_interface<rnumber>::complex *__restrict__ za,
+        const bool maintain_energy)
+{
+    TIMEZONE("kspace::project_divfree");
+    this->CLOOP(
+                [&](const ptrdiff_t cindex,
+                    const ptrdiff_t xindex,
+                    const ptrdiff_t yindex,
+                    const ptrdiff_t zindex,
+                    const double k2){
+                if (k2 > 0)
+        {
+                    const typename fftw_interface<rnumber>::complex tval = {
+                     rnumber((this->kx[xindex]*((*(xa + cindex))[0]) +
+                              this->ky[yindex]*((*(ya + cindex))[0]) +
+                              this->kz[zindex]*((*(za + cindex))[0]) ) / k2),
+                     rnumber((this->kx[xindex]*((*(xa + cindex))[1]) +
+                              this->ky[yindex]*((*(ya + cindex))[1]) +
+                              this->kz[zindex]*((*(za + cindex))[1]) ) / k2)};
+                    double initial_size = 1;
+                    double projected_size = 1;
+                    if (maintain_energy)
+                    {
+                        initial_size = std::sqrt(
+                                ((*(xa + cindex))[0])*((*(xa + cindex))[0]) +
+                                ((*(ya + cindex))[0])*((*(ya + cindex))[0]) +
+                                ((*(za + cindex))[0])*((*(za + cindex))[0]) +
+                                ((*(xa + cindex))[1])*((*(xa + cindex))[1]) +
+                                ((*(ya + cindex))[1])*((*(ya + cindex))[1]) +
+                                ((*(za + cindex))[1])*((*(za + cindex))[1]));
+                    }
+                    for (int imag_part=0; imag_part<2; imag_part++)
+                    {
+                        xa[cindex][imag_part] -= tval[imag_part]*this->kx[xindex];
+                        ya[cindex][imag_part] -= tval[imag_part]*this->ky[yindex];
+                        za[cindex][imag_part] -= tval[imag_part]*this->kz[zindex];
+                    }
+                    if (maintain_energy)
+                    {
+                        projected_size = std::sqrt(
+                                ((*(xa + cindex))[0])*((*(xa + cindex))[0]) +
+                                ((*(ya + cindex))[0])*((*(ya + cindex))[0]) +
+                                ((*(za + cindex))[0])*((*(za + cindex))[0]) +
+                                ((*(xa + cindex))[1])*((*(xa + cindex))[1]) +
+                                ((*(ya + cindex))[1])*((*(ya + cindex))[1]) +
+                                ((*(za + cindex))[1])*((*(za + cindex))[1]));
+                        if (projected_size > 0)
+                        #pragma omp simd
+                            for (int imag_part=0; imag_part<2; imag_part++)
+                            {
+                                (*(xa + cindex))[imag_part] *= initial_size / projected_size;
+                                (*(ya + cindex))[imag_part] *= initial_size / projected_size;
+                                (*(za + cindex))[imag_part] *= initial_size / projected_size;
+                            }
+                    }
+           }
+        }
+    );
+    if (this->layout->myrank == this->layout->rank[0][0]) {
+        std::fill_n((rnumber*)(xa), 2, 0.0);
+        std::fill_n((rnumber*)(ya), 2, 0.0);
+        std::fill_n((rnumber*)(za), 2, 0.0);
+    }
+}
+
 template <field_backend be,
           kspace_dealias_type dt>
 template <typename rnumber>
@@ -634,7 +705,7 @@ void kspace<be, dt>::project_divfree(
         const bool maintain_energy)
 {
     TIMEZONE("kspace::project_divfree");
-    this->CLOOP_K2(
+    this->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -702,7 +773,7 @@ template <typename rnumber>
 void kspace<be, dt>::rotate_divfree(typename fftw_interface<rnumber>::complex *__restrict__ a)
 {
     TIMEZONE("kspace::rotate_divfree");
-    this->CLOOP_K2_simd(
+    this->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -776,7 +847,7 @@ void kspace<be, dt>::cospectrum(
                 std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0);
             });
 
-    this->CLOOP_K2_NXMODES(
+    this->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -910,7 +981,7 @@ void kspace<be, dt>::cospectrum(
                 std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0);
             });
 
-    this->CLOOP_K2_NXMODES(
+    this->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -954,7 +1025,7 @@ double kspace<be, dt>::L2norm(
             1,
             shared_array_zero_initializer<double, 1>);
 
-    this->CLOOP_K2_NXMODES(
+    this->CLOOP(
             [&](const ptrdiff_t cindex,
                 const ptrdiff_t xindex,
                 const ptrdiff_t yindex,
@@ -1552,6 +1623,17 @@ template void kspace<FFTW, SMOOTH>::project_divfree<double>(
        typename fftw_interface<double>::complex *__restrict__ a,
        const bool);
 
+template void kspace<FFTW, SMOOTH>::project_divfree<float>(
+       typename fftw_interface<float>::complex *__restrict__ xa,
+       typename fftw_interface<float>::complex *__restrict__ ya,
+       typename fftw_interface<float>::complex *__restrict__ za,
+       const bool);
+template void kspace<FFTW, SMOOTH>::project_divfree<double>(
+       typename fftw_interface<double>::complex *__restrict__ xa,
+       typename fftw_interface<double>::complex *__restrict__ ya,
+       typename fftw_interface<double>::complex *__restrict__ za,
+       const bool);
+
 template void kspace<FFTW, SMOOTH>::force_divfree<float>(
        typename fftw_interface<float>::complex *__restrict__ a);
 template void kspace<FFTW, SMOOTH>::force_divfree<double>(
diff --git a/cpp/kspace.hpp b/cpp/kspace.hpp
index 8069ab0464e77c3da082e61219f7029dd66b2376..0344b19a094f141d997bc63ab0ac0a8652fccfe2 100644
--- a/cpp/kspace.hpp
+++ b/cpp/kspace.hpp
@@ -31,6 +31,8 @@
 #include "fftw_interface.hpp"
 #include "field_layout.hpp"
 
+#include <functional>
+
 enum field_backend {FFTW};
 enum kspace_dealias_type {ONE_HALF, TWO_THIRDS, SMOOTH};
 
@@ -158,21 +160,27 @@ class kspace
         double L2norm(
                 const rnumber(* __restrict__ a)[2]);
 
-        template <class func_type>
-        void CLOOP(func_type expression)
+        void CLOOP(std::function<void(const ptrdiff_t,
+                                      const ptrdiff_t,
+                                      const ptrdiff_t,
+                                      const ptrdiff_t)> expression)
         {
             start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
             #pragma omp parallel
             {
-                const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
-                const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
+                const ptrdiff_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
+                const ptrdiff_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
 
-                for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++){
-                    for (hsize_t zindex = start; zindex < end; zindex++){
+                for (ptrdiff_t yindex = 0;
+                     yindex < ptrdiff_t(this->layout->subsizes[0]);
+                     yindex++){
+                    for (ptrdiff_t zindex = start; zindex < end; zindex++){
                         const ptrdiff_t cindex = (
                                 yindex*this->layout->subsizes[1]*this->layout->subsizes[2] +
                                 zindex*this->layout->subsizes[2]);
-                        for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++)
+                        for (ptrdiff_t xindex = 0;
+                             xindex < ptrdiff_t(this->layout->subsizes[2]);
+                             xindex++)
                         {
                             expression(cindex + xindex, xindex, yindex, zindex);
                         }
@@ -181,44 +189,53 @@ class kspace
             }
             finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
         }
-        template <class func_type>
-        void CLOOP_simd(func_type expression)
+        void CLOOP(std::function<void(const ptrdiff_t)> expression)
         {
             start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
             #pragma omp parallel
             {
-                const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
-                const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
+                const ptrdiff_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
+                const ptrdiff_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
 
-                for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++){
-                    #pragma omp simd
-                    for (hsize_t zindex = start; zindex < end; zindex++){
+                for (ptrdiff_t yindex = 0;
+                     yindex < ptrdiff_t(this->layout->subsizes[0]);
+                     yindex++) {
+                    for (ptrdiff_t zindex = start; zindex < end; zindex++){
                         const ptrdiff_t cindex = (
                                 yindex*this->layout->subsizes[1]*this->layout->subsizes[2] +
                                 zindex*this->layout->subsizes[2]);
-                        for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++)
+                        for (ptrdiff_t xindex = 0;
+                             xindex < ptrdiff_t(this->layout->subsizes[2]);
+                             xindex++)
                         {
-                            expression(cindex + xindex, xindex, yindex, zindex);
+                            expression(cindex + xindex);
                         }
                     }
                 }
             }
             finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
         }
-        template <class func_type>
-        void CLOOP_K2(func_type expression)
+        void CLOOP(std::function<void(const ptrdiff_t,
+                                      const ptrdiff_t,
+                                      const ptrdiff_t,
+                                      const ptrdiff_t,
+                                      const double)> expression)
         {
             start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
             #pragma omp parallel
             {
-                const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
-                const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
+                const ptrdiff_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
+                const ptrdiff_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
 
-                for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++){
-                    for (hsize_t zindex = start; zindex < end; zindex++){
+                for (ptrdiff_t yindex = 0;
+                     yindex < ptrdiff_t(this->layout->subsizes[0]);
+                     yindex++) {
+                    for (ptrdiff_t zindex = start; zindex < end; zindex++){
                         const ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2]
                                             + zindex*this->layout->subsizes[2];
-                        for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++)
+                        for (ptrdiff_t xindex = 0;
+                             xindex < ptrdiff_t(this->layout->subsizes[2]);
+                             xindex++)
                         {
                             expression(
                                     cindex+xindex,
@@ -234,54 +251,32 @@ class kspace
             }
             finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
         }
-        template <class func_type>
-        void CLOOP_K2_simd(func_type expression)
+        void CLOOP(std::function<void(const ptrdiff_t,
+                                      const ptrdiff_t,
+                                      const ptrdiff_t,
+                                      const ptrdiff_t,
+                                      const double,
+                                      const int)> expression)
         {
             start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
             #pragma omp parallel
             {
-                const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
-                const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
+                const ptrdiff_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
+                const ptrdiff_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
 
-                for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++){
-                    #pragma omp simd
-                    for (hsize_t zindex = start; zindex < end; zindex++){
-                        const ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2]
-                                            + zindex*this->layout->subsizes[2];
-                        for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++)
-                        {
-                            expression(
-                                    cindex+xindex,
-                                    xindex,
-                                    yindex,
-                                    zindex,
-                                    (this->kx[xindex]*this->kx[xindex] +
-                                     this->ky[yindex]*this->ky[yindex] +
-                                     this->kz[zindex]*this->kz[zindex]));
-                        }
-                    }
-                }
-            }
-            finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
-        }
-        template <class func_type>
-        void CLOOP_K2_NXMODES(func_type expression)
-        {
-            start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
-            #pragma omp parallel
-            {
-                const hsize_t start = OmpUtils::ForIntervalStart(this->layout->subsizes[1]);
-                const hsize_t end = OmpUtils::ForIntervalEnd(this->layout->subsizes[1]);
-
-                for (hsize_t yindex = 0; yindex < this->layout->subsizes[0]; yindex++){
-                    for (hsize_t zindex = start; zindex < end; zindex++){
+                for (ptrdiff_t yindex = 0;
+                     yindex < ptrdiff_t(this->layout->subsizes[0]);
+                     yindex++) {
+                    for (ptrdiff_t zindex = start; zindex < end; zindex++){
                         const ptrdiff_t cindex = yindex*this->layout->subsizes[1]*this->layout->subsizes[2]
                                             + zindex*this->layout->subsizes[2];
                         const double k2 = (
                                 this->ky[yindex]*this->ky[yindex] +
                                 this->kz[zindex]*this->kz[zindex]);
                         expression(cindex, 0, yindex, zindex, k2, 1);
-                        for (hsize_t xindex = 1; xindex < this->layout->subsizes[2]; xindex++)
+                        for (ptrdiff_t xindex = 1;
+                             xindex < ptrdiff_t(this->layout->subsizes[2]);
+                             xindex++)
                         {
                             expression(cindex+xindex, xindex, yindex, zindex, k2 + this->kx[xindex]*this->kx[xindex], 2);
                         }
@@ -294,6 +289,12 @@ class kspace
         void project_divfree(
                 typename fftw_interface<rnumber>::complex *__restrict__ a,
                 const bool maintain_energy = false);
+        template <typename rnumber>
+        void project_divfree(
+                typename fftw_interface<rnumber>::complex *__restrict__ xa,
+                typename fftw_interface<rnumber>::complex *__restrict__ ya,
+                typename fftw_interface<rnumber>::complex *__restrict__ za,
+                const bool maintain_energy = false);
         // TODO: can the following be done in a cleaner way?
         template <typename rnumber>
         void force_divfree(typename fftw_interface<rnumber>::complex *__restrict__ a){
diff --git a/cpp/particles/interpolation/field_tinterpolator.hpp b/cpp/particles/interpolation/field_tinterpolator.hpp
index 3e005614c6ac351e663e61f25710fa1c2f3fdb65..abca8bfc0b11a8bda754936f31c4a177018f717f 100644
--- a/cpp/particles/interpolation/field_tinterpolator.hpp
+++ b/cpp/particles/interpolation/field_tinterpolator.hpp
@@ -101,9 +101,8 @@ class field_tinterpolator
                         // interpolation adds on top of existing values, so result must be cleared.
                         std::fill_n(result1, pset.getLocalNumberOfParticles()*ncomp(fc), 0);
                         pset.sample(*(this->field_list[1]), result1);
-                        // not using LOOP method because we want the inner loop over ncomp,
+                        // not using LOOP method below because we want the inner loop over ncomp,
                         // not over pset.getStateSize()
-                        DEBUG_MSG("t value in linear interpolation is %g\n", t);
                         for (partsize_t idx_part = 0; idx_part < pset.getLocalNumberOfParticles(); idx_part++)
                         {
                             for (unsigned int cc = 0; cc < ncomp(fc); cc++)
diff --git a/cpp/particles/particle_set_input_hdf5.hpp b/cpp/particles/particle_set_input_hdf5.hpp
index ec72f984d675dc97721af61a37323a432fcb63aa..af53f4fb33ffe646118b74b34b43f6794203134d 100644
--- a/cpp/particles/particle_set_input_hdf5.hpp
+++ b/cpp/particles/particle_set_input_hdf5.hpp
@@ -168,28 +168,38 @@ class particle_set_input_hdf5: public particle_set_input<partsize_t, particle_rn
             H5Sclose(fspace);
             H5Dclose(dataset);
 
-            // open label dataset
-            dataset = H5Dopen(
-                    particle_file,
-                    (species_name + std::string("/label/") + std::to_string(iteration)).c_str(),
-                    H5P_DEFAULT);
-            if (dataset >= 0 && dataset != H5E_ERROR) {
-                fspace = H5Screate_simple(1, fdims, NULL);
-                mspace = H5Screate_simple(1, mdims, NULL);
-                H5Sselect_hyperslab(fspace, H5S_SELECT_SET, foffset, NULL, mdims, NULL);
-                H5Dread(
-                        dataset,
-                        hdf5_tools::hdf5_type_id<partsize_t>(),
-                        mspace,
-                        fspace,
-                        H5P_DEFAULT,
-                        split_particle_label.get());
-                H5Sclose(mspace);
-                H5Sclose(fspace);
-                H5Dclose(dataset);
+            const std::string label_dset_name =
+                species_name + std::string("/label/");
+            if (H5Lexists(particle_file, label_dset_name.c_str(), H5P_DEFAULT) > 0)
+            {
+                // open label dataset
+                dataset = H5Dopen(
+                        particle_file,
+                        (label_dset_name + std::to_string(iteration)).c_str(),
+                        H5P_DEFAULT);
+                if (dataset >= 0 && dataset != H5E_ERROR) {
+                    fspace = H5Screate_simple(1, fdims, NULL);
+                    mspace = H5Screate_simple(1, mdims, NULL);
+                    H5Sselect_hyperslab(fspace, H5S_SELECT_SET, foffset, NULL, mdims, NULL);
+                    H5Dread(
+                            dataset,
+                            hdf5_tools::hdf5_type_id<partsize_t>(),
+                            mspace,
+                            fspace,
+                            H5P_DEFAULT,
+                            split_particle_label.get());
+                    H5Sclose(mspace);
+                    H5Sclose(fspace);
+                    H5Dclose(dataset);
+                } else {
+                    DEBUG_MSG("Could not read labels for species %s and iteration %d; H5Dopen returned %ld\n",
+                            species_name.c_str(), iteration, dataset);
+                    assert(dataset != H5E_ERROR);
+                    assert(dataset >= 0);
+                }
             } else {
-                DEBUG_MSG("Could not read labels for species %s and iteration %d; H5Dopen returned %ld\n",
-                        species_name.c_str(), iteration, dataset);
+                    DEBUG_MSG("Didn't find labels for species %s to read. Using default (label = index).\n",
+                            species_name.c_str());
             }
             delete[] mdims;
             delete[] fdims;
diff --git a/cpp/vorticity_equation.cpp b/cpp/vorticity_equation.cpp
index 40220ca1c57229dd4c9051e8fc49e242bd03d598..496b4434746489f9e492428d82dd9dc67da7df29 100644
--- a/cpp/vorticity_equation.cpp
+++ b/cpp/vorticity_equation.cpp
@@ -60,19 +60,31 @@ void vorticity_equation<rnumber, be>::update_checkpoint()
             hsize_t fields_stored;
             hid_t fid, group_id;
             fid = H5Fopen(fname.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
-            group_id = H5Gopen(fid, "vorticity/complex", H5P_DEFAULT);
-            H5Gget_num_objs(
-                    group_id,
-                    &fields_stored);
-            bool dset_exists = H5Lexists(
-                    group_id,
-                    std::to_string(this->iteration).c_str(),
+            bool vorticity_group_exists = H5Lexists(
+                    fid,
+                    "vorticity",
+                    H5P_DEFAULT);
+            if (vorticity_group_exists) {
+                bool complex_vorticity_group_exists = H5Lexists(
+                    fid,
+                    "vorticity/complex",
                     H5P_DEFAULT);
-            H5Gclose(group_id);
+                if (complex_vorticity_group_exists) {
+                    group_id = H5Gopen(fid, "vorticity/complex", H5P_DEFAULT);
+                    H5Gget_num_objs(
+                            group_id,
+                            &fields_stored);
+                    bool dset_exists = H5Lexists(
+                            group_id,
+                            std::to_string(this->iteration).c_str(),
+                            H5P_DEFAULT);
+                    H5Gclose(group_id);
+                    if ((int(fields_stored) >= this->checkpoints_per_file) &&
+                        !dset_exists)
+                        this->checkpoint++;
+                }
+            }
             H5Fclose(fid);
-            if ((int(fields_stored) >= this->checkpoints_per_file) &&
-                !dset_exists)
-                this->checkpoint++;
         }
         else
         {
@@ -172,7 +184,7 @@ void vorticity_equation<rnumber, be>::compute_vorticity()
 {
     TIMEZONE("vorticity_equation::compute_vorticity");
     this->cvorticity->real_space_representation = false;
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -201,7 +213,7 @@ void vorticity_equation<rnumber, be>::compute_velocity(field<rnumber, be, THREE>
     TIMEZONE("vorticity_equation::compute_velocity");
     *(this->u) = 0.0;
     this->u->real_space_representation = false;
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -337,7 +349,7 @@ void vorticity_equation<rnumber, be>::add_forcing(
                 1,
                 shared_array_zero_initializer<double, 1>);
         double energy_in_shell = 0;
-        this->kk->CLOOP_K2_NXMODES(
+        this->kk->CLOOP(
                     [&](const ptrdiff_t cindex,
                         const ptrdiff_t xindex,
                         const ptrdiff_t yindex,
@@ -354,8 +366,7 @@ void vorticity_equation<rnumber, be>::add_forcing(
                             vort_field->cval(cindex, 2, 0)*vort_field->cval(cindex, 2, 0) + vort_field->cval(cindex, 2, 1)*vort_field->cval(cindex, 2, 1)
                             ) / k2;
                     }
-        }
-        );
+        });
         local_energy_in_shell.mergeParallel();
         MPI_Allreduce(
                 local_energy_in_shell.getMasterData(),
@@ -418,7 +429,7 @@ void vorticity_equation<rnumber, be>::impose_forcing(
                 1,
                 shared_array_zero_initializer<double, 1>);
         double energy_in_shell, total_energy;
-        this->kk->CLOOP_K2_NXMODES(
+        this->kk->CLOOP(
                     [&](const ptrdiff_t cindex,
                         const ptrdiff_t xindex,
                         const ptrdiff_t yindex,
@@ -437,8 +448,7 @@ void vorticity_equation<rnumber, be>::impose_forcing(
                 if ((this->fk0 <= knorm) && (this->fk1 >= knorm))
                     *local_energy_in_shell.getMine() += mode_energy;
             }
-        }
-        );
+        });
         local_total_energy.mergeParallel();
         local_energy_in_shell.mergeParallel();
         MPI_Allreduce(
@@ -463,7 +473,7 @@ void vorticity_equation<rnumber, be>::impose_forcing(
         double temp_famplitude = std::sqrt(
                 (this->energy - total_energy + energy_in_shell)
                / energy_in_shell);
-        this->kk->CLOOP_K2(
+        this->kk->CLOOP(
                     [&](const ptrdiff_t cindex,
                         const ptrdiff_t xindex,
                         const ptrdiff_t yindex,
@@ -553,7 +563,7 @@ void vorticity_equation<rnumber, be>::Euler_step(double dt)
     TIMEZONE("vorticity_equation::step_Euler");
     *this->v[1] = 0.0;
     this->omega_nonlin(0, this->iteration*dt);
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -591,7 +601,7 @@ void vorticity_equation<rnumber, be>::step(double dt)
     TIMEZONE("vorticity_equation::step");
     *this->v[1] = 0.0;
     this->omega_nonlin(0, this->iteration*dt);
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -611,7 +621,7 @@ void vorticity_equation<rnumber, be>::step(double dt)
     this->kk->template force_divfree<rnumber>(this->v[1]->get_cdata());
 
     this->omega_nonlin(1, (this->iteration + 1)*dt);
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -635,7 +645,7 @@ void vorticity_equation<rnumber, be>::step(double dt)
     this->omega_nonlin(2, (this->iteration + 1./2.)*dt);
     // store old vorticity
     *this->v[1] = *this->v[0];
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -681,7 +691,8 @@ void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> *
             this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,cc);
         });
     this->v[1]->dft();
-    this->kk->CLOOP_K2(
+    this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata());
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const hsize_t xindex,
                     const hsize_t yindex,
@@ -708,7 +719,8 @@ void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> *
             this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,(cc+1)%3);
         });
     this->v[1]->dft();
-    this->kk->CLOOP_K2(
+    this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata());
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const hsize_t xindex,
                     const hsize_t yindex,
@@ -765,7 +777,7 @@ void vorticity_equation<rnumber, be>::compute_Lagrangian_acceleration(
         this->v[1],
         acceleration);
     // loop over all modes, adding the rest of the terms
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -802,7 +814,7 @@ void vorticity_equation<rnumber, be>::compute_Eulerian_acceleration(
     this->compute_velocity(this->cvorticity);
     acceleration->real_space_representation = false;
     /* put in linear terms */
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -847,7 +859,7 @@ void vorticity_equation<rnumber, be>::compute_Eulerian_acceleration(
     );
     this->v[1]->dft();
     this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata());
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
@@ -886,7 +898,7 @@ void vorticity_equation<rnumber, be>::compute_Eulerian_acceleration(
     );
     this->v[1]->dft();
     this->kk->template dealias<rnumber, THREE>(this->v[1]->get_cdata());
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](const ptrdiff_t cindex,
                     const ptrdiff_t xindex,
                     const ptrdiff_t yindex,
diff --git a/examples/correlation_length/test.py b/examples/correlation_length/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..617bf793fd38659d2915a3f4bda908fb83817ab8
--- /dev/null
+++ b/examples/correlation_length/test.py
@@ -0,0 +1,188 @@
+from TurTLE import DNS, PP
+
+import os
+import h5py
+import matplotlib.pyplot as plt
+import numpy as np
+
+def read_correlation(
+        group,
+        field_name = 'velocity',
+        components = '(0, 0, x)',
+        iteration = -1):
+    dataset = group['R^' + field_name + '_ij' + components][iteration]
+    Rij = dataset[:dataset.shape[0]//2]
+    Rij[1:] += dataset[dataset.shape[0]//2+1:dataset.shape[0]][::-1]
+    Rij[1:] /= 2
+    return Rij
+
+def compute_stat_possibilities(
+        cc,
+        iteration):
+    post_file = h5py.File(cc.simname + '_post.h5', 'r')
+    group = post_file['get_3D_correlations/correlations']
+    Rijx = read_correlation(group, components = '(0, 0, x)')
+    Rijy = read_correlation(group, components = '(0, y, 0)')
+    Rijz = read_correlation(group, components = '(z, 0, 0)')
+    post_file.close()
+    data_file = cc.get_data_file()
+    stat_index = iteration//cc.parameters['niter_stat']
+    # read latest slice data
+    cc.statistics['correlations/energy'] = data_file['statistics/moments/velocity'][
+              stat_index, 2, 3] / 2
+    cc.statistics['correlations/Uint'] = (2*(cc.statistics['correlations/energy']/3))**0.5
+    cc.statistics['correlations/R_LL'] = (Rijx[:, 0, 0] + Rijy[:, 1, 1] + Rijz[:, 2, 2])
+    cc.statistics['correlations/rho_LL'] = cc.statistics['correlations/R_LL'] / (2*cc.statistics['correlations/energy'])
+    cc.statistics['correlations/x'] = cc.get_coord('x')[:cc.parameters['nx']//2]
+    dx = cc.statistics['correlations/x'][1] - cc.statistics['correlations/x'][0]
+    cc.statistics['correlations/Lint'] = np.trapz(
+            cc.statistics['correlations/rho_LL'], dx = dx)
+    E_k = 0.5*(data_file['statistics/spectra/velocity_velocity'][stat_index, :, 0, 0]
+           + data_file['statistics/spectra/velocity_velocity'][stat_index, :, 1, 1]
+           + data_file['statistics/spectra/velocity_velocity'][stat_index, :, 2, 2])
+    # normalization factor is (4 pi * shell_width * kshell^2) / (nmodes in shell * dkx*dky*dkz)
+    norm_factor = (4*np.pi*cc.statistics['dk']*cc.statistics['kshell']**2) / (cc.parameters['dkx']*cc.parameters['dky']*cc.parameters['dkz']*cc.statistics['nshell'])
+    E_k_normalized = E_k * norm_factor
+    kval = np.round(cc.statistics['kshell'])
+    kval_normalized = cc.statistics['kshell']
+    # See Pope equations 6.211 and 6.216
+    for key in ['E11_k1_6.211_sum',
+                'E11_k1_6.211_trapz',
+                'E11_k1_6.211_normalized_sum',
+                'E11_k1_6.211_normalized_trapz',
+                'E11_k1_6.216_sum',
+                'E11_k1_6.216_trapz',
+                'E11_k1_6.216_normalized_sum',
+                'E11_k1_6.216_normalized_trapz']:
+        cc.statistics['correlations/' + key] = np.zeros_like(E_k)
+    for kk in range(kval.shape[0]-2):
+        qq = kval[kk] * cc.statistics['correlations/x']
+        cc.statistics['correlations/E11_k1_6.211_sum'][kk] = (
+                (2.0 / np.pi)
+              * np.sum(cc.statistics['correlations/R_LL'] * np.cos(qq))*dx)
+        cc.statistics['correlations/E11_k1_6.211_trapz'][kk] = (
+                (2.0 / np.pi)
+              * np.trapz(cc.statistics['correlations/R_LL'] * np.cos(qq), dx = dx))
+        qq = kval_normalized[kk] * cc.statistics['correlations/x']
+        cc.statistics['correlations/E11_k1_6.211_normalized_sum'][kk] = (
+                (2.0 / np.pi)
+              * np.sum(cc.statistics['correlations/R_LL'] * np.cos(qq))*dx)
+        cc.statistics['correlations/E11_k1_6.211_normalized_trapz'][kk] = (
+                (2.0 / np.pi)
+              * np.trapz(cc.statistics['correlations/R_LL'] * np.cos(qq), dx = dx))
+        prefactor = 3 # because R_LL = 3 * R_11
+        cc.statistics['correlations/E11_k1_6.216_trapz'][kk] = prefactor*np.trapz(
+                (E_k[kk+1:-2] / kval[kk+1:-2]) * (1 - kval[kk]**2 / kval[kk+1:-2]**2),
+                dx = cc.statistics['dk'])
+        cc.statistics['correlations/E11_k1_6.216_sum'][kk] = prefactor*np.sum(
+                (E_k[kk+1:-2] / kval[kk+1:-2]) * (1 - kval[kk]**2 / kval[kk+1:-2]**2))
+        cc.statistics['correlations/E11_k1_6.216_normalized_trapz'][kk] = prefactor*np.trapz(
+                (E_k_normalized[kk+1:-2] / kval_normalized[kk+1:-2])
+                * (1 - kval_normalized[kk]**2 / kval_normalized[kk+1:-2]**2),
+                kval_normalized[kk+1:-2])
+        cc.statistics['correlations/E11_k1_6.216_normalized_sum'][kk] = prefactor*np.sum(
+                (E_k_normalized[kk+1:-2] / kval_normalized[kk+1:-2])
+                * (1 - kval_normalized[kk]**2 / kval_normalized[kk+1:-2]**2))
+    for key in ['R_LL_from_spectrum_sum',
+                'R_LL_from_spectrum_trapz',
+                'R_LL_from_normalized_spectrum_sum',
+                'R_LL_from_normalized_spectrum_trapz']:
+        cc.statistics['correlations/' + key] = np.zeros_like(
+            cc.statistics['correlations/R_LL'])
+        cc.statistics['correlations/' + key][0] = np.sum(E_k)*2
+    for ii in range(1, cc.statistics['correlations/R_LL'].shape[0]):
+        rr = cc.statistics['correlations/x'][ii]
+        qq = kval[1:-2] * rr
+        cc.statistics['correlations/R_LL_from_spectrum_sum'][ii] = 2 * 3 * np.sum(
+                E_k[1:-2]
+                * (np.sin(qq) - qq * np.cos(qq))
+                / qq**3)
+        cc.statistics['correlations/R_LL_from_spectrum_trapz'][ii] = 2 * 3 * np.trapz(
+                E_k[1:-2]
+                * (np.sin(qq) - qq * np.cos(qq))
+                / qq**3,
+                dx = cc.statistics['dk'])
+        qq = kval_normalized[1:-2] * rr
+        cc.statistics['correlations/R_LL_from_normalized_spectrum_sum'][ii] = 2 * 3 * np.sum(
+                E_k_normalized[1:-2]
+                * (np.sin(qq) - qq * np.cos(qq))
+                / qq**3)
+        cc.statistics['correlations/R_LL_from_normalized_spectrum_trapz'][ii] = 2 * 3 * np.trapz(
+                E_k_normalized[1:-2]
+                * (np.sin(qq) - qq * np.cos(qq))
+                / qq**3,
+                kval_normalized[1:-2])
+    kshell = cc.statistics['kshell']
+    cc.statistics['correlations/Lint_from_spectrum'] = (
+            (np.pi /
+              (2*cc.statistics['correlations/Uint']**2)) *
+             np.trapz(E_k_normalized[:-2] /
+                      np.maximum(1, kshell[:-2]),
+                      kshell[:-2]))
+    return None
+
+def main():
+    simname = 'fbM_N0128_kMeta1.5_tf2'
+    if not os.path.exists(simname + '.h5'):
+        print('please download ' + simname + ' data from shared folder')
+    if not os.path.exists(simname + '_post.h5'):
+        c = PP()
+        c.launch([
+            'get_3D_correlations',
+            '--simname', simname,
+            '--full_snapshot_output', '1',
+            '--iter0', '8192',
+            '--iter1', '8192'])
+
+    c = DNS(work_dir = './',
+            simname = simname)
+    c.compute_statistics()
+    compute_stat_possibilities(c, 8192)
+
+    f = plt.figure()
+    a = f.add_subplot(111)
+    a.plot(c.statistics['correlations/x'],
+           c.statistics['correlations/R_LL'])
+    for kk in ['R_LL_from_spectrum_sum',
+               'R_LL_from_spectrum_trapz',
+               'R_LL_from_normalized_spectrum_sum',
+               'R_LL_from_normalized_spectrum_trapz']:
+        a.plot(c.statistics['correlations/x'],
+               c.statistics['correlations/' + kk])
+    f.savefig('test.pdf')
+    plt.close(f)
+    return None
+
+def main0():
+    simname = 'fbM_N0128_kMeta1.5_tf2'
+    if not os.path.exists(simname + '.h5'):
+        print('please download ' + simname + ' data from shared folder')
+    if not os.path.exists(simname + '_post.h5'):
+        c = PP()
+        c.launch([
+            'get_3D_correlations',
+            '--simname', simname,
+            '--full_snapshot_output', '1',
+            '--iter0', '8192',
+            '--iter1', '8192'])
+
+    data_file = h5py.File(simname + '_post.h5', 'r')
+    group = data_file['get_3D_correlations/correlations']
+    f = plt.figure()
+    a = f.add_subplot(111)
+    # read latest slice data
+    Rijx = read_correlation(group, components = '(0, 0, x)')
+    Rijy = read_correlation(group, components = '(0, y, 0)')
+    Rijz = read_correlation(group, components = '(z, 0, 0)')
+    Rll = (Rijx[:, 0, 0] + Rijy[:, 1, 1] + Rijz[:, 2, 2])/3
+    Rnn = (Rijx[:, 0, 1] + Rijy[:, 1, 2] + Rijz[:, 2, 0]
+         + Rijx[:, 0, 2] + Rijy[:, 1, 0] + Rijz[:, 2, 1])/6
+    a.plot(Rll)
+    a.plot(Rnn)
+    f.savefig('test.pdf')
+    plt.close(f)
+    data_file.close()
+    return None
+
+if __name__ == '__main__':
+    main()
diff --git a/examples/ou_solver/ornstein_uhlenbeck_process.cpp b/examples/ou_solver/ornstein_uhlenbeck_process.cpp
index 1aceae469f8bd3aaa63907a4d3ca3570ce0c7d29..0e2990bf7d38e794bef288b5e97c36aee28b676c 100644
--- a/examples/ou_solver/ornstein_uhlenbeck_process.cpp
+++ b/examples/ou_solver/ornstein_uhlenbeck_process.cpp
@@ -88,7 +88,7 @@ void ornstein_uhlenbeck_process<rnumber,be>::step(
 {
     // put in "CFL"-criterium TODO!!!
     TIMEZONE("ornstein_uhlenbeck_process::step");
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](ptrdiff_t cindex,
                     ptrdiff_t xindex,
                     ptrdiff_t yindex,
@@ -206,7 +206,7 @@ void ornstein_uhlenbeck_process<rnumber, be>::set_from_field(
 {
   assert(src->real_space_representation==false);
 
-  this->kk->CLOOP_K2(
+  this->kk->CLOOP(
           [&](ptrdiff_t cindex,
             ptrdiff_t xindex,
             ptrdiff_t yindex,
@@ -236,7 +236,7 @@ void ornstein_uhlenbeck_process<rnumber, be>::strip_from_field(
 {
   assert(src->real_space_representation==false);
 
-  this->kk->CLOOP_K2(
+  this->kk->CLOOP(
           [&](ptrdiff_t cindex,
             ptrdiff_t xindex,
             ptrdiff_t yindex,
@@ -270,7 +270,7 @@ void ornstein_uhlenbeck_process<rnumber,be>::add_to_field_replace(
   if (uv == "vorticity") field_to_replace = this->ou_field_vort;
   else field_to_replace = this->ou_field;
 
-  this->kk->CLOOP_K2(
+  this->kk->CLOOP(
           [&](ptrdiff_t cindex,
             ptrdiff_t xindex,
             ptrdiff_t yindex,
@@ -298,7 +298,7 @@ void ornstein_uhlenbeck_process<rnumber,be>::add_to_field_replace(
 template <class rnumber, field_backend be>
 void ornstein_uhlenbeck_process<rnumber,be>::calc_ou_vorticity(void)
 {
-  this->kk->CLOOP_K2(
+  this->kk->CLOOP(
               [&](ptrdiff_t cindex,
                   ptrdiff_t xindex,
                   ptrdiff_t yindex,
@@ -323,7 +323,7 @@ void ornstein_uhlenbeck_process<rnumber,be>::calc_ou_vorticity(void)
 template <class rnumber, field_backend be>
 void ornstein_uhlenbeck_process<rnumber,be>::calc_ou_velocity(void)
 {
-  this->kk->CLOOP_K2(
+  this->kk->CLOOP(
               [&](ptrdiff_t cindex,
                   ptrdiff_t xindex,
                   ptrdiff_t yindex,
diff --git a/examples/ou_solver/ou_vorticity_equation.cpp b/examples/ou_solver/ou_vorticity_equation.cpp
index cd1bc87d4975a79b26e15821b6d5082736a4374d..7f652939f1ca15d6ddf518702842efb2dafb771f 100644
--- a/examples/ou_solver/ou_vorticity_equation.cpp
+++ b/examples/ou_solver/ou_vorticity_equation.cpp
@@ -20,7 +20,7 @@ void ou_vorticity_equation<rnumber, be>::step(double dt)
     this->ou->add_to_field_replace(this->cvorticity, "vorticity");
     *this->v[1] = 0.0;
     this->omega_nonlin(0);
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](ptrdiff_t cindex,
                     ptrdiff_t xindex,
                     ptrdiff_t yindex,
@@ -39,7 +39,7 @@ void ou_vorticity_equation<rnumber, be>::step(double dt)
     );
 
     this->omega_nonlin(1);
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](ptrdiff_t cindex,
                     ptrdiff_t xindex,
                     ptrdiff_t yindex,
@@ -62,7 +62,7 @@ void ou_vorticity_equation<rnumber, be>::step(double dt)
     this->omega_nonlin(2);
     // store old vorticity
     *this->v[1] = *this->v[0];
-    this->kk->CLOOP_K2(
+    this->kk->CLOOP(
                 [&](ptrdiff_t cindex,
                     ptrdiff_t xindex,
                     ptrdiff_t yindex,
diff --git a/gitlab-ci/config.yaml b/gitlab-ci/config.yaml
index 50c02f97b2fa9fa42a22e058a0ea07471e49383e..87579ed778d4fa3c14fcbea03a2253fe3b7ea70a 100644
--- a/gitlab-ci/config.yaml
+++ b/gitlab-ci/config.yaml
@@ -60,7 +60,14 @@ stages:
     rules:
       - if: $CI_COMMIT_TAG
         when: never
+      - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
+        when: never
       - if: $CI_COMMIT_BRANCH
+      - changes:
+        - "**/CMakeLists.txt"
+        - "**/*.hpp.in"
+        - "**/*.hpp"
+        - "**/*.cpp"
 
 .build-intel:
     stage: build
@@ -78,7 +85,14 @@ stages:
     rules:
       - if: $CI_COMMIT_TAG
         when: never
+      - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
+        when: never
       - if: $CI_COMMIT_BRANCH
+      - changes:
+        - "**/CMakeLists.txt"
+        - "**/*.hpp.in"
+        - "**/*.hpp"
+        - "**/*.cpp"
 
 
 .test-gcc:
@@ -90,7 +104,14 @@ stages:
     rules:
       - if: $CI_COMMIT_TAG
         when: never
+      - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
+        when: never
       - if: $CI_COMMIT_BRANCH
+      - changes:
+        - "**/CMakeLists.txt"
+        - "**/*.hpp.in"
+        - "**/*.hpp"
+        - "**/*.cpp"
 
 .test-intel:
     stage: test
@@ -102,7 +123,14 @@ stages:
     rules:
       - if: $CI_COMMIT_TAG
         when: never
+      - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
+        when: never
       - if: $CI_COMMIT_BRANCH
+      - changes:
+        - "**/CMakeLists.txt"
+        - "**/*.hpp.in"
+        - "**/*.hpp"
+        - "**/*.cpp"
 
 .build-doc-base:
     stage: build