diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..36eacbb7eb9964e076a230834f1415b38c86a3ab
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,309 @@
+#######################################################################
+#                                                                     #
+#  Copyright 2019 Max Planck Institute                                #
+#                 for Dynamics and Self-Organization                  #
+#                                                                     #
+#  This file is part of bfps.                                         #
+#                                                                     #
+#  bfps is free software: you can redistribute it and/or modify       #
+#  it under the terms of the GNU General Public License as published  #
+#  by the Free Software Foundation, either version 3 of the License,  #
+#  or (at your option) any later version.                             #
+#                                                                     #
+#  bfps is distributed in the hope that it will be useful,            #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
+#  GNU General Public License for more details.                       #
+#                                                                     #
+#  You should have received a copy of the GNU General Public License  #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
+#                                                                     #
+# Contact: Cristian.Lalescu@ds.mpg.de                                 #
+#                                                                     #
+#######################################################################
+
+
+
+cmake_minimum_required(VERSION 3.10)
+cmake_policy(VERSION 3.12)
+
+if (DEFINED ENV{MPICXX})
+    message(STATUS "Using CMAKE_CXX_COMPILER=MPICXX")
+    set(CMAKE_CXX_COMPILER $ENV{MPICXX})
+else()
+    message(STATUS "MPICXX environment variable undefined, trying to find MPI")
+    set(MPI_STATIC ON)
+    find_package(MPI REQUIRED)
+endif()
+
+if (DEFINED ENV{MPICC})
+    set(CMAKE_C_COMPILER $ENV{MPICC})
+    message(STATUS "Using CMAKE_C_COMPILER=MPICC")
+endif()
+
+if (DEFINED ENV{CMAKE_INSTALL_PREFIX})
+    set(CMAKE_INSTALL_PREFIX $ENV{CMAKE_INSTALL_PREFIX})
+endif()
+
+project(BFPS)
+
+execute_process(COMMAND python ${PROJECT_SOURCE_DIR}/get_version.py OUTPUT_VARIABLE BFPS_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+project(BFPS
+        VERSION ${BFPS_VERSION}
+        LANGUAGES CXX)
+
+
+set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/morse ${CMAKE_MODULE_PATH})
+set(BFPS_LIBS "")
+
+#####################################################################################
+## MPI
+
+set(CMAKE_CXX_COMPILE_FLAGS "${CMAKE_CXX_COMPILE_FLAGS} ${MPI_CXX_COMPILE_OPTIONS}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MPI_CXX_LINK_FLAGS}")
+include_directories(${MPI_CXX_INCLUDE_DIRS})
+add_definitions(${MPI_CXX_COMPILE_DEFINITIONS})
+list(APPEND BFPS_LIBS "${MPI_CXX_LIBRARIES}")
+
+#####################################################################################
+## CXX Standard
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# set(CMAKE_CXX_EXTENSIONS OFF)
+
+#####################################################################################
+## OpenMP
+
+find_package(OpenMP REQUIRED)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+list(APPEND BFPS_LIBS "${OpenMP_CXX_LIB_NAMES}")
+
+#####################################################################################
+## Extra flags
+
+set(CMAKE_CXX_COMPILE_FLAGS "${CMAKE_CXX_COMPILE_FLAGS} $ENV{BFPS_OPTIMIZATION_FLAGS} -Wall -g")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_COMPILE_FLAGS}")
+
+#####################################################################################
+## HDF5
+
+set(HDF5_STATIC ON)
+if(NOT DEFINED ENV{HDF5_ROOT})
+    message(WARNING "The environment variable HDF5_ROOT is undefined, this might cause trouble in finding the HDF5")
+endif()
+
+set(HDF5_PREFER_PARALLEL TRUE)
+find_package(HDF5 REQUIRED)
+
+message(STATUS "HDF5_C_INCLUDE_DIRS ${HDF5_C_INCLUDE_DIRS}")
+
+include_directories(${HDF5_C_INCLUDE_DIRS})
+add_definitions(${HDF5_C_DEFINITIONS})
+list(APPEND BFPS_LIBS "${HDF5_C_LIBRARIES}")
+
+option(BFPS_HDF5_USE_SZIP "Set to on to also link against SZIP" OFF)
+
+if(BFPS_HDF5_USE_SZIP)
+    option(BFPS_HDF5_SZIP_LIB_PATH "Additional lib path for SZIP" "")
+    if(BFPS_HDF5_SZIP_LIB_PATH)
+        link_directories(${BFPS_HDF5_SZIP_LIB_PATH})
+    endif()
+    list(APPEND BFPS_LIBS "z")
+endif()
+
+#####################################################################################
+## FFTW
+
+set(FFTW_STATIC ON)
+if(NOT DEFINED ENV{FFTW_DIR})
+    message(WARNING "The environment variable FFTW_DIR is undefined, this might cause trouble in finding the FFTW")
+endif()
+
+find_package(FFTW REQUIRED OMP)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FFTW_CFLAGS_OTHER}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FFTW_LDFLAGS_OTHER}")
+list(APPEND BFPS_LIBS "${FFTW_LIBRARIES}")
+include_directories(${FFTW_INCLUDE_DIRS})
+link_directories(${FFTW_LIBRARY_DIRS})
+
+find_package(FFTW REQUIRED OMP SIMPLE)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FFTW_CFLAGS_OTHER}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FFTW_LDFLAGS_OTHER}")
+list(APPEND BFPS_LIBS "${FFTW_LIBRARIES}")
+include_directories(${FFTW_INCLUDE_DIRS})
+link_directories(${FFTW_LIBRARY_DIRS})
+
+# hack for FFTW MPI libs
+find_library(
+    FFTWF_MPI fftw3f_mpi
+    HINTS ${FFTW_LIBRARY_DIRS})
+set(BFPS_LIBS ${FFTWF_MPI} ${BFPS_LIBS})
+find_library(
+    FFTW_MPI fftw3_mpi
+    HINTS ${FFTW_LIBRARY_DIRS})
+set(BFPS_LIBS ${FFTW_MPI} ${BFPS_LIBS})
+
+
+#####################################################################################
+## Get the links and include from deps
+
+get_property(ALL_INCLUDE_DIRS DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
+get_property(ALL_LINK_DIRS DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY LINK_DIRECTORIES)
+
+#####################################################################################
+## Build the lib
+
+include_directories(${PROJECT_SOURCE_DIR}/cpp)
+
+#file(GLOB_RECURSE cpp_for_lib ${PROJECT_SOURCE_DIR}/*.cpp)
+set(cpp_for_lib
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/code_base.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/direct_numerical_simulation.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVE.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/joint_acc_vel_stats.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/test.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/filter_test.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/field_test.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/symmetrize_test.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/field_output_test.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/get_rfields.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/field_single_to_double.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/resize.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVE_field_stats.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/native_binary_to_hdf5.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/postprocess.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/field.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/kspace.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/field_layout.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/hdf5_tools.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/fftw_tools.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/vorticity_equation.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/field_binary_IO.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n1.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n2.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n3.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n4.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n5.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n6.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n7.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n8.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n9.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n10.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/Lagrange_polys.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/scope_timer.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/test_interpolation.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEparticles.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEcomplex_particles.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEp_extra_sampling.cpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_inner_computer.cpp)
+set(hpp_for_lib
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/code_base.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/direct_numerical_simulation.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVE.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/joint_acc_vel_stats.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/test.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/filter_test.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/field_test.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/symmetrize_test.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/field_output_test.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/get_rfields.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/field_single_to_double.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/resize.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVE_field_stats.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/native_binary_to_hdf5.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/postprocess.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/field.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/kspace.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/field_layout.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/hdf5_tools.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/fftw_tools.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/vorticity_equation.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/field_binary_IO.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n1.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n2.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n3.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n4.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n5.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n6.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n7.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n8.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n9.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline_n10.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/Lagrange_polys.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/scope_timer.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/test_interpolation.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEparticles.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEcomplex_particles.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEp_extra_sampling.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_inner_computer.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/abstract_particles_input.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/abstract_particles_output.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/abstract_particles_system.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/alltoall_exchanger.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/env_utils.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/lock_free_bool_array.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_computer_empty.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_computer.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_distr_mpi.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_tree.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_adams_bashforth.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_distr_mpi.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_field_computer.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_generic_interp.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_inner_computer_empty.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_input_hdf5.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_output_hdf5.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_output_mpiio.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_output_sampling_hdf5.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_sampling.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_system_builder.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_system.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/particles/particles_utils.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/main_code.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/codes_with_no_output.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVE_no_output.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/full_code/NSVEparticles_no_output.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/base.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/fftw_interface.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/bfps_timer.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/omputils.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/shared_array.hpp
+    ${PROJECT_SOURCE_DIR}/cpp/spline.hpp
+    )
+#file(GLOB_RECURSE hpp_for_lib ${PROJECT_SOURCE_DIR}/*.hpp)
+LIST(APPEND source_files ${hpp_for_lib} ${cpp_for_lib})
+
+add_library(bfps ${source_files})
+
+target_link_libraries(bfps ${BFPS_LIBS})
+
+install(TARGETS bfps EXPORT BFPS_EXPORT DESTINATION lib/ )
+install(DIRECTORY ${PROJECT_SOURCE_DIR}/cpp/ DESTINATION include/bfps/ FILES_MATCHING PATTERN "*.h*")
+
+#####################################################################################
+##Â Export the configuration
+
+configure_file(${PROJECT_SOURCE_DIR}/cmake/BFPSConfig.cmake.in ${PROJECT_BINARY_DIR}/BFPSConfig.cmake @ONLY)
+
+install(FILES "${PROJECT_BINARY_DIR}/BFPSConfig.cmake" DESTINATION lib/)
+export(TARGETS bfps FILE "${PROJECT_BINARY_DIR}/BFPSLibraryDepends.cmake")
+install(EXPORT BFPS_EXPORT DESTINATION lib/)
+
+
+#####################################################################################
+##Â Install the python wrapper
+# copy command
+install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${PROJECT_SOURCE_DIR}/bfps ${PROJECT_BINARY_DIR}/python/bfps/)")
+if(EXISTS "${PROJECT_SOURCE_DIR}/host_info.py")
+    install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/host_info.py ${PROJECT_BINARY_DIR}/python/bfps/)")
+else()
+    install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/pc_host_info.py ${PROJECT_BINARY_DIR}/python/bfps/host_info.py)")
+endif()
+install(CODE "execute_process(COMMAND python ${PROJECT_SOURCE_DIR}/setup.py install --force --prefix=${CMAKE_INSTALL_PREFIX} WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/python/)")
+
diff --git a/README.rst b/README.rst
index ddb9f2447db919248100368a9a08b13297d5e3a4..7dc457a7b00c99feec225d73b446ead083ef0a00 100644
--- a/README.rst
+++ b/README.rst
@@ -58,22 +58,10 @@ Use a console; navigate to the ``bfps`` folder, and type:
 **Full installation**
 
 If you want to run simulations on the machine where you're installing,
-you will need to call `compile_library` before installing.
+you will need to use `cmake` to compile and install the full library.
 Your machine needs to have an MPI compiler installed, the HDF5 C library
-and FFTW >= 3.4.
-The file `machine_settings_py.py` should be modified
-appropriately for your machine (otherwise the `compile_library` command will most
-likely fail).
-This file will be copied the first time you run `setup.py` into
-`$HOME/.config/bfps/machine_settings.py`, **where it will be imported from
-afterwards** --- any future edits **must** be made to the new file.
-You may, obviously, edit it afterwards and rerun the `compile_library` command as
-needed.
-
-.. code:: bash
-
-    python setup.py compile_library
-    python setup.py install
+and FFTW >= 3.4 --- detailed instructions are
+included at the end of this document.
 
 -------------
 Documentation
@@ -82,8 +70,8 @@ Documentation
 While the code is not fully documented yet, basic information is already
 available, and it is recommended that you generate the manual and go
 through it carefully.
-Please don't be shy about asking for specific improvements to the
-current text.
+Please do ask for specific improvements to the current text where it is
+found lacking.
 In order to generate the manual, navigate to the repository folder, and
 execute the following commands:
 
@@ -99,10 +87,113 @@ type ``make html`` instead of ``make latexpdf``.
 Comments
 --------
 
+* the `cmake` folder contains files extracted from
+  https://gitlab.inria.fr/solverstack/morse_cmake, a separate project licensed
+  under the "CeCILL-C" license, please see
+  http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html for
+  details.
+
 * particles: initialization of multistep solvers is done with lower
   order methods, so direct convergence tests will fail.
 
-* Code is used mainly with Python 3.4 and 3.5.
-  In principle it should be easy to maintain compatibility with Python
-  2.7.x, but as of `bfps 1.8` this is no longer a main concern.
+* code is only known to work with HDF5 1.8.x.
+
+* code is used mainly with Python 3.5 and later, and it is not tested at
+  all with Python 2.x
+
+-------------------------------
+Installation with prerequisites
+-------------------------------
+
+These installation steps assume that you have a working MPI compiler,
+properly configured on your system (i.e. the various configure scripts
+are able to find it), as well as the `cmake` tool.
+We recommend to specify the desired MPI C++ compiler by exporting the
+environment variable `MPICXX` --- the BFPS cmake configuration looks for
+this variable.
+We also recommend that an environment variable `BFPS_OPTIMIZATION_FLAGS`
+is defined appropriately.
+In particular, for clusters of unknown architecture it helps to log into
+individual nodes and run the following command:
+
+.. code:: bash
+
+    gcc -march=native -Q --help=target
+
+Detailed full installation instructions:
+
+1. Make directory PREFIX on a local fast partition.
+
+2. Download, compile, install FFTW (latest version 3.x from http://www.fftw.org/).
+   Execute the following commands in order, feel free to customize
+   optimisation flags for your own computer (see http://www.fftw.org/fftw3_doc/Installation-on-Unix.html):
+
+    .. code:: bash
+
+        ./configure --prefix=PREFIX --enable-float --enable-sse --enable-mpi --enable-openmp --enable-threads
+        make
+        make install
+        ./configure --prefix=PREFIX  --enable-sse2 --enable-avx512 --enable-mpi --enable-openmp --enable-threads
+        make
+        make install
+
+   BFPS will try to find FFTW using the FindFFTW from the Morse project.
+   If the package is installed in a non standard location, it is recommanded
+   to setup the environment variables: `FFTW_DIR` (or `FFTW_INCDIR` and `FFTW_LIBDIR`).
+
+3. Download, compile, install HDF5 (version 1.8.x, currently available
+   at https://portal.hdfgroup.org/display/support/HDF5+1.8.20#files).
+   We are using parallel I/O, therefore we must use the plain C interface of HDF5:
+
+    .. code:: bash
+
+        ./configure --prefix=PREFIX --enable-parallel
+        make
+        make install
+
+   BFPS will try to find HDF5 using the regular FindHDF5.
+   Therefore, if the package is installed in a non standard location, it is recommanded
+   to setup the environment variable: HDF5_ROOT.
+
+3. Optional.
+   We recommend the creation of a virtual python3 environment (also under PREFIX) that will be used for installing bfps and dependencies.
+   Please see https://docs.python-guide.org/dev/virtualenvs/.
+
+4. Clone bfps repository.
+
+    .. code:: bash
+
+        git clone git@gitlab.mpcdf.mpg.de:clalescu/bfps.git
+
+5. Go into bfps repository, execute
+
+    .. code:: bash
+
+        mkdir build
+        cd build
+        cmake ..
+        # possibly : cmake .. -DCMAKE_INSTALL_PREFIX=INSTALL_DIR
+        make
+        # to get a verbose compilation process, use
+        VERBOSE=1 make
+        make install
+
+6. If you used a custom install location (i.e. `CMAKE_INSTALL_PREFIX`)
+   you must include this location in the environment variable
+   `CMAKE_PREFIX_PATH`.
+   This ensures that the required `BFPSConfig.cmake` file is accessible for
+   future use by the package.
+
+7. Using BFPS from an external project.
+   BFPS creates and installs 3 files alongside the C++ headers and
+   library:
+
+    .. code:: bash
+
+        -- Installing: install/lib/BFPSConfig.cmake
+        -- Installing: install/lib/BFPS_EXPORT.cmake
+        -- Installing: install/lib/BFPS_EXPORT-noconfig.cmake
+
+   In case these files provide incomplete information, it is necessary to update
+   the cmake input config file: bfps/cmake/BFPSConfig.cmake.in.
 
diff --git a/bfps/DNS.py b/bfps/DNS.py
index 4f26b86c5d4739e1bb3989f2e4a7d9a70ad3f009..bb4385458d8ce44a18ca1d18d88d5192aee147c6 100644
--- a/bfps/DNS.py
+++ b/bfps/DNS.py
@@ -1,26 +1,25 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
+################################################################################
+#                                                                              #
+#  Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
 
 
 
@@ -50,13 +49,8 @@ class DNS(_code):
                 self,
                 work_dir = work_dir,
                 simname = simname)
-        self.host_info = {'type'        : 'cluster',
-                          'environment' : None,
-                          'deltanprocs' : 1,
-                          'queue'       : '',
-                          'mail_address': '',
-                          'mail_events' : None}
         self.generate_default_parameters()
+        self.statistics = {}
         return None
     def set_precision(
             self,
@@ -78,7 +72,8 @@ class DNS(_code):
             self.C_field_dtype = 'double'
             self.fluid_precision = 'double'
         return None
-    def write_src(self):
+    def write_src(
+            self):
         self.version_message = (
                 '/***********************************************************************\n' +
                 '* this code automatically generated by bfps\n' +
@@ -114,30 +109,11 @@ class DNS(_code):
         with open(self.name + '.cpp', 'w') as outfile:
             outfile.write(self.version_message + '\n\n')
             outfile.write(self.includes + '\n\n')
-            outfile.write(
-                    self.cread_pars(
-                       template_class = '{0}<rnumber>::'.format(self.dns_type),
-                        template_prefix = 'template <typename rnumber> ',
-                        simname_variable = 'this->simname.c_str()',
-                        prepend_this = True) +
-                    '\n\n')
-            for rnumber in ['float', 'double']:
-                outfile.write(self.cread_pars(
-                    template_class = '{0}<{1}>::'.format(self.dns_type, rnumber),
-                    template_prefix = 'template '.format(rnumber),
-                    just_declaration = True) + '\n\n')
-            if self.dns_type in ['NSVEparticles', 'NSVE_no_output', 'NSVEparticles_no_output']:
-                outfile.write('template <typename rnumber> int NSVE<rnumber>::read_parameters(){return EXIT_SUCCESS;}\n')
-                outfile.write('template int NSVE<float>::read_parameters();\n')
-                outfile.write('template int NSVE<double>::read_parameters();\n\n')
-            if self.dns_type in ['NSVEparticles_no_output']:
-                outfile.write('template <typename rnumber> int NSVEparticles<rnumber>::read_parameters(){return EXIT_SUCCESS;}\n')
-                outfile.write('template int NSVEparticles<float>::read_parameters();\n')
-                outfile.write('template int NSVEparticles<double>::read_parameters();\n\n')
             outfile.write(self.main + '\n')
         return None
     def generate_default_parameters(self):
         # these parameters are relevant for all DNS classes
+        self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE'
         self.parameters['dealias_type'] = int(1)
         self.parameters['dkx'] = float(1.0)
         self.parameters['dky'] = float(1.0)
@@ -150,19 +126,35 @@ class DNS(_code):
         self.parameters['nu'] = float(0.1)
         self.parameters['fmode'] = int(1)
         self.parameters['famplitude'] = float(0.5)
+        self.parameters['friction_coefficient'] = float(0.5)
+        self.parameters['energy'] = float(0.5)
+        self.parameters['injection_rate'] = float(0.4)
         self.parameters['fk0'] = float(2.0)
         self.parameters['fk1'] = float(4.0)
-        self.parameters['forcing_type'] = 'linear'
+        self.parameters['forcing_type'] = 'fixed_energy_injection_rate'
         self.parameters['histogram_bins'] = int(256)
         self.parameters['max_velocity_estimate'] = float(1)
         self.parameters['max_vorticity_estimate'] = float(1)
         # parameters specific to particle version
         self.NSVEp_extra_parameters = {}
         self.NSVEp_extra_parameters['niter_part'] = int(1)
+        self.NSVEp_extra_parameters['niter_part_fine_period'] = int(10)
+        self.NSVEp_extra_parameters['niter_part_fine_duration'] = int(0)
         self.NSVEp_extra_parameters['nparticles'] = int(10)
         self.NSVEp_extra_parameters['tracers0_integration_steps'] = int(4)
         self.NSVEp_extra_parameters['tracers0_neighbours'] = int(1)
         self.NSVEp_extra_parameters['tracers0_smoothness'] = int(1)
+        self.NSVEp_extra_parameters['tracers0_enable_p2p'] = int(0)
+        self.NSVEp_extra_parameters['tracers0_enable_inner'] = int(0)
+        self.NSVEp_extra_parameters['tracers0_enable_vorticity_omega'] = int(0)
+        self.NSVEp_extra_parameters['tracers0_cutoff'] = float(1)
+        self.NSVEp_extra_parameters['tracers0_inner_v0'] = float(1)
+        self.NSVEp_extra_parameters['tracers0_lambda'] = float(1)
+        #self.extra_parameters = {}
+        #for key in ['NSVE', 'NSVE_no_output', 'NSVEparticles', 'NSVEparticles_no_output', 'NSVEcomplex_particles']:
+        #    self.extra_parameters[key] = {}
+        #for key in ['NSVEparticles', 'NSVEparticles_no_output', 'NSVEcomplex_particles']:
+        #    self.extra_parameters[key].update(self.NSVEp_extra_parameters)
         return None
     def get_kspace(self):
         kspace = {}
@@ -198,8 +190,12 @@ class DNS(_code):
         return os.path.join(self.work_dir, self.simname + '_particles.h5')
     def get_particle_file(self):
         return h5py.File(self.get_particle_file_name(), 'r')
+    def get_cache_file_name(self):
+        return os.path.join(self.work_dir, self.simname + '_cache.h5')
+    def get_cache_file(self):
+        return h5py.File(self.get_cache_file_name(), 'r')
     def get_postprocess_file_name(self):
-        return os.path.join(self.work_dir, self.simname + '_postprocess.h5')
+        return self.get_cache_file_name()
     def get_postprocess_file(self):
         return h5py.File(self.get_postprocess_file_name(), 'r')
     def compute_statistics(self, iter0 = 0, iter1 = None):
@@ -215,75 +211,134 @@ class DNS(_code):
         tensors, and the enstrophy spectrum is also used to
         compute the dissipation :math:`\\varepsilon(t)`.
         These basic quantities are stored in a newly created HDF5 file,
-        ``simname_postprocess.h5``.
+        ``simname_cache.h5``.
         """
         if len(list(self.statistics.keys())) > 0:
             return None
-        self.read_parameters()
-        with self.get_data_file() as data_file:
-            if 'moments' not in data_file['statistics'].keys():
-                return None
-            iter0 = min((data_file['statistics/moments/velocity'].shape[0] *
-                         self.parameters['niter_stat']-1),
-                        iter0)
-            if type(iter1) == type(None):
-                iter1 = data_file['iteration'].value
-            else:
-                iter1 = min(data_file['iteration'].value, iter1)
-            ii0 = iter0 // self.parameters['niter_stat']
-            ii1 = iter1 // self.parameters['niter_stat']
-            self.statistics['kshell'] = data_file['kspace/kshell'].value
-            self.statistics['kM'] = data_file['kspace/kM'].value
-            self.statistics['dk'] = data_file['kspace/dk'].value
-            computation_needed = True
-            pp_file = h5py.File(self.get_postprocess_file_name(), 'a')
-            if 'ii0' in pp_file.keys():
-                computation_needed =  not (ii0 == pp_file['ii0'].value and
-                                           ii1 == pp_file['ii1'].value)
+        if not os.path.exists(self.get_data_file_name()):
+            if os.path.exists(self.get_cache_file_name()):
+                self.read_parameters(fname = self.get_cache_file_name())
+                with self.get_cache_file() as pp_file:
+                    for k in ['t',
+                              'energy(t)',
+                              'energy(k)',
+                              'enstrophy(t)',
+                              'enstrophy(k)',
+                              'R_ij(t)',
+                              'vel_max(t)',
+                              'renergy(t)']:
+                        if k in pp_file.keys():
+                            self.statistics[k] = pp_file[k].value
+                    self.statistics['kM'] = pp_file['kspace/kM'].value
+                    self.statistics['dk'] = pp_file['kspace/dk'].value
+                    self.statistics['kshell'] = pp_file['kspace/kshell'].value
+                    self.statistics['nshell'] = pp_file['kspace/nshell'].value
+        else:
+            self.read_parameters()
+            with self.get_data_file() as data_file:
+                if 'moments' not in data_file['statistics'].keys():
+                    return None
+                iter0 = min((data_file['statistics/moments/velocity'].shape[0] *
+                             self.parameters['niter_stat']-1),
+                            iter0)
+                if type(iter1) == type(None):
+                    iter1 = data_file['iteration'].value
+                else:
+                    iter1 = min(data_file['iteration'].value, iter1)
+                ii0 = iter0 // self.parameters['niter_stat']
+                ii1 = iter1 // self.parameters['niter_stat']
+                self.statistics['kshell'] = data_file['kspace/kshell'].value
+                self.statistics['nshell'] = data_file['kspace/nshell'].value
+                for kk in [-1, -2]:
+                    if (self.statistics['kshell'][kk] == 0):
+                        self.statistics['kshell'][kk] = np.nan
+                self.statistics['kM'] = data_file['kspace/kM'].value
+                self.statistics['dk'] = data_file['kspace/dk'].value
+                computation_needed = True
+                pp_file = h5py.File(self.get_postprocess_file_name(), 'a')
+                if not ('parameters' in pp_file.keys()):
+                    data_file.copy('parameters', pp_file)
+                    data_file.copy('kspace', pp_file)
+                if 'ii0' in pp_file.keys():
+                    computation_needed =  not (ii0 == pp_file['ii0'].value and
+                                               ii1 == pp_file['ii1'].value)
+                    if computation_needed:
+                        for k in ['t', 'vel_max(t)', 'renergy(t)',
+                                  'energy(t)', 'enstrophy(t)',
+                                  'energy(k)', 'enstrophy(k)',
+                                  'energy(t, k)',
+                                  'enstrophy(t, k)',
+                                  'R_ij(t)',
+                                  'ii0', 'ii1', 'iter0', 'iter1']:
+                            if k in pp_file.keys():
+                                del pp_file[k]
                 if computation_needed:
-                    for k in pp_file.keys():
-                        del pp_file[k]
-            if computation_needed:
-                pp_file['iter0'] = iter0
-                pp_file['iter1'] = iter1
-                pp_file['ii0'] = ii0
-                pp_file['ii1'] = ii1
-                pp_file['t'] = (self.parameters['dt']*
-                                self.parameters['niter_stat']*
-                                (np.arange(ii0, ii1+1).astype(np.float)))
-                pp_file['energy(t, k)'] = (
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] +
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] +
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2
-                pp_file['enstrophy(t, k)'] = (
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] +
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] +
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2
-                pp_file['vel_max(t)'] = data_file['statistics/moments/velocity']  [ii0:ii1+1, 9, 3]
-                pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2
-            for k in ['t',
-                      'energy(t, k)',
-                      'enstrophy(t, k)',
-                      'vel_max(t)',
-                      'renergy(t)']:
-                if k in pp_file.keys():
-                    self.statistics[k] = pp_file[k].value
-            self.compute_time_averages()
+                    pp_file['iter0'] = iter0
+                    pp_file['iter1'] = iter1
+                    pp_file['ii0'] = ii0
+                    pp_file['ii1'] = ii1
+                    pp_file['t'] = (self.parameters['dt']*
+                                    self.parameters['niter_stat']*
+                                    (np.arange(ii0, ii1+1).astype(np.float)))
+                    phi_ij = data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1]
+                    pp_file['R_ij(t)'] = np.sum(phi_ij, axis = 1)
+                    energy_tk = (
+                        phi_ij[:, :, 0, 0] +
+                        phi_ij[:, :, 1, 1] +
+                        phi_ij[:, :, 2, 2])/2
+                    pp_file['energy(t)'] = np.sum(energy_tk, axis = 1)
+                    pp_file['energy(k)'] = np.mean(energy_tk, axis = 0)*(4*np.pi*self.statistics['kshell']**2) / (self.statistics['dk']*self.statistics['nshell'])
+                    enstrophy_tk = (
+                        data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] +
+                        data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] +
+                        data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2
+                    pp_file['enstrophy(t)'] = np.sum(enstrophy_tk, axis = 1)
+                    pp_file['enstrophy(k)'] = np.mean(enstrophy_tk, axis = 0)*(4*np.pi*self.statistics['kshell']**2) / (self.statistics['dk']*self.statistics['nshell'])
+                    pp_file['vel_max(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 9, 3]
+                    pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2
+        for k in ['t',
+                  'energy(t)',
+                  'energy(k)',
+                  'enstrophy(t)',
+                  'enstrophy(k)',
+                  'R_ij(t)',
+                  'vel_max(t)',
+                  'renergy(t)']:
+            if k in pp_file.keys():
+                self.statistics[k] = pp_file[k].value
+        # sanity check --- Parseval theorem check
+        assert(np.max(np.abs(
+                self.statistics['renergy(t)'] -
+                self.statistics['energy(t)']) / self.statistics['energy(t)']) < 1e-5)
+        self.compute_time_averages()
+        return None
+    def compute_Reynolds_stress_invariants(
+            self):
+        """
+        see Choi and Lumley, JFM v436 p59 (2001)
+        """
+        Rij = self.statistics['R_ij(t)']
+        Rij /= (2*self.statistics['energy(t)'][:, None, None])
+        Rij[:, 0, 0] -= 1./3
+        Rij[:, 1, 1] -= 1./3
+        Rij[:, 2, 2] -= 1./3
+        self.statistics['I2(t)'] = np.sqrt(np.einsum('...ij,...ij', Rij, Rij, optimize = True) / 6)
+        self.statistics['I3(t)'] = np.cbrt(np.einsum('...ij,...jk,...ki', Rij, Rij, Rij, optimize = True) / 6)
         return None
     def compute_time_averages(self):
         """Compute easy stats.
 
         Further computation of statistics based on the contents of
-        ``simname_postprocess.h5``.
+        ``simname_cache.h5``.
         Standard quantities are as follows
         (consistent with [Ishihara]_):
 
         .. math::
 
             U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm
-            L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm
-            T_{\\textrm{int}}(t) =
-            \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)}
+            L_{\\textrm{int}} = \\frac{\pi}{2U_{int}^2} \\int \\frac{dk}{k} E(k), \\hskip .5cm
+            T_{\\textrm{int}} =
+            \\frac{L_{\\textrm{int}}}{U_{\\textrm{int}}}
 
             \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm
             \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm
@@ -300,21 +355,14 @@ class DNS(_code):
                       J. Fluid Mech.,
                       **592**, 335-366, 2007
         """
-        for key in ['energy', 'enstrophy']:
-            self.statistics[key + '(t)'] = (self.statistics['dk'] *
-                                            np.sum(self.statistics[key + '(t, k)'], axis = 1))
         self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3)
-        self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi /
-                                       (2*self.statistics['Uint(t)']**2)) *
-                                      np.nansum(self.statistics['energy(t, k)'] /
-                                                self.statistics['kshell'][None, :], axis = 1))
         for key in ['energy',
                     'enstrophy',
-                    'vel_max',
-                    'Uint',
-                    'Lint']:
+                    'mean_trS2',
+                    'Uint']:
             if key + '(t)' in self.statistics.keys():
                 self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0)
+        self.statistics['vel_max'] = np.max(self.statistics['vel_max(t)'])
         for suffix in ['', '(t)']:
             self.statistics['diss'    + suffix] = (self.parameters['nu'] *
                                                    self.statistics['enstrophy' + suffix]*2)
@@ -322,9 +370,6 @@ class DNS(_code):
                                                    self.statistics['diss' + suffix])**.25
             self.statistics['tauK'    + suffix] =  (self.parameters['nu'] /
                                                     self.statistics['diss' + suffix])**.5
-            self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] *
-                                              self.statistics['Lint' + suffix] /
-                                              self.parameters['nu'])
             self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] *
                                                   self.statistics['Uint' + suffix]**2 /
                                                   self.statistics['diss' + suffix])**.5
@@ -335,6 +380,13 @@ class DNS(_code):
                                                  self.statistics['etaK' + suffix])
             if self.parameters['dealias_type'] == 1:
                 self.statistics['kMeta' + suffix] *= 0.8
+        self.statistics['Lint'] = ((np.pi /
+                                    (2*self.statistics['Uint']**2)) *
+                                   np.nansum(self.statistics['energy(k)'] /
+                                                self.statistics['kshell']))
+        self.statistics['Re'] = (self.statistics['Uint'] *
+                                 self.statistics['Lint'] /
+                                 self.parameters['nu'])
         self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint']
         self.statistics['Taylor_microscale'] = self.statistics['lambda']
         return None
@@ -371,12 +423,11 @@ class DNS(_code):
         return None
     def write_par(
             self,
-            iter0 = 0,
-            particle_ic = None):
+            iter0 = 0):
         assert (self.parameters['niter_todo'] % self.parameters['niter_stat'] == 0)
         assert (self.parameters['niter_todo'] % self.parameters['niter_out']  == 0)
         assert (self.parameters['niter_out']  % self.parameters['niter_stat'] == 0)
-        if self.dns_type in ['NSVEparticles_no_output', 'NSVEparticles']:
+        if self.dns_type in ['NSVEparticles_no_output', 'NSVEcomplex_particles', 'NSVEparticles']:
             assert (self.parameters['niter_todo'] % self.parameters['niter_part'] == 0)
             assert (self.parameters['niter_out']  % self.parameters['niter_part'] == 0)
         _code.write_par(self, iter0 = iter0)
@@ -419,36 +470,8 @@ class DNS(_code):
                                                  4),
                                      dtype = np.int64)
             ofile['checkpoint'] = int(0)
-        if self.dns_type in ['NSVE', 'NSVE_no_output']:
+        if (self.dns_type in ['NSVE', 'NSVE_no_output']):
             return None
-
-        if type(particle_ic) == type(None):
-            pbase_shape = (self.parameters['nparticles'],)
-            number_of_particles = self.parameters['nparticles']
-        else:
-            pbase_shape = particle_ic.shape[:-1]
-            assert(particle_ic.shape[-1] == 3)
-            number_of_particles = 1
-            for val in pbase_shape[1:]:
-                number_of_particles *= val
-        with h5py.File(self.get_checkpoint_0_fname(), 'a') as ofile:
-            s = 0
-            ofile.create_group('tracers{0}'.format(s))
-            ofile.create_group('tracers{0}/rhs'.format(s))
-            ofile.create_group('tracers{0}/state'.format(s))
-            ofile['tracers{0}/rhs'.format(s)].create_dataset(
-                    '0',
-                    shape = (
-                        (self.parameters['tracers{0}_integration_steps'.format(s)],) +
-                        pbase_shape +
-                        (3,)),
-                    dtype = np.float)
-            ofile['tracers{0}/state'.format(s)].create_dataset(
-                    '0',
-                    shape = (
-                        pbase_shape +
-                        (3,)),
-                    dtype = np.float)
         return None
     def job_parser_arguments(
             self,
@@ -472,6 +495,10 @@ class DNS(_code):
                 metavar = 'NTHREADS_PER_PROCESS',
                 help = 'number of threads to use per MPI process',
                 default = 1)
+        parser.add_argument(
+                '--no-debug',
+                action = 'store_true',
+                dest = 'no_debug')
         parser.add_argument(
                 '--no-submit',
                 action = 'store_true',
@@ -602,28 +629,32 @@ class DNS(_code):
         parser_NSVEparticles_no_output = subparsers.add_parser(
                 'NSVEparticles_no_output',
                 help = 'plain Navier-Stokes vorticity formulation, with basic fluid tracers, checkpoints are NOT SAVED')
-        self.simulation_parser_arguments(parser_NSVEparticles_no_output)
-        self.job_parser_arguments(parser_NSVEparticles_no_output)
-        self.particle_parser_arguments(parser_NSVEparticles_no_output)
-        self.parameters_to_parser_arguments(parser_NSVEparticles_no_output)
-        self.parameters_to_parser_arguments(
-                parser_NSVEparticles_no_output,
-                self.NSVEp_extra_parameters)
 
         parser_NSVEp2 = subparsers.add_parser(
                 'NSVEparticles',
                 help = 'plain Navier-Stokes vorticity formulation, with basic fluid tracers')
-        self.simulation_parser_arguments(parser_NSVEp2)
-        self.job_parser_arguments(parser_NSVEp2)
-        self.particle_parser_arguments(parser_NSVEp2)
-        self.parameters_to_parser_arguments(parser_NSVEp2)
-        self.parameters_to_parser_arguments(
-                parser_NSVEp2,
-                self.NSVEp_extra_parameters)
+
+        parser_NSVEp2p = subparsers.add_parser(
+                'NSVEcomplex_particles',
+                help = 'plain Navier-Stokes vorticity formulation, with oriented active particles')
+
+        parser_NSVEp_extra = subparsers.add_parser(
+                'NSVEp_extra_sampling',
+                help = 'plain Navier-Stokes vorticity formulation, with basic fluid tracers, that sample velocity gradient, as well as pressure and its derivatives.')
+
+        for parser in ['NSVEparticles_no_output', 'NSVEp2', 'NSVEp2p', 'NSVEp_extra']:
+            eval('self.simulation_parser_arguments({0})'.format('parser_' + parser))
+            eval('self.job_parser_arguments({0})'.format('parser_' + parser))
+            eval('self.particle_parser_arguments({0})'.format('parser_' + parser))
+            eval('self.parameters_to_parser_arguments({0})'.format('parser_' + parser))
+            eval('self.parameters_to_parser_arguments('
+                    'parser_{0},'
+                    'self.NSVEp_extra_parameters)'.format(parser))
         return None
     def prepare_launch(
             self,
-            args = []):
+            args = [],
+            extra_parameters = None):
         """Set up reasonable parameters.
 
         With the default Lundgren forcing applied in the band [2, 4],
@@ -654,16 +685,13 @@ class DNS(_code):
         self.dns_type = opt.DNS_class
         self.name = self.dns_type + '-' + self.fluid_precision + '-v' + bfps.__version__
         # merge parameters if needed
-        if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']:
+        if self.dns_type in ['NSVEparticles', 'NSVEcomplex_particles', 'NSVEparticles_no_output', 'NSVEp_extra_sampling']:
             for k in self.NSVEp_extra_parameters.keys():
                 self.parameters[k] = self.NSVEp_extra_parameters[k]
-        self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3)
-        self.parameters['dt'] = (opt.dtfactor / opt.n)
-        # custom famplitude for 288 and 576
-        if opt.n == 288:
-            self.parameters['famplitude'] = 0.45
-        elif opt.n == 576:
-            self.parameters['famplitude'] = 0.47
+        if type(extra_parameters) != type(None):
+            if self.dns_type in extra_parameters.keys():
+                for k in extra_parameters[self.dns_type].keys():
+                    self.parameters[k] = extra_parameters[self.dns_type][k]
         if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0):
             self.parameters['niter_out'] = self.parameters['niter_todo']
         if len(opt.src_work_dir) == 0:
@@ -672,7 +700,7 @@ class DNS(_code):
             opt.dkx = 2. / opt.Lx
         if type(opt.dky) == type(None):
             opt.dky = 2. / opt.Ly
-        if type(opt.dkx) == type(None):
+        if type(opt.dkz) == type(None):
             opt.dkz = 2. / opt.Lz
         if type(opt.nx) == type(None):
             opt.nx = opt.n
@@ -680,11 +708,49 @@ class DNS(_code):
             opt.ny = opt.n
         if type(opt.nz) == type(None):
             opt.nz = opt.n
+        if type(opt.fk0) == type(None):
+            opt.fk0 = self.parameters['fk0']
+        if type(opt.fk1) == type(None):
+            opt.fk1 = self.parameters['fk1']
+        if type(opt.injection_rate) == type(None):
+            opt.injection_rate = self.parameters['injection_rate']
+        if type(opt.dealias_type) == type(None):
+            opt.dealias_type = self.parameters['dealias_type']
+        if (opt.nx > opt.n or
+            opt.ny > opt.n or
+            opt.nz > opt.n):
+            opt.n = min(opt.nx, opt.ny, opt.nz)
+            print("Warning: '-n' parameter changed to minimum of nx, ny, nz. This affects the computation of nu.")
+        self.parameters['dt'] = (opt.dtfactor / opt.n)
+        self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3)
+        # check value of kMax
+        kM = opt.n * 0.5
+        if opt.dealias_type == 1:
+            kM *= 0.8
+        # tweak forcing/viscosity based on forcint type
+        if opt.forcing_type == 'linear':
+            # custom famplitude for 288 and 576
+            if opt.n == 288:
+                self.parameters['famplitude'] = 0.45
+            elif opt.n == 576:
+                self.parameters['famplitude'] = 0.47
+        elif opt.forcing_type == 'fixed_energy_injection_rate':
+            # use the fact that mean dissipation rate is equal to injection rate
+            self.parameters['nu'] = (
+                    opt.injection_rate *
+                    (opt.kMeta / kM)**4)**(1./3)
+        elif opt.forcing_type == 'fixed_energy':
+            kf = 1. / (1./opt.fk0 +
+                       1./opt.fk1)
+            self.parameters['nu'] = (
+                    (opt.kMeta / kM)**(4./3) *
+                    (np.pi / kf)**(1./3) *
+                    (2*self.parameters['energy'] / 3)**0.5)
         if type(opt.checkpoints_per_file) == type(None):
             # hardcoded FFTW complex representation size
             field_size = 3*(opt.nx+2)*opt.ny*opt.nz*self.fluid_dtype.itemsize
             checkpoint_size = field_size
-            if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']:
+            if self.dns_type in ['NSVEparticles', 'NSVEcomplex_particles', 'NSVEparticles_no_output', 'NSVEp_extra_sampling']:
                 rhs_size = self.parameters['tracers0_integration_steps']
                 if type(opt.tracers0_integration_steps) != type(None):
                     rhs_size = opt.tracers0_integration_steps
@@ -708,28 +774,64 @@ class DNS(_code):
         return os.path.join(
                     self.work_dir,
                     self.simname + '_checkpoint_0.h5')
+    def get_checkpoint_fname(self, iteration = 0):
+        checkpoint = iteration // self.parameters['checkpoints_per_file']
+        return os.path.join(
+                    self.work_dir,
+                    self.simname + '_checkpoint_{0}.h5'.format(checkpoint))
     def generate_tracer_state(
             self,
             rseed = None,
-            species = 0):
-        with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file:
-            dset = data_file[
-                'tracers{0}/state/0'.format(species)]
-            if not type(rseed) == type(None):
-                np.random.seed(rseed)
-            nn = self.parameters['nparticles']
-            cc = int(0)
-            batch_size = int(1e6)
-            while nn > 0:
-                if nn > batch_size:
-                    dset[cc*batch_size:(cc+1)*batch_size] = np.random.random(
-                            (batch_size, 3))*2*np.pi
-                    nn -= batch_size
-                else:
-                    dset[cc*batch_size:cc*batch_size+nn] = np.random.random(
-                            (nn, 3))*2*np.pi
-                    nn = 0
-                cc += 1
+            species = 0,
+            integration_steps = None,
+            ncomponents = 3):
+        try:
+            if type(integration_steps) == type(None):
+                integration_steps = self.NSVEp_extra_parameters['tracers0_integration_steps']
+            if 'tracers{0}_integration_steps'.format(species) in self.parameters.keys():
+                integration_steps = self.parameters['tracers{0}_integration_steps'.format(species)]
+            if self.dns_type == 'NSVEcomplex_particles' and species == 0:
+                ncomponents = 6
+            with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file:
+                nn = self.parameters['nparticles']
+                if not 'tracers{0}'.format(species) in data_file.keys():
+                    data_file.create_group('tracers{0}'.format(species))
+                    data_file.create_group('tracers{0}/rhs'.format(species))
+                    data_file.create_group('tracers{0}/state'.format(species))
+                data_file['tracers{0}/rhs'.format(species)].create_dataset(
+                        '0',
+                        shape = (integration_steps, nn, ncomponents,),
+                        dtype = np.float)
+                dset = data_file['tracers{0}/state'.format(species)].create_dataset(
+                        '0',
+                        shape = (nn, ncomponents,),
+                        dtype = np.float)
+                if not type(rseed) == type(None):
+                    np.random.seed(rseed)
+                cc = int(0)
+                batch_size = int(1e6)
+                def get_random_phases(npoints):
+                    return np.random.random(
+                                (npoints, 3))*2*np.pi
+                def get_random_versors(npoints):
+                    bla = np.random.normal(
+                            size = (npoints, 3))
+                    bla  /= np.sum(bla**2, axis = 1)[:, None]**.5
+                    return bla
+                while nn > 0:
+                    if nn > batch_size:
+                        dset[cc*batch_size:(cc+1)*batch_size, :3] = get_random_phases(batch_size)
+                        if dset.shape[1] == 6:
+                            dset[cc*batch_size:(cc+1)*batch_size, 3:] = get_random_versors(batch_size)
+                        nn -= batch_size
+                    else:
+                        dset[cc*batch_size:cc*batch_size+nn, :3] = get_random_phases(nn)
+                        if dset.shape[1] == 6:
+                            dset[cc*batch_size:cc*batch_size+nn, 3:] = get_random_versors(nn)
+                        nn = 0
+                    cc += 1
+        except Exception as e:
+            print(e)
         return None
     def generate_vector_field(
             self,
@@ -774,21 +876,21 @@ class DNS(_code):
         """
         np.random.seed(rseed)
         Kdata00 = scalar_generator(
-                self.parameters['nz']//2,
-                self.parameters['ny']//2,
-                self.parameters['nx']//2,
+                self.parameters['nz'],
+                self.parameters['ny'],
+                self.parameters['nx'],
                 p = spectra_slope,
                 amplitude = amplitude).astype(self.ctype)
         Kdata01 = scalar_generator(
-                self.parameters['nz']//2,
-                self.parameters['ny']//2,
-                self.parameters['nx']//2,
+                self.parameters['nz'],
+                self.parameters['ny'],
+                self.parameters['nx'],
                 p = spectra_slope,
                 amplitude = amplitude).astype(self.ctype)
         Kdata02 = scalar_generator(
-                self.parameters['nz']//2,
-                self.parameters['ny']//2,
-                self.parameters['nx']//2,
+                self.parameters['nz'],
+                self.parameters['ny'],
+                self.parameters['nx'],
                 p = spectra_slope,
                 amplitude = amplitude).astype(self.ctype)
         Kdata0 = np.zeros(
@@ -818,113 +920,128 @@ class DNS(_code):
         # the case of increasing/decreasing by the same factor in all directions.
         # in principle we could write something more generic, but i'm not sure
         # how complicated that would be
-        dst_shape = (self.parameters['nz'],
-                     self.parameters['ny'],
+        dst_shape = (self.parameters['ny'],
+                     self.parameters['nz'],
                      (self.parameters['nx']+2) // 2,
                      3)
         src_file = h5py.File(src_file_name, 'r')
         if (src_file[src_dset_name].shape == dst_shape):
-            if make_link and (src_file[src_dset_name].dtype == self.ctype):
-                dst_file[dst_dset_name] = h5py.ExternalLink(
-                        src_file_name,
-                        src_dset_name)
-            else:
-                dst_file.create_dataset(
-                        dst_dset_name,
-                        shape = dst_shape,
-                        dtype = self.ctype,
-                        fillvalue = 0.0)
-                for kz in range(src_file[src_dset_name].shape[0]):
-                    dst_file[dst_dset_name][kz] = src_file[src_dset_name][kz]
+            dst_file[dst_dset_name] = h5py.ExternalLink(
+                    src_file_name,
+                    src_dset_name)
         else:
-            print('aloha')
             min_shape = (min(dst_shape[0], src_file[src_dset_name].shape[0]),
                          min(dst_shape[1], src_file[src_dset_name].shape[1]),
                          min(dst_shape[2], src_file[src_dset_name].shape[2]),
                          3)
-            print(self.ctype)
+            src_shape = src_file[src_dset_name].shape
             dst_file.create_dataset(
                     dst_dset_name,
                     shape = dst_shape,
                     dtype = np.dtype(self.ctype),
                     fillvalue = complex(0))
-            for kz in range(min_shape[0]):
-                dst_file[dst_dset_name][kz,:min_shape[1], :min_shape[2]] = \
-                        src_file[src_dset_name][kz, :min_shape[1], :min_shape[2]]
+            for kz in range(min_shape[0]//2):
+                dst_file[dst_dset_name][kz,:min_shape[1]//2, :min_shape[2]] = \
+                        src_file[src_dset_name][kz, :min_shape[1]//2, :min_shape[2]]
+                dst_file[dst_dset_name][kz,
+                                        dst_shape[1] - min_shape[1]//2+1:,
+                                        :min_shape[2]] = \
+                        src_file[src_dset_name][kz,
+                                                src_shape[1] - min_shape[1]//2+1,
+                                                :min_shape[2]]
+                if kz > 0:
+                    dst_file[dst_dset_name][-kz,:min_shape[1]//2, :min_shape[2]] = \
+                            src_file[src_dset_name][-kz, :min_shape[1]//2, :min_shape[2]]
+                    dst_file[dst_dset_name][-kz,
+                                            dst_shape[1] - min_shape[1]//2+1:,
+                                            :min_shape[2]] = \
+                            src_file[src_dset_name][-kz,
+                                                    src_shape[1] - min_shape[1]//2+1,
+                                                    :min_shape[2]]
+        return None
+    def generate_particle_data(
+            self,
+            opt = None):
+        if self.parameters['nparticles'] > 0:
+            self.generate_tracer_state(
+                    species = 0,
+                    rseed = opt.particle_rand_seed)
+            if not os.path.exists(self.get_particle_file_name()):
+                with h5py.File(self.get_particle_file_name(), 'w') as particle_file:
+                    particle_file.create_group('tracers0/position')
+                    particle_file.create_group('tracers0/velocity')
+                    particle_file.create_group('tracers0/acceleration')
+                    if self.dns_type in ['NSVEcomplex_particles']:
+                        particle_file.create_group('tracers0/orientation')
+                        particle_file.create_group('tracers0/velocity_gradient')
+                    if self.dns_type in ['NSVEp_extra_sampling']:
+                        particle_file.create_group('tracers0/velocity_gradient')
+                        particle_file.create_group('tracers0/pressure')
+                        particle_file.create_group('tracers0/pressure_gradient')
+                        particle_file.create_group('tracers0/pressure_Hessian')
+        return None
+    def generate_initial_condition(
+            self,
+            opt = None):
+        # take care of fields' initial condition
+        # first, check if initial field exists
+        need_field = False
+        if not os.path.exists(self.get_checkpoint_0_fname()):
+            need_field = True
+        else:
+            f = h5py.File(self.get_checkpoint_0_fname(), 'r')
+            try:
+                dset = f['vorticity/complex/0']
+                need_field = (dset.shape == (self.parameters['ny'],
+                                             self.parameters['nz'],
+                                             self.parameters['nx']//2+1,
+                                             3))
+            except:
+                need_field = True
+            f.close()
+        if need_field:
+            f = h5py.File(self.get_checkpoint_0_fname(), 'a')
+            if len(opt.src_simname) > 0:
+                source_cp = 0
+                src_file = 'not_a_file'
+                while True:
+                    src_file = os.path.join(
+                        os.path.realpath(opt.src_work_dir),
+                        opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp))
+                    f0 = h5py.File(src_file, 'r')
+                    if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys():
+                        f0.close()
+                        break
+                    source_cp += 1
+                self.copy_complex_field(
+                        src_file,
+                        'vorticity/complex/{0}'.format(opt.src_iteration),
+                        f,
+                        'vorticity/complex/{0}'.format(0))
+            else:
+                data = self.generate_vector_field(
+                       write_to_file = False,
+                       spectra_slope = 2.0,
+                       amplitude = 0.05)
+                f['vorticity/complex/{0}'.format(0)] = data
+            f.close()
+        # now take care of particles' initial condition
+        if self.dns_type in ['NSVEparticles', 'NSVEcomplex_particles', 'NSVEparticles_no_output', 'NSVEp_extra_sampling']:
+            self.generate_particle_data(opt = opt)
         return None
     def launch_jobs(
             self,
-            opt = None,
-            particle_initial_condition = None):
-        if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')):
-            # take care of fields' initial condition
-            if not os.path.exists(self.get_checkpoint_0_fname()):
-                f = h5py.File(self.get_checkpoint_0_fname(), 'w')
-                if len(opt.src_simname) > 0:
-                    source_cp = 0
-                    src_file = 'not_a_file'
-                    while True:
-                        src_file = os.path.join(
-                            os.path.realpath(opt.src_work_dir),
-                            opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp))
-                        f0 = h5py.File(src_file, 'r')
-                        if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys():
-                            f0.close()
-                            break
-                        source_cp += 1
-                    self.copy_complex_field(
-                            src_file,
-                            'vorticity/complex/{0}'.format(opt.src_iteration),
-                            f,
-                            'vorticity/complex/{0}'.format(0))
-                else:
-                    data = self.generate_vector_field(
-                           write_to_file = False,
-                           spectra_slope = 2.0,
-                           amplitude = 0.05)
-                    f['vorticity/complex/{0}'.format(0)] = data
-                f.close()
-            ## take care of particles' initial condition
-            #if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']:
-            #    if opt.pclouds > 1:
-            #        np.random.seed(opt.particle_rand_seed)
-            #        if opt.pcloud_type == 'random-cube':
-            #            particle_initial_condition = (
-            #                np.random.random((opt.pclouds, 1, 3))*2*np.pi +
-            #                np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size)
-            #        elif opt.pcloud_type == 'regular-cube':
-            #            onedarray = np.linspace(
-            #                    -opt.particle_cloud_size/2,
-            #                    opt.particle_cloud_size/2,
-            #                    self.parameters['nparticles'])
-            #            particle_initial_condition = np.zeros(
-            #                    (opt.pclouds,
-            #                     self.parameters['nparticles'],
-            #                     self.parameters['nparticles'],
-            #                     self.parameters['nparticles'], 3),
-            #                    dtype = np.float64)
-            #            particle_initial_condition[:] = \
-            #                np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi
-            #            particle_initial_condition[..., 0] += onedarray[None, None, None, :]
-            #            particle_initial_condition[..., 1] += onedarray[None, None, :, None]
-            #            particle_initial_condition[..., 2] += onedarray[None, :, None, None]
-            self.write_par(
-                    particle_ic = None)
-            if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']:
-                if self.parameters['nparticles'] > 0:
-                    self.generate_tracer_state(
-                            species = 0,
-                            rseed = opt.particle_rand_seed)
-                    if not os.path.exists(self.get_particle_file_name()):
-                        with h5py.File(self.get_particle_file_name(), 'w') as particle_file:
-                            particle_file.create_group('tracers0/velocity')
-                            particle_file.create_group('tracers0/acceleration')
+            opt = None):
+        if not os.path.exists(self.get_data_file_name()):
+            self.generate_initial_condition(opt = opt)
+            self.write_par()
         self.run(
                 nb_processes = opt.nb_processes,
                 nb_threads_per_process = opt.nb_threads_per_process,
                 njobs = opt.njobs,
                 hours = opt.minutes // 60,
                 minutes = opt.minutes % 60,
-                no_submit = opt.no_submit)
+                no_submit = opt.no_submit,
+                no_debug = opt.no_debug)
         return None
 
diff --git a/bfps/FluidConvert.py b/bfps/FluidConvert.py
deleted file mode 100644
index 58d19116bfb8ab386ef9783babb2ad8da79760e4..0000000000000000000000000000000000000000
--- a/bfps/FluidConvert.py
+++ /dev/null
@@ -1,140 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-import numpy as np
-import pickle
-import os
-from ._fluid_base import _fluid_particle_base
-from ._base import _base
-import bfps
-
-class FluidConvert(_fluid_particle_base):
-    """This class is meant to be used for conversion of native DNS field
-    representations to real-space representations of velocity/vorticity
-    fields.
-    It may be superseeded by streamlined functionality in the future...
-    """
-    def __init__(
-            self,
-            name = 'FluidConvert-v' + bfps.__version__,
-            work_dir = './',
-            simname = 'test',
-            fluid_precision = 'single',
-            use_fftw_wisdom = False):
-        _fluid_particle_base.__init__(
-                self,
-                name = name + '-' + fluid_precision,
-                work_dir = work_dir,
-                simname = simname,
-                dtype = fluid_precision,
-                use_fftw_wisdom = use_fftw_wisdom)
-        self.spec_parameters = {}
-        self.spec_parameters['write_rvelocity']  = 1
-        self.spec_parameters['write_rvorticity'] = 1
-        self.spec_parameters['write_rTrS2'] = 1
-        self.spec_parameters['write_renstrophy'] = 1
-        self.spec_parameters['write_rpressure'] = 1
-        self.spec_parameters['iter0'] = 0
-        self.spec_parameters['iter1'] = -1
-        self.fill_up_fluid_code()
-        self.finalize_code(postprocess_mode = True)
-        return None
-    def fill_up_fluid_code(self):
-        self.definitions += self.cread_pars(
-                parameters = self.spec_parameters,
-                function_suffix = '_specific',
-                file_group = 'conversion_parameters')
-        self.variables += self.cdef_pars(
-                parameters = self.spec_parameters)
-        self.main_start += 'read_parameters_specific();\n'
-        self.fluid_includes += '#include <cstring>\n'
-        self.fluid_variables += ('double t;\n' +
-                                 'fluid_solver<{0}> *fs;\n').format(self.C_dtype)
-        self.fluid_definitions += """
-                //begincpp
-                void do_conversion(fluid_solver<{0}> *bla)
-                {{
-                    bla->read('v', 'c');
-                    if (write_rvelocity)
-                        bla->write('u', 'r');
-                    if (write_rvorticity)
-                        bla->write('v', 'r');
-                    if (write_rTrS2)
-                        bla->write_rTrS2();
-                    if (write_renstrophy)
-                        bla->write_renstrophy();
-                    if (write_rpressure)
-                        bla->write_rpressure();
-                }}
-                //endcpp
-                """.format(self.C_dtype)
-        self.fluid_start += """
-                //begincpp
-                fs = new fluid_solver<{0}>(
-                        simname,
-                        nx, ny, nz,
-                        dkx, dky, dkz,
-                        dealias_type,
-                        DEFAULT_FFTW_FLAG);
-                //endcpp
-                """.format(self.C_dtype)
-        self.fluid_loop += """
-                //begincpp
-                fs->iteration = frame_index;
-                do_conversion(fs);
-                //endcpp
-                """
-        self.fluid_end += 'delete fs;\n'
-        return None
-    def specific_parser_arguments(
-            self,
-            parser):
-        _fluid_particle_base.specific_parser_arguments(self, parser)
-        self.parameters_to_parser_arguments(
-                parser,
-                parameters = self.spec_parameters)
-        return None
-    def launch(
-            self,
-            args = [],
-            **kwargs):
-        opt = self.prepare_launch(args)
-        if opt.iter1 == -1:
-            opt.iter1 = self.get_data_file()['iteration'].value
-        self.pars_from_namespace(
-                opt,
-                parameters = self.spec_parameters)
-        self.rewrite_par(
-                group = 'conversion_parameters',
-                parameters = self.spec_parameters)
-        self.run(opt.nb_processes,
-		 1,
-                 hours = opt.minutes // 60,
-                 minutes = opt.minutes % 60,
-                 err_file = 'err_convert',
-                 out_file = 'out_convert')
-        return None
-
diff --git a/bfps/FluidResize.py b/bfps/FluidResize.py
deleted file mode 100644
index fb5e26208f6960d447bc927bd9e207354620d188..0000000000000000000000000000000000000000
--- a/bfps/FluidResize.py
+++ /dev/null
@@ -1,156 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-import os
-
-import bfps
-from ._fluid_base import _fluid_particle_base
-
-class FluidResize(_fluid_particle_base):
-    """This class is meant to resize snapshots of DNS states to new grids.
-    Typical stuff for DNS of turbulence.
-    It will become superfluous when HDF5 is used for field I/O.
-    """
-    def __init__(
-            self,
-            name = 'FluidResize-v' + bfps.__version__,
-            work_dir = './',
-            simname = 'test',
-            fluid_precision = 'single',
-            use_fftw_wisdom = False):
-        _fluid_particle_base.__init__(
-                self,
-                name = name + '-' + fluid_precision,
-                work_dir = work_dir,
-                simname = simname,
-                dtype = fluid_precision,
-                use_fftw_wisdom = use_fftw_wisdom)
-        self.parameters['src_simname'] = 'test'
-        self.parameters['dst_iter'] = 0
-        self.parameters['dst_nx'] = 32
-        self.parameters['dst_ny'] = 32
-        self.parameters['dst_nz'] = 32
-        self.parameters['dst_simname'] = 'new_test'
-        self.parameters['dst_dkx'] = 1.0
-        self.parameters['dst_dky'] = 1.0
-        self.parameters['dst_dkz'] = 1.0
-        self.fill_up_fluid_code()
-        self.finalize_code()
-        return None
-    def fill_up_fluid_code(self):
-        self.fluid_includes += '#include <cstring>\n'
-        self.fluid_includes += '#include "fftw_tools.hpp"\n'
-        self.fluid_variables += ('double t;\n' +
-                                 'fluid_solver<' + self.C_dtype + '> *fs0, *fs1;\n')
-        self.fluid_start += """
-                //begincpp
-                char fname[512];
-                fs0 = new fluid_solver<{0}>(
-                        src_simname,
-                        nx, ny, nz,
-                        dkx, dky, dkz);
-                fs1 = new fluid_solver<{0}>(
-                        dst_simname,
-                        dst_nx, dst_ny, dst_nz,
-                        dst_dkx, dst_dky, dst_dkz);
-                fs0->iteration = iteration;
-                fs1->iteration = 0;
-                DEBUG_MSG("about to read field\\n");
-                fs0->read('v', 'c');
-                DEBUG_MSG("field read, about to copy data\\n");
-                double a, b;
-                fs0->compute_velocity(fs0->cvorticity);
-                a = 0.5*fs0->autocorrel(fs0->cvelocity);
-                b = 0.5*fs0->autocorrel(fs0->cvorticity);
-                DEBUG_MSG("old field %d %g %g\\n", fs0->iteration, a, b);
-                copy_complex_array<{0}>(fs0->cd, fs0->cvorticity,
-                                        fs1->cd, fs1->cvorticity,
-                                        3);
-                DEBUG_MSG("data copied, about to write new field\\n");
-                fs1->write('v', 'c');
-                DEBUG_MSG("finished writing\\n");
-                fs1->compute_velocity(fs1->cvorticity);
-                a = 0.5*fs1->autocorrel(fs1->cvelocity);
-                b = 0.5*fs1->autocorrel(fs1->cvorticity);
-                DEBUG_MSG("new field %d %g %g\\n", fs1->iteration, a, b);
-                //endcpp
-                """.format(self.C_dtype)
-        self.fluid_end += """
-                //begincpp
-                delete fs0;
-                delete fs1;
-                //endcpp
-                """
-        return None
-    def specific_parser_arguments(
-            self,
-            parser):
-        _fluid_particle_base.specific_parser_arguments(self, parser)
-        parser.add_argument(
-                '-m',
-                type = int,
-                dest = 'm',
-                default = 32,
-                metavar = 'M',
-                help = 'resize from N to M')
-        parser.add_argument(
-                '--src_wd',
-                type = str,
-                dest = 'src_work_dir',
-                required = True)
-        parser.add_argument(
-                '--src_iteration',
-                type = int,
-                dest = 'src_iteration',
-                required = True)
-        return None
-    def launch(
-            self,
-            args = [],
-            **kwargs):
-        opt = self.prepare_launch(args)
-        cmd_line_pars = vars(opt)
-        for k in ['dst_nx', 'dst_ny', 'dst_nz']:
-            if type(cmd_line_pars[k]) == type(None):
-                cmd_line_pars[k] = opt.m
-        # the 3 dst_ni have been updated in opt itself at this point
-        # I'm not sure if this code is future-proof...
-        self.parameters['niter_todo'] = 0
-        self.pars_from_namespace(opt)
-        src_file = os.path.join(
-                os.path.realpath(opt.src_work_dir),
-                opt.src_simname + '_cvorticity_i{0:0>5x}'.format(opt.src_iteration))
-        read_file = os.path.join(
-                self.work_dir,
-                opt.src_simname + '_cvorticity_i{0:0>5x}'.format(opt.src_iteration))
-        self.write_par(iter0 = opt.src_iteration)
-        if not os.path.exists(read_file):
-            os.symlink(src_file, read_file)
-        self.run(ncpu = opt.ncpu,
-                 hours = opt.minutes // 60,
-                 minutes = opt.minutes % 60)
-        return None
-
diff --git a/bfps/NSManyParticles.py b/bfps/NSManyParticles.py
deleted file mode 100644
index 03f7345f61b27299bd2da60ea0c4d44924112837..0000000000000000000000000000000000000000
--- a/bfps/NSManyParticles.py
+++ /dev/null
@@ -1,92 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-import bfps
-
-class NSManyParticles(bfps.NavierStokes):
-    def specific_parser_arguments(
-            self,
-            parser):
-        bfps.NavierStokes.specific_parser_arguments(self, parser)
-        parser.add_argument(
-                '--particle-class',
-                default = 'rFFTW_distributed_particles',
-                dest = 'particle_class',
-                type = str)
-        parser.add_argument(
-                '--interpolator-class',
-                default = 'rFFTW_interpolator',
-                dest = 'interpolator_class',
-                type = str)
-        parser.add_argument('--neighbours',
-                type = int,
-                dest = 'neighbours',
-                default = 3)
-        parser.add_argument('--smoothness',
-                type = int,
-                dest = 'smoothness',
-                default = 2)
-        return None
-    def launch(
-            self,
-            args = [],
-            **kwargs):
-        opt = self.prepare_launch(args = args)
-        self.fill_up_fluid_code()
-        if type(opt.nparticles) == int:
-            if opt.nparticles > 0:
-                self.add_3D_rFFTW_field(
-                        name = 'rFFTW_acc')
-                interp_list = []
-                for n in range(1, opt.neighbours):
-                    interp_list.append('Lagrange_n{0}'.format(n))
-                    self.add_interpolator(
-                            interp_type = 'Lagrange',
-                            name = interp_list[-1],
-                            neighbours = n,
-                            class_name =  opt.interpolator_class)
-                    for m in range(1, opt.smoothness):
-                        interp_list.append('spline_n{0}m{1}'.format(n, m))
-                        self.add_interpolator(
-                                interp_type = 'spline',
-                                name = interp_list[-1],
-                                neighbours = n,
-                                smoothness = m,
-                                class_name =  opt.interpolator_class)
-                self.add_particles(
-                        integration_steps = 2,
-                        interpolator = interp_list,
-                        acc_name = 'rFFTW_acc',
-                        class_name = opt.particle_class)
-                self.add_particles(
-                        integration_steps = 4,
-                        interpolator = interp_list,
-                        acc_name = 'rFFTW_acc',
-                        class_name = opt.particle_class)
-        self.finalize_code()
-        self.launch_jobs(opt = opt)
-        return None
-
diff --git a/bfps/NSVorticityEquation.py b/bfps/NSVorticityEquation.py
deleted file mode 100644
index 5f87097fefbb56f731a75597395d42423fc17ba6..0000000000000000000000000000000000000000
--- a/bfps/NSVorticityEquation.py
+++ /dev/null
@@ -1,864 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-import sys
-import os
-import numpy as np
-import h5py
-import argparse
-
-import bfps
-import bfps.tools
-from bfps._code import _code
-from bfps._fluid_base import _fluid_particle_base
-
-class NSVorticityEquation(_fluid_particle_base):
-    def __init__(
-            self,
-            name = 'NSVorticityEquation-v' + bfps.__version__,
-            work_dir = './',
-            simname = 'test',
-            fluid_precision = 'single',
-            fftw_plan_rigor = 'FFTW_MEASURE',
-            use_fftw_wisdom = True):
-        """
-            This code uses checkpoints for DNS restarts, and it can be stopped
-            by creating the file "stop_<simname>" in the working directory.
-            For postprocessing of field snapshots, consider creating a separate
-            HDF5 file (from the python wrapper) which contains links to all the
-            different snapshots.
-        """
-        self.fftw_plan_rigor = fftw_plan_rigor
-        _fluid_particle_base.__init__(
-                self,
-                name = name + '-' + fluid_precision,
-                work_dir = work_dir,
-                simname = simname,
-                dtype = fluid_precision,
-                use_fftw_wisdom = use_fftw_wisdom)
-        self.parameters['nu'] = float(0.1)
-        self.parameters['fmode'] = 1
-        self.parameters['famplitude'] = float(0.5)
-        self.parameters['fk0'] = float(2.0)
-        self.parameters['fk1'] = float(4.0)
-        self.parameters['forcing_type'] = 'linear'
-        self.parameters['histogram_bins'] = int(256)
-        self.parameters['max_velocity_estimate'] = float(1)
-        self.parameters['max_vorticity_estimate'] = float(1)
-        self.parameters['checkpoints_per_file'] = int(1)
-        self.file_datasets_grow = """
-                //begincpp
-                hid_t group;
-                group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT);
-                H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL);
-                H5Gclose(group);
-                //endcpp
-                """
-        self.style = {}
-        self.statistics = {}
-        self.fluid_output = """
-                fs->io_checkpoint(false);
-                """
-        # vorticity_equation specific things
-        self.includes += '#include "vorticity_equation.hpp"\n'
-        self.store_kspace = """
-                //begincpp
-                if (myrank == 0 && iteration == 0)
-                {
-                    TIMEZONE("fluid_base::store_kspace");
-                    hsize_t dims[4];
-                    hid_t space, dset;
-                    // store kspace information
-                    dset = H5Dopen(stat_file, "/kspace/kshell", H5P_DEFAULT);
-                    space = H5Dget_space(dset);
-                    H5Sget_simple_extent_dims(space, dims, NULL);
-                    H5Sclose(space);
-                    if (fs->kk->nshells != dims[0])
-                    {
-                        DEBUG_MSG(
-                            "ERROR: computed nshells %d not equal to data file nshells %d\\n",
-                            fs->kk->nshells, dims[0]);
-                    }
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kshell.front());
-                    H5Dclose(dset);
-                    dset = H5Dopen(stat_file, "/kspace/nshell", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->nshell.front());
-                    H5Dclose(dset);
-                    dset = H5Dopen(stat_file, "/kspace/kM", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->kM);
-                    H5Dclose(dset);
-                    dset = H5Dopen(stat_file, "/kspace/dk", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kk->dk);
-                    H5Dclose(dset);
-                }
-                //endcpp
-                """
-        return None
-    def add_particles(
-            self,
-            integration_steps = 2,
-            neighbours = 1,
-            smoothness = 1):
-        assert(integration_steps > 0 and integration_steps < 6)
-        self.particle_species = 1
-        self.parameters['tracers0_integration_steps'] = int(integration_steps)
-        self.parameters['tracers0_neighbours'] = int(neighbours)
-        self.parameters['tracers0_smoothness'] = int(smoothness)
-        self.parameters['tracers0_interpolator'] = 'spline'
-        self.particle_includes += """
-                #include "particles/particles_system_builder.hpp"
-                #include "particles/particles_output_hdf5.hpp"
-                """
-        ## initialize
-        self.particle_start += """
-            DEBUG_MSG(
-                    "current fname is %s\\n and iteration is %d",
-                    fs->get_current_fname().c_str(),
-                    fs->iteration);
-            std::unique_ptr<abstract_particles_system<long long int, double>> ps = particles_system_builder(
-                    fs->cvelocity,              // (field object)
-                    fs->kk,                     // (kspace object, contains dkx, dky, dkz)
-                    tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs)
-                    (long long int)nparticles,                 // to check coherency between parameters and hdf input file
-                    fs->get_current_fname(),    // particles input filename
-                    std::string("/tracers0/state/") + std::to_string(fs->iteration), // dataset name for initial input
-                    std::string("/tracers0/rhs/")  + std::to_string(fs->iteration), // dataset name for initial input
-                    tracers0_neighbours,        // parameter (interpolation no neighbours)
-                    tracers0_smoothness,        // parameter
-                    MPI_COMM_WORLD,
-                    fs->iteration+1);
-            particles_output_hdf5<long long int, double,3,3> particles_output_writer_mpi(
-                        MPI_COMM_WORLD,
-                        "tracers0",
-                        nparticles,
-                        tracers0_integration_steps);
-                    """
-        self.particle_loop += """
-                fs->compute_velocity(fs->cvorticity);
-                fs->cvelocity->ift();
-                ps->completeLoop(dt);
-                """
-        self.particle_output = """
-                {
-                    particles_output_writer_mpi.open_file(fs->get_current_fname());
-                    particles_output_writer_mpi.save(ps->getParticlesPositions(),
-                                                     ps->getParticlesRhs(),
-                                                     ps->getParticlesIndexes(),
-                                                     ps->getLocalNbParticles(),
-                                                     fs->iteration);
-                    particles_output_writer_mpi.close_file();
-                }
-                           """
-        self.particle_end += 'ps.release();\n'
-        return None
-    def create_stat_output(
-            self,
-            dset_name,
-            data_buffer,
-            data_type = 'H5T_NATIVE_DOUBLE',
-            size_setup = None,
-            close_spaces = True):
-        new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name)
-        if not type(size_setup) == type(None):
-            new_stat_output_txt += (
-                    size_setup +
-                    'wspace = H5Dget_space(Cdset);\n' +
-                    'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' +
-                    'mspace = H5Screate_simple(ndims, count, NULL);\n' +
-                    'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n')
-        new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' +
-                                'H5Dclose(Cdset);\n').format(data_type, data_buffer)
-        if close_spaces:
-            new_stat_output_txt += ('H5Sclose(mspace);\n' +
-                                    'H5Sclose(wspace);\n')
-        return new_stat_output_txt
-    def write_fluid_stats(self):
-        self.fluid_includes += '#include <cmath>\n'
-        self.fluid_includes += '#include "fftw_tools.hpp"\n'
-        self.stat_src += """
-                //begincpp
-                hid_t stat_group;
-                if (myrank == 0)
-                    stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT);
-                fs->compute_velocity(fs->cvorticity);
-                *tmp_vec_field = fs->cvelocity->get_cdata();
-                tmp_vec_field->compute_stats(
-                    fs->kk,
-                    stat_group,
-                    "velocity",
-                    fs->iteration / niter_stat,
-                    max_velocity_estimate/sqrt(3));
-                //endcpp
-                """
-        self.stat_src += """
-                //begincpp
-                *tmp_vec_field = fs->cvorticity->get_cdata();
-                tmp_vec_field->compute_stats(
-                    fs->kk,
-                    stat_group,
-                    "vorticity",
-                    fs->iteration / niter_stat,
-                    max_vorticity_estimate/sqrt(3));
-                //endcpp
-                """
-        self.stat_src += """
-                //begincpp
-                if (myrank == 0)
-                    H5Gclose(stat_group);
-                if (myrank == 0)
-                {{
-                    hid_t Cdset, wspace, mspace;
-                    int ndims;
-                    hsize_t count[4], offset[4], dims[4];
-                    offset[0] = fs->iteration/niter_stat;
-                    offset[1] = 0;
-                    offset[2] = 0;
-                    offset[3] = 0;
-                //endcpp
-                """.format(self.C_dtype)
-        if self.dtype == np.float32:
-            field_H5T = 'H5T_NATIVE_FLOAT'
-        elif self.dtype == np.float64:
-            field_H5T = 'H5T_NATIVE_DOUBLE'
-        self.stat_src += self.create_stat_output(
-                '/statistics/xlines/velocity',
-                'fs->rvelocity->get_rdata()',
-                data_type = field_H5T,
-                size_setup = """
-                    count[0] = 1;
-                    count[1] = nx;
-                    count[2] = 3;
-                    """,
-                close_spaces = False)
-        self.stat_src += self.create_stat_output(
-                '/statistics/xlines/vorticity',
-                'fs->rvorticity->get_rdata()',
-                data_type = field_H5T)
-        self.stat_src += '}\n'
-        ## checkpoint
-        self.stat_src += """
-                //begincpp
-                if (myrank == 0)
-                {
-                    std::string fname = (
-                        std::string("stop_") +
-                        std::string(simname));
-                    {
-                        struct stat file_buffer;
-                        stop_code_now = (stat(fname.c_str(), &file_buffer) == 0);
-                    }
-                }
-                MPI_Bcast(&stop_code_now, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
-                //endcpp
-                """
-        return None
-    def fill_up_fluid_code(self):
-        self.fluid_includes += '#include <cstring>\n'
-        self.fluid_variables += (
-                'vorticity_equation<{0}, FFTW> *fs;\n'.format(self.C_dtype) +
-                'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) +
-                'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype))
-        self.fluid_definitions += """
-                    typedef struct {{
-                        {0} re;
-                        {0} im;
-                    }} tmp_complex_type;
-                    """.format(self.C_dtype)
-        self.write_fluid_stats()
-        if self.dtype == np.float32:
-            field_H5T = 'H5T_NATIVE_FLOAT'
-        elif self.dtype == np.float64:
-            field_H5T = 'H5T_NATIVE_DOUBLE'
-        self.variables += 'int checkpoint;\n'
-        self.variables += 'bool stop_code_now;\n'
-        self.read_checkpoint = """
-                //begincpp
-                if (myrank == 0)
-                {
-                    hid_t dset = H5Dopen(stat_file, "checkpoint", H5P_DEFAULT);
-                    H5Dread(
-                        dset,
-                        H5T_NATIVE_INT,
-                        H5S_ALL,
-                        H5S_ALL,
-                        H5P_DEFAULT,
-                        &checkpoint);
-                    H5Dclose(dset);
-                }
-                MPI_Bcast(&checkpoint, 1, MPI_INT, 0, MPI_COMM_WORLD);
-                fs->checkpoint = checkpoint;
-                //endcpp
-        """
-        self.store_checkpoint = """
-                //begincpp
-                checkpoint = fs->checkpoint;
-                if (myrank == 0)
-                {
-                    hid_t dset = H5Dopen(stat_file, "checkpoint", H5P_DEFAULT);
-                    H5Dwrite(
-                        dset,
-                        H5T_NATIVE_INT,
-                        H5S_ALL,
-                        H5S_ALL,
-                        H5P_DEFAULT,
-                        &checkpoint);
-                    H5Dclose(dset);
-                }
-                //endcpp
-        """
-        self.fluid_start += """
-                //begincpp
-                char fname[512];
-                fs = new vorticity_equation<{0}, FFTW>(
-                        simname,
-                        nx, ny, nz,
-                        dkx, dky, dkz,
-                        {1});
-                tmp_vec_field = new field<{0}, FFTW, THREE>(
-                        nx, ny, nz,
-                        MPI_COMM_WORLD,
-                        {1});
-                tmp_scal_field = new field<{0}, FFTW, ONE>(
-                        nx, ny, nz,
-                        MPI_COMM_WORLD,
-                        {1});
-                fs->checkpoints_per_file = checkpoints_per_file;
-                fs->nu = nu;
-                fs->fmode = fmode;
-                fs->famplitude = famplitude;
-                fs->fk0 = fk0;
-                fs->fk1 = fk1;
-                strncpy(fs->forcing_type, forcing_type, 128);
-                fs->iteration = iteration;
-                {2}
-                fs->cvorticity->real_space_representation = false;
-                fs->io_checkpoint();
-                //endcpp
-                """.format(
-                        self.C_dtype,
-                        self.fftw_plan_rigor,
-                        self.read_checkpoint)
-        self.fluid_start += self.store_kspace
-        self.fluid_start += 'stop_code_now = false;\n'
-        self.fluid_loop = 'fs->step(dt);\n'
-        self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' +
-                            self.fluid_output +
-                            self.particle_output +
-                            self.store_checkpoint +
-                            '\n}\n' +
-                            'if (stop_code_now){\n' +
-                            'iteration = fs->iteration;\n' +
-                            'break;\n}\n')
-        self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' +
-                          self.fluid_output +
-                          self.particle_output +
-                          self.store_checkpoint +
-                          'DEBUG_MSG("checkpoint value is %d\\n", checkpoint);\n' +
-                          '\n}\n' +
-                          'delete fs;\n' +
-                          'delete tmp_vec_field;\n' +
-                          'delete tmp_scal_field;\n')
-        return None
-    def get_postprocess_file_name(self):
-        return os.path.join(self.work_dir, self.simname + '_postprocess.h5')
-    def get_postprocess_file(self):
-        return h5py.File(self.get_postprocess_file_name(), 'r')
-    def compute_statistics(self, iter0 = 0, iter1 = None):
-        """Run basic postprocessing on raw data.
-        The energy spectrum :math:`E(t, k)` and the enstrophy spectrum
-        :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the
-
-        .. math::
-
-            \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm
-            \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^*
-
-        tensors, and the enstrophy spectrum is also used to
-        compute the dissipation :math:`\\varepsilon(t)`.
-        These basic quantities are stored in a newly created HDF5 file,
-        ``simname_postprocess.h5``.
-        """
-        if len(list(self.statistics.keys())) > 0:
-            return None
-        self.read_parameters()
-        with self.get_data_file() as data_file:
-            if 'moments' not in data_file['statistics'].keys():
-                return None
-            iter0 = min((data_file['statistics/moments/velocity'].shape[0] *
-                         self.parameters['niter_stat']-1),
-                        iter0)
-            if type(iter1) == type(None):
-                iter1 = data_file['iteration'].value
-            else:
-                iter1 = min(data_file['iteration'].value, iter1)
-            ii0 = iter0 // self.parameters['niter_stat']
-            ii1 = iter1 // self.parameters['niter_stat']
-            self.statistics['kshell'] = data_file['kspace/kshell'].value
-            self.statistics['kM'] = data_file['kspace/kM'].value
-            self.statistics['dk'] = data_file['kspace/dk'].value
-            computation_needed = True
-            pp_file = h5py.File(self.get_postprocess_file_name(), 'a')
-            if 'ii0' in pp_file.keys():
-                computation_needed =  not (ii0 == pp_file['ii0'].value and
-                                           ii1 == pp_file['ii1'].value)
-                if computation_needed:
-                    for k in pp_file.keys():
-                        del pp_file[k]
-            if computation_needed:
-                pp_file['iter0'] = iter0
-                pp_file['iter1'] = iter1
-                pp_file['ii0'] = ii0
-                pp_file['ii1'] = ii1
-                pp_file['t'] = (self.parameters['dt']*
-                                self.parameters['niter_stat']*
-                                (np.arange(ii0, ii1+1).astype(np.float)))
-                pp_file['energy(t, k)'] = (
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] +
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] +
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2
-                pp_file['enstrophy(t, k)'] = (
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] +
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] +
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2
-                pp_file['vel_max(t)'] = data_file['statistics/moments/velocity']  [ii0:ii1+1, 9, 3]
-                pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2
-            for k in ['t',
-                      'energy(t, k)',
-                      'enstrophy(t, k)',
-                      'vel_max(t)',
-                      'renergy(t)']:
-                if k in pp_file.keys():
-                    self.statistics[k] = pp_file[k].value
-            self.compute_time_averages()
-        return None
-    def compute_time_averages(self):
-        """Compute easy stats.
-
-        Further computation of statistics based on the contents of
-        ``simname_postprocess.h5``.
-        Standard quantities are as follows
-        (consistent with [Ishihara]_):
-
-        .. math::
-
-            U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm
-            L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm
-            T_{\\textrm{int}}(t) =
-            \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)}
-
-            \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm
-            \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm
-            \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}}
-
-            Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip
-            .5cm
-            R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu}
-
-        .. [Ishihara] T. Ishihara et al,
-                      *Small-scale statistics in high-resolution direct numerical
-                      simulation of turbulence: Reynolds number dependence of
-                      one-point velocity gradient statistics*.
-                      J. Fluid Mech.,
-                      **592**, 335-366, 2007
-        """
-        for key in ['energy', 'enstrophy']:
-            self.statistics[key + '(t)'] = (self.statistics['dk'] *
-                                            np.sum(self.statistics[key + '(t, k)'], axis = 1))
-        self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3)
-        self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi /
-                                       (2*self.statistics['Uint(t)']**2)) *
-                                      np.nansum(self.statistics['energy(t, k)'] /
-                                                self.statistics['kshell'][None, :], axis = 1))
-        for key in ['energy',
-                    'enstrophy',
-                    'vel_max',
-                    'Uint',
-                    'Lint']:
-            if key + '(t)' in self.statistics.keys():
-                self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0)
-        for suffix in ['', '(t)']:
-            self.statistics['diss'    + suffix] = (self.parameters['nu'] *
-                                                   self.statistics['enstrophy' + suffix]*2)
-            self.statistics['etaK'    + suffix] = (self.parameters['nu']**3 /
-                                                   self.statistics['diss' + suffix])**.25
-            self.statistics['tauK'    + suffix] =  (self.parameters['nu'] /
-                                                    self.statistics['diss' + suffix])**.5
-            self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] *
-                                              self.statistics['Lint' + suffix] /
-                                              self.parameters['nu'])
-            self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] *
-                                                  self.statistics['Uint' + suffix]**2 /
-                                                  self.statistics['diss' + suffix])**.5
-            self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] *
-                                                   self.statistics['lambda' + suffix] /
-                                                   self.parameters['nu'])
-            self.statistics['kMeta' + suffix] = (self.statistics['kM'] *
-                                                 self.statistics['etaK' + suffix])
-            if self.parameters['dealias_type'] == 1:
-                self.statistics['kMeta' + suffix] *= 0.8
-        self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint']
-        self.statistics['Taylor_microscale'] = self.statistics['lambda']
-        return None
-    def set_plt_style(
-            self,
-            style = {'dashes' : (None, None)}):
-        self.style.update(style)
-        return None
-    def convert_complex_from_binary(
-            self,
-            field_name = 'vorticity',
-            iteration = 0,
-            file_name = None):
-        """read the Fourier representation of a vector field.
-
-        Read the binary file containing iteration ``iteration`` of the
-        field ``field_name``, and write it in a ``.h5`` file.
-        """
-        data = np.memmap(
-                os.path.join(self.work_dir,
-                             self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)),
-                dtype = self.ctype,
-                mode = 'r',
-                shape = (self.parameters['ny'],
-                         self.parameters['nz'],
-                         self.parameters['nx']//2+1,
-                         3))
-        if type(file_name) == type(None):
-            file_name = self.simname + '_{0}_i{1:0>5x}.h5'.format('c' + field_name, iteration)
-            file_name = os.path.join(self.work_dir, file_name)
-        f = h5py.File(file_name, 'a')
-        f[field_name + '/complex/{0}'.format(iteration)] = data
-        f.close()
-        return None
-    def write_par(
-            self,
-            iter0 = 0,
-            particle_ic = None):
-        _fluid_particle_base.write_par(self, iter0 = iter0)
-        with h5py.File(self.get_data_file_name(), 'r+') as ofile:
-            kspace = self.get_kspace()
-            nshells = kspace['nshell'].shape[0]
-            vec_stat_datasets = ['velocity', 'vorticity']
-            scal_stat_datasets = []
-            for k in vec_stat_datasets:
-                time_chunk = 2**20//(8*3*self.parameters['nx']) # FIXME: use proper size of self.dtype
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/xlines/' + k,
-                                     (1, self.parameters['nx'], 3),
-                                     chunks = (time_chunk, self.parameters['nx'], 3),
-                                     maxshape = (None, self.parameters['nx'], 3),
-                                     dtype = self.dtype)
-            for k in vec_stat_datasets:
-                time_chunk = 2**20//(8*3*3*nshells)
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/spectra/' + k + '_' + k,
-                                     (1, nshells, 3, 3),
-                                     chunks = (time_chunk, nshells, 3, 3),
-                                     maxshape = (None, nshells, 3, 3),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*4*10)
-                time_chunk = max(time_chunk, 1)
-                a = ofile.create_dataset('statistics/moments/' + k,
-                                     (1, 10, 4),
-                                     chunks = (time_chunk, 10, 4),
-                                     maxshape = (None, 10, 4),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*4*self.parameters['histogram_bins'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/' + k,
-                                     (1,
-                                      self.parameters['histogram_bins'],
-                                      4),
-                                     chunks = (time_chunk,
-                                               self.parameters['histogram_bins'],
-                                               4),
-                                     maxshape = (None,
-                                                 self.parameters['histogram_bins'],
-                                                 4),
-                                     dtype = np.int64)
-            ofile['checkpoint'] = int(0)
-        if self.particle_species == 0:
-            return None
-
-        if type(particle_ic) == type(None):
-            pbase_shape = (self.parameters['nparticles'],)
-            number_of_particles = self.parameters['nparticles']
-        else:
-            pbase_shape = particle_ic.shape[:-1]
-            assert(particle_ic.shape[-1] == 3)
-            number_of_particles = 1
-            for val in pbase_shape[1:]:
-                number_of_particles *= val
-        with h5py.File(self.get_checkpoint_0_fname(), 'a') as ofile:
-            s = 0
-            ofile.create_group('tracers{0}'.format(s))
-            ofile.create_group('tracers{0}/rhs'.format(s))
-            ofile.create_group('tracers{0}/state'.format(s))
-            ofile['tracers{0}/rhs'.format(s)].create_dataset(
-                    '0',
-                    shape = (
-                        (self.parameters['tracers{0}_integration_steps'.format(s)],) +
-                        pbase_shape +
-                        (3,)),
-                    dtype = np.float)
-            ofile['tracers{0}/state'.format(s)].create_dataset(
-                    '0',
-                    shape = (
-                        pbase_shape +
-                        (3,)),
-                    dtype = np.float)
-        return None
-    def specific_parser_arguments(
-            self,
-            parser):
-        _fluid_particle_base.specific_parser_arguments(self, parser)
-        parser.add_argument(
-                '--src-wd',
-                type = str,
-                dest = 'src_work_dir',
-                default = '')
-        parser.add_argument(
-                '--src-simname',
-                type = str,
-                dest = 'src_simname',
-                default = '')
-        parser.add_argument(
-                '--src-iteration',
-                type = int,
-                dest = 'src_iteration',
-                default = 0)
-        parser.add_argument(
-               '--njobs',
-               type = int, dest = 'njobs',
-               default = 1)
-        parser.add_argument(
-               '--kMeta',
-               type = float,
-               dest = 'kMeta',
-               default = 2.0)
-        parser.add_argument(
-               '--dtfactor',
-               type = float,
-               dest = 'dtfactor',
-               default = 0.5,
-               help = 'dt is computed as DTFACTOR / N')
-        parser.add_argument(
-               '--particle-rand-seed',
-               type = int,
-               dest = 'particle_rand_seed',
-               default = None)
-        parser.add_argument(
-               '--pclouds',
-               type = int,
-               dest = 'pclouds',
-               default = 1,
-               help = ('number of particle clouds. Particle "clouds" '
-                       'consist of particles distributed according to '
-                       'pcloud-type.'))
-        parser.add_argument(
-                '--pcloud-type',
-                choices = ['random-cube',
-                           'regular-cube'],
-                dest = 'pcloud_type',
-                default = 'random-cube')
-        parser.add_argument(
-               '--particle-cloud-size',
-               type = float,
-               dest = 'particle_cloud_size',
-               default = 2*np.pi)
-        parser.add_argument(
-                '--neighbours',
-                type = int,
-                dest = 'neighbours',
-                default = 1)
-        parser.add_argument(
-                '--smoothness',
-                type = int,
-                dest = 'smoothness',
-                default = 1)
-        return None
-    def prepare_launch(
-            self,
-            args = []):
-        """Set up reasonable parameters.
-
-        With the default Lundgren forcing applied in the band [2, 4],
-        we can estimate the dissipation, therefore we can estimate
-        :math:`k_M \\eta_K` and constrain the viscosity.
-
-        In brief, the command line parameter :math:`k_M \\eta_K` is
-        used in the following formula for :math:`\\nu` (:math:`N` is the
-        number of real space grid points per coordinate):
-
-        .. math::
-
-            \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3}
-
-        With this choice, the average dissipation :math:`\\varepsilon`
-        will be close to 0.4, and the integral scale velocity will be
-        close to 0.77, yielding the approximate value for the Taylor
-        microscale and corresponding Reynolds number:
-
-        .. math::
-
-            \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in
-            R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6}
-
-        """
-        opt = _code.prepare_launch(self, args = args)
-        self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3)
-        self.parameters['dt'] = (opt.dtfactor / opt.n)
-        # custom famplitude for 288 and 576
-        if opt.n == 288:
-            self.parameters['famplitude'] = 0.45
-        elif opt.n == 576:
-            self.parameters['famplitude'] = 0.47
-        if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0):
-            self.parameters['niter_out'] = self.parameters['niter_todo']
-        if len(opt.src_work_dir) == 0:
-            opt.src_work_dir = os.path.realpath(opt.work_dir)
-        self.pars_from_namespace(opt)
-        return opt
-    def launch(
-            self,
-            args = [],
-            **kwargs):
-        opt = self.prepare_launch(args = args)
-        if type(opt.nparticles) != type(None):
-            if opt.nparticles > 0:
-                self.name += '-particles'
-                self.add_particles(
-                    integration_steps = 4,
-                    neighbours = opt.neighbours,
-                    smoothness = opt.smoothness)
-        self.fill_up_fluid_code()
-        self.finalize_code()
-        self.launch_jobs(opt = opt, **kwargs)
-        return None
-    def get_checkpoint_0_fname(self):
-        return os.path.join(
-                    self.work_dir,
-                    self.simname + '_checkpoint_0.h5')
-    def generate_tracer_state(
-            self,
-            rseed = None,
-            iteration = 0,
-            species = 0,
-            write_to_file = False,
-            ncomponents = 3,
-            testing = False,
-            data = None):
-        if (type(data) == type(None)):
-            if not type(rseed) == type(None):
-                np.random.seed(rseed)
-            #point with problems: 5.37632864e+00,   6.10414710e+00,   6.25256493e+00]
-            data = np.zeros(self.parameters['nparticles']*ncomponents).reshape(-1, ncomponents)
-            data[:, :3] = np.random.random((self.parameters['nparticles'], 3))*2*np.pi
-        if testing:
-            #data[0] = np.array([3.26434, 4.24418, 3.12157])
-            data[:] = np.array([ 0.72086101,  2.59043666,  6.27501953])
-        with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file:
-            data_file['tracers{0}/state/0'.format(species)][:] = data
-        if write_to_file:
-            data.tofile(
-                    os.path.join(
-                        self.work_dir,
-                        "tracers{0}_state_i{1:0>5x}".format(species, iteration)))
-        return data
-    def launch_jobs(
-            self,
-            opt = None,
-            particle_initial_condition = None):
-        if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')):
-            # take care of fields' initial condition
-            if not os.path.exists(self.get_checkpoint_0_fname()):
-                f = h5py.File(self.get_checkpoint_0_fname(), 'w')
-                if len(opt.src_simname) > 0:
-                    source_cp = 0
-                    src_file = 'not_a_file'
-                    while True:
-                        src_file = os.path.join(
-                            os.path.realpath(opt.src_work_dir),
-                            opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp))
-                        f0 = h5py.File(src_file, 'r')
-                        if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys():
-                            f0.close()
-                            break
-                        source_cp += 1
-                    f['vorticity/complex/{0}'.format(0)] = h5py.ExternalLink(
-                            src_file,
-                            'vorticity/complex/{0}'.format(opt.src_iteration))
-                else:
-                    data = self.generate_vector_field(
-                           write_to_file = False,
-                           spectra_slope = 2.0,
-                           amplitude = 0.05)
-                    f['vorticity/complex/{0}'.format(0)] = data
-                f.close()
-            # take care of particles' initial condition
-            if opt.pclouds > 1:
-                np.random.seed(opt.particle_rand_seed)
-                if opt.pcloud_type == 'random-cube':
-                    particle_initial_condition = (
-                        np.random.random((opt.pclouds, 1, 3))*2*np.pi +
-                        np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size)
-                elif opt.pcloud_type == 'regular-cube':
-                    onedarray = np.linspace(
-                            -opt.particle_cloud_size/2,
-                            opt.particle_cloud_size/2,
-                            self.parameters['nparticles'])
-                    particle_initial_condition = np.zeros(
-                            (opt.pclouds,
-                             self.parameters['nparticles'],
-                             self.parameters['nparticles'],
-                             self.parameters['nparticles'], 3),
-                            dtype = np.float64)
-                    particle_initial_condition[:] = \
-                        np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi
-                    particle_initial_condition[..., 0] += onedarray[None, None, None, :]
-                    particle_initial_condition[..., 1] += onedarray[None, None, :, None]
-                    particle_initial_condition[..., 2] += onedarray[None, :, None, None]
-            self.write_par(
-                    particle_ic = particle_initial_condition)
-            if self.parameters['nparticles'] > 0:
-                data = self.generate_tracer_state(
-                        species = 0,
-                        rseed = opt.particle_rand_seed,
-                        data = particle_initial_condition)
-                for s in range(1, self.particle_species):
-                    self.generate_tracer_state(species = s, data = data)
-        self.run(
-                nb_processes = opt.nb_processes,
-                nb_threads_per_process = opt.nb_threads_per_process,
-                njobs = opt.njobs,
-                hours = opt.minutes // 60,
-                minutes = opt.minutes % 60,
-                no_submit = opt.no_submit)
-        return None
-
-if __name__ == '__main__':
-    pass
-
diff --git a/bfps/NavierStokes.py b/bfps/NavierStokes.py
deleted file mode 100644
index 59fb907c4a79f73dec5b6a8cfcb06d99b0b584bb..0000000000000000000000000000000000000000
--- a/bfps/NavierStokes.py
+++ /dev/null
@@ -1,1213 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-import sys
-import os
-import numpy as np
-import h5py
-import argparse
-
-import bfps
-import bfps.tools
-from ._code import _code
-from ._fluid_base import _fluid_particle_base
-
-class NavierStokes(_fluid_particle_base):
-    """Objects of this class can be used to generate production DNS codes.
-    Any functionality that users require should be available through this class,
-    in the sense that they can implement whatever they need by simply inheriting
-    this class.
-    """
-    def __init__(
-            self,
-            name = 'NavierStokes-v' + bfps.__version__,
-            work_dir = './',
-            simname = 'test',
-            fluid_precision = 'single',
-            fftw_plan_rigor = 'FFTW_MEASURE',
-            frozen_fields = False,
-            use_fftw_wisdom = True,
-            QR_stats_on = False,
-            Lag_acc_stats_on = False):
-        self.QR_stats_on = QR_stats_on
-        self.Lag_acc_stats_on = Lag_acc_stats_on
-        self.frozen_fields = frozen_fields
-        self.fftw_plan_rigor = fftw_plan_rigor
-        _fluid_particle_base.__init__(
-                self,
-                name = name + '-' + fluid_precision,
-                work_dir = work_dir,
-                simname = simname,
-                dtype = fluid_precision,
-                use_fftw_wisdom = use_fftw_wisdom)
-        self.parameters['nu'] = 0.1
-        self.parameters['fmode'] = 1
-        self.parameters['famplitude'] = 0.5
-        self.parameters['fk0'] = 2.0
-        self.parameters['fk1'] = 4.0
-        self.parameters['forcing_type'] = 'linear'
-        self.parameters['histogram_bins'] = 256
-        self.parameters['max_velocity_estimate'] = 1.0
-        self.parameters['max_vorticity_estimate'] = 1.0
-        self.parameters['max_Lag_acc_estimate'] = 1.0
-        self.parameters['max_pressure_estimate'] = 1.0
-        self.parameters['QR2D_histogram_bins'] = 64
-        self.parameters['max_trS2_estimate'] = 1.0
-        self.parameters['max_Q_estimate'] = 1.0
-        self.parameters['max_R_estimate'] = 1.0
-        self.file_datasets_grow = """
-                //begincpp
-                hid_t group;
-                group = H5Gopen(stat_file, "/statistics", H5P_DEFAULT);
-                H5Ovisit(group, H5_INDEX_NAME, H5_ITER_NATIVE, grow_statistics_dataset, NULL);
-                H5Gclose(group);
-                //endcpp
-                """
-        self.style = {}
-        self.statistics = {}
-        self.fluid_output = 'fs->write(\'v\', \'c\');\n'
-        return None
-    def create_stat_output(
-            self,
-            dset_name,
-            data_buffer,
-            data_type = 'H5T_NATIVE_DOUBLE',
-            size_setup = None,
-            close_spaces = True):
-        new_stat_output_txt = 'Cdset = H5Dopen(stat_file, "{0}", H5P_DEFAULT);\n'.format(dset_name)
-        if not type(size_setup) == type(None):
-            new_stat_output_txt += (
-                    size_setup +
-                    'wspace = H5Dget_space(Cdset);\n' +
-                    'ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);\n' +
-                    'mspace = H5Screate_simple(ndims, count, NULL);\n' +
-                    'H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);\n')
-        new_stat_output_txt += ('H5Dwrite(Cdset, {0}, mspace, wspace, H5P_DEFAULT, {1});\n' +
-                                'H5Dclose(Cdset);\n').format(data_type, data_buffer)
-        if close_spaces:
-            new_stat_output_txt += ('H5Sclose(mspace);\n' +
-                                    'H5Sclose(wspace);\n')
-        return new_stat_output_txt
-    def write_fluid_stats(self):
-        self.fluid_includes += '#include <cmath>\n'
-        self.fluid_includes += '#include "fftw_tools.hpp"\n'
-        self.stat_src += """
-                //begincpp
-                hid_t stat_group;
-                if (myrank == 0)
-                    stat_group = H5Gopen(stat_file, "statistics", H5P_DEFAULT);
-                fs->compute_velocity(fs->cvorticity);
-                std::vector<double> max_estimate_vector;
-                max_estimate_vector.resize(4);
-                *tmp_vec_field = fs->cvelocity;
-                switch(fs->dealias_type)
-                {
-                    case 0:
-                        tmp_vec_field->compute_stats(
-                            kk_two_thirds,
-                            stat_group,
-                            "velocity",
-                            fs->iteration / niter_stat,
-                            max_velocity_estimate/sqrt(3));
-                        break;
-                    case 1:
-                        tmp_vec_field->compute_stats(
-                            kk_smooth,
-                            stat_group,
-                            "velocity",
-                            fs->iteration / niter_stat,
-                            max_velocity_estimate/sqrt(3));
-                        break;
-                }
-                //endcpp
-                """
-        if self.Lag_acc_stats_on:
-            self.stat_src += """
-                    //begincpp
-                    tmp_vec_field->real_space_representation = false;
-                    fs->compute_Lagrangian_acceleration(tmp_vec_field->get_cdata());
-                    switch(fs->dealias_type)
-                    {
-                        case 0:
-                            tmp_vec_field->compute_stats(
-                                kk_two_thirds,
-                                stat_group,
-                                "Lagrangian_acceleration",
-                                fs->iteration / niter_stat,
-                                max_Lag_acc_estimate);
-                            break;
-                        case 1:
-                            tmp_vec_field->compute_stats(
-                                kk_smooth,
-                                stat_group,
-                                "Lagrangian_acceleration",
-                                fs->iteration / niter_stat,
-                                max_Lag_acc_estimate);
-                            break;
-                    }
-                    tmp_scal_field->real_space_representation = false;
-                    fs->compute_velocity(fs->cvorticity);
-                    fs->ift_velocity();
-                    fs->compute_pressure(tmp_scal_field->get_cdata());
-                    switch(fs->dealias_type)
-                    {
-                        case 0:
-                            tmp_scal_field->compute_stats(
-                                kk_two_thirds,
-                                stat_group,
-                                "pressure",
-                                fs->iteration / niter_stat,
-                                max_pressure_estimate);
-                            break;
-                        case 1:
-                            tmp_scal_field->compute_stats(
-                                kk_smooth,
-                                stat_group,
-                                "pressure",
-                                fs->iteration / niter_stat,
-                                max_pressure_estimate);
-                            break;
-                    }
-                    //endcpp
-                    """
-        self.stat_src += """
-                //begincpp
-                *tmp_vec_field = fs->cvorticity;
-                switch(fs->dealias_type)
-                {
-                    case 0:
-                        tmp_vec_field->compute_stats(
-                            kk_two_thirds,
-                            stat_group,
-                            "vorticity",
-                            fs->iteration / niter_stat,
-                            max_vorticity_estimate/sqrt(3));
-                        break;
-                    case 1:
-                        tmp_vec_field->compute_stats(
-                            kk_smooth,
-                            stat_group,
-                            "vorticity",
-                            fs->iteration / niter_stat,
-                            max_vorticity_estimate/sqrt(3));
-                        break;
-                }
-                //endcpp
-                """
-        if self.QR_stats_on:
-            self.stat_src += """
-                //begincpp
-                double *trS2_Q_R_moments  = new double[10*3];
-                double *gradu_moments     = new double[10*9];
-                ptrdiff_t *hist_trS2_Q_R  = new ptrdiff_t[histogram_bins*3];
-                ptrdiff_t *hist_gradu     = new ptrdiff_t[histogram_bins*9];
-                ptrdiff_t *hist_QR2D      = new ptrdiff_t[QR2D_histogram_bins*QR2D_histogram_bins];
-                double trS2QR_max_estimates[3];
-                double gradu_max_estimates[9];
-                trS2QR_max_estimates[0] = max_trS2_estimate;
-                trS2QR_max_estimates[1] = max_Q_estimate;
-                trS2QR_max_estimates[2] = max_R_estimate;
-                std::fill_n(gradu_max_estimates, 9, sqrt(3*max_trS2_estimate));
-                fs->compute_gradient_statistics(
-                    fs->cvelocity,
-                    gradu_moments,
-                    trS2_Q_R_moments,
-                    hist_gradu,
-                    hist_trS2_Q_R,
-                    hist_QR2D,
-                    trS2QR_max_estimates,
-                    gradu_max_estimates,
-                    histogram_bins,
-                    QR2D_histogram_bins);
-                //endcpp
-                """
-        self.stat_src += """
-                //begincpp
-                if (myrank == 0)
-                    H5Gclose(stat_group);
-                if (fs->cd->myrank == 0)
-                {{
-                    hid_t Cdset, wspace, mspace;
-                    int ndims;
-                    hsize_t count[4], offset[4], dims[4];
-                    offset[0] = fs->iteration/niter_stat;
-                    offset[1] = 0;
-                    offset[2] = 0;
-                    offset[3] = 0;
-                //endcpp
-                """.format(self.C_dtype)
-        if self.dtype == np.float32:
-            field_H5T = 'H5T_NATIVE_FLOAT'
-        elif self.dtype == np.float64:
-            field_H5T = 'H5T_NATIVE_DOUBLE'
-        if self.QR_stats_on:
-            self.stat_src += self.create_stat_output(
-                    '/statistics/moments/trS2_Q_R',
-                    'trS2_Q_R_moments',
-                    size_setup ="""
-                        count[0] = 1;
-                        count[1] = 10;
-                        count[2] = 3;
-                        """)
-            self.stat_src += self.create_stat_output(
-                    '/statistics/moments/velocity_gradient',
-                    'gradu_moments',
-                    size_setup ="""
-                        count[0] = 1;
-                        count[1] = 10;
-                        count[2] = 3;
-                        count[3] = 3;
-                        """)
-            self.stat_src += self.create_stat_output(
-                    '/statistics/histograms/trS2_Q_R',
-                    'hist_trS2_Q_R',
-                    data_type = 'H5T_NATIVE_INT64',
-                    size_setup = """
-                        count[0] = 1;
-                        count[1] = histogram_bins;
-                        count[2] = 3;
-                        """)
-            self.stat_src += self.create_stat_output(
-                    '/statistics/histograms/velocity_gradient',
-                    'hist_gradu',
-                    data_type = 'H5T_NATIVE_INT64',
-                    size_setup = """
-                        count[0] = 1;
-                        count[1] = histogram_bins;
-                        count[2] = 3;
-                        count[3] = 3;
-                        """)
-            self.stat_src += self.create_stat_output(
-                    '/statistics/histograms/QR2D',
-                    'hist_QR2D',
-                    data_type = 'H5T_NATIVE_INT64',
-                    size_setup = """
-                        count[0] = 1;
-                        count[1] = QR2D_histogram_bins;
-                        count[2] = QR2D_histogram_bins;
-                        """)
-        self.stat_src += '}\n'
-        if self.QR_stats_on:
-            self.stat_src += """
-                //begincpp
-                delete[] trS2_Q_R_moments;
-                delete[] gradu_moments;
-                delete[] hist_trS2_Q_R;
-                delete[] hist_gradu;
-                delete[] hist_QR2D;
-                //endcpp
-                """
-        return None
-    def fill_up_fluid_code(self):
-        self.fluid_includes += '#include <cstring>\n'
-        self.fluid_variables += (
-                'fluid_solver<{0}> *fs;\n'.format(self.C_dtype) +
-                'field<{0}, FFTW, THREE> *tmp_vec_field;\n'.format(self.C_dtype) +
-                'field<{0}, FFTW, ONE> *tmp_scal_field;\n'.format(self.C_dtype) +
-                'kspace<FFTW, SMOOTH> *kk_smooth;\n' +
-                'kspace<FFTW, TWO_THIRDS> *kk_two_thirds;\n')
-        self.fluid_definitions += """
-                    typedef struct {{
-                        {0} re;
-                        {0} im;
-                    }} tmp_complex_type;
-                    """.format(self.C_dtype)
-        self.write_fluid_stats()
-        if self.dtype == np.float32:
-            field_H5T = 'H5T_NATIVE_FLOAT'
-        elif self.dtype == np.float64:
-            field_H5T = 'H5T_NATIVE_DOUBLE'
-        self.fluid_start += """
-                //begincpp
-                char fname[512];
-                fs = new fluid_solver<{0}>(
-                        simname,
-                        nx, ny, nz,
-                        dkx, dky, dkz,
-                        dealias_type,
-                        {1});
-                tmp_vec_field = new field<{0}, FFTW, THREE>(
-                        nx, ny, nz,
-                        MPI_COMM_WORLD,
-                        {1});
-                tmp_scal_field = new field<{0}, FFTW, ONE>(
-                        nx, ny, nz,
-                        MPI_COMM_WORLD,
-                        {1});
-                kk_smooth = new kspace<FFTW, SMOOTH>(
-                        tmp_vec_field->clayout,
-                        fs->dkx, fs->dky, fs->dkz);
-                kk_two_thirds = new kspace<FFTW, TWO_THIRDS>(
-                        tmp_vec_field->clayout,
-                        fs->dkx, fs->dky, fs->dkz);
-                fs->nu = nu;
-                fs->fmode = fmode;
-                fs->famplitude = famplitude;
-                fs->fk0 = fk0;
-                fs->fk1 = fk1;
-                strncpy(fs->forcing_type, forcing_type, 128);
-                fs->iteration = iteration;
-                fs->read('v', 'c');
-                //endcpp
-                """.format(self.C_dtype, self.fftw_plan_rigor, field_H5T)
-        self.fluid_start += self.store_kspace
-        if not self.frozen_fields:
-            self.fluid_loop = 'fs->step(dt);\n'
-        else:
-            self.fluid_loop = ''
-        self.fluid_loop += ('if (fs->iteration % niter_out == 0)\n{\n' +
-                            self.fluid_output + '\n}\n')
-        self.fluid_end = ('if (fs->iteration % niter_out != 0)\n{\n' +
-                          self.fluid_output + '\n}\n' +
-                          'delete fs;\n' +
-                          'delete tmp_vec_field;\n' +
-                          'delete tmp_scal_field;\n' +
-                          'delete kk_smooth;\n' +
-                          'delete kk_two_thirds;\n')
-        return None
-    def add_3D_rFFTW_field(
-            self,
-            name = 'rFFTW_acc'):
-        if self.dtype == np.float32:
-            FFTW = 'fftwf'
-        elif self.dtype == np.float64:
-            FFTW = 'fftw'
-        self.fluid_variables += '{0} *{1};\n'.format(self.C_dtype, name)
-        self.fluid_start += '{0} = {1}_alloc_real(2*fs->cd->local_size);\n'.format(name, FFTW)
-        self.fluid_end   += '{0}_free({1});\n'.format(FFTW, name)
-        return None
-    def add_interpolator(
-            self,
-            interp_type = 'spline',
-            neighbours = 1,
-            smoothness = 1,
-            name = 'field_interpolator',
-            field_name = 'fs->rvelocity',
-            class_name = 'rFFTW_interpolator'):
-        self.fluid_includes += '#include "{0}.hpp"\n'.format(class_name)
-        self.fluid_variables += '{0} <{1}, {2}> *{3};\n'.format(
-                class_name, self.C_dtype, neighbours, name)
-        self.parameters[name + '_type'] = interp_type
-        self.parameters[name + '_neighbours'] = neighbours
-        if interp_type == 'spline':
-            self.parameters[name + '_smoothness'] = smoothness
-            beta_name = 'beta_n{0}_m{1}'.format(neighbours, smoothness)
-        elif interp_type == 'Lagrange':
-            beta_name = 'beta_Lagrange_n{0}'.format(neighbours)
-        self.fluid_start += '{0} = new {1}<{2}, {3}>(fs, {4}, {5});\n'.format(
-                name,
-                class_name,
-                self.C_dtype,
-                neighbours,
-                beta_name,
-                field_name)
-        self.fluid_end += 'delete {0};\n'.format(name)
-        return None
-    def add_particles(
-            self,
-            integration_steps = 2,
-            kcut = None,
-            interpolator = 'field_interpolator',
-            frozen_particles = False,
-            acc_name = None,
-            class_name = 'particles'):
-        """Adds code for tracking a series of particle species, each
-        consisting of `nparticles` particles.
-
-        :type integration_steps: int, list of int
-        :type kcut: None (default), str, list of str
-        :type interpolator: str, list of str
-        :type frozen_particles: bool
-        :type acc_name: str
-
-        .. warning:: if not None, kcut must be a list of decreasing
-                     wavenumbers, since filtering is done sequentially
-                     on the same complex FFTW field.
-        """
-        if self.dtype == np.float32:
-            FFTW = 'fftwf'
-        elif self.dtype == np.float64:
-            FFTW = 'fftw'
-        s0 = self.particle_species
-        if type(integration_steps) == int:
-            integration_steps = [integration_steps]
-        if type(kcut) == str:
-            kcut = [kcut]
-        if type(interpolator) == str:
-            interpolator = [interpolator]
-        nspecies = max(len(integration_steps), len(interpolator))
-        if type(kcut) == list:
-            nspecies = max(nspecies, len(kcut))
-        if len(integration_steps) == 1:
-            integration_steps = [integration_steps[0] for s in range(nspecies)]
-        if len(interpolator) == 1:
-            interpolator = [interpolator[0] for s in range(nspecies)]
-        if type(kcut) == list:
-            if len(kcut) == 1:
-                kcut = [kcut[0] for s in range(nspecies)]
-        assert(len(integration_steps) == nspecies)
-        assert(len(interpolator) == nspecies)
-        if type(kcut) == list:
-            assert(len(kcut) == nspecies)
-        for s in range(nspecies):
-            neighbours = self.parameters[interpolator[s] + '_neighbours']
-            if type(kcut) == list:
-                self.parameters['tracers{0}_kcut'.format(s0 + s)] = kcut[s]
-            self.parameters['tracers{0}_interpolator'.format(s0 + s)] = interpolator[s]
-            self.parameters['tracers{0}_acc_on'.format(s0 + s)] = int(not type(acc_name) == type(None))
-            self.parameters['tracers{0}_integration_steps'.format(s0 + s)] = integration_steps[s]
-            self.file_datasets_grow += """
-                        //begincpp
-                        group = H5Gopen(particle_file, "/tracers{0}", H5P_DEFAULT);
-                        grow_particle_datasets(group, "", NULL, NULL);
-                        H5Gclose(group);
-                        //endcpp
-                        """.format(s0 + s)
-
-        #### code that outputs statistics
-        output_vel_acc = '{\n'
-        # array for putting sampled velocity in
-        # must compute velocity, just in case it was messed up by some
-        # other particle species before the stats
-        output_vel_acc += 'fs->compute_velocity(fs->cvorticity);\n'
-        if not type(kcut) == list:
-            output_vel_acc += 'fs->ift_velocity();\n'
-        if not type(acc_name) == type(None):
-            # array for putting sampled acceleration in
-            # must compute acceleration
-            output_vel_acc += 'fs->compute_Lagrangian_acceleration({0});\n'.format(acc_name)
-        for s in range(nspecies):
-            if type(kcut) == list:
-                output_vel_acc += 'fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut[s])
-                output_vel_acc += 'fs->ift_velocity();\n'
-            output_vel_acc += """
-                {0}->read_rFFTW(fs->rvelocity);
-                ps{1}->sample({0}, "velocity");
-                """.format(interpolator[s], s0 + s)
-            if not type(acc_name) == type(None):
-                output_vel_acc += """
-                    {0}->read_rFFTW({1});
-                    ps{2}->sample({0}, "acceleration");
-                    """.format(interpolator[s], acc_name, s0 + s)
-        output_vel_acc += '}\n'
-
-        #### initialize, stepping and finalize code
-        if not type(kcut) == list:
-            update_fields = ('fs->compute_velocity(fs->cvorticity);\n' +
-                             'fs->ift_velocity();\n')
-            self.particle_start += update_fields
-            self.particle_loop  += update_fields
-        else:
-            self.particle_loop += 'fs->compute_velocity(fs->cvorticity);\n'
-        self.particle_includes += '#include "{0}.hpp"\n'.format(class_name)
-        self.particle_stat_src += (
-                'if (ps0->iteration % niter_part == 0)\n' +
-                '{\n')
-        for s in range(nspecies):
-            neighbours = self.parameters[interpolator[s] + '_neighbours']
-            self.particle_start += 'sprintf(fname, "tracers{0}");\n'.format(s0 + s)
-            self.particle_end += ('ps{0}->write();\n' +
-                                  'delete ps{0};\n').format(s0 + s)
-            self.particle_variables += '{0}<VELOCITY_TRACER, {1}, {2}> *ps{3};\n'.format(
-                    class_name,
-                    self.C_dtype,
-                    neighbours,
-                    s0 + s)
-            self.particle_start += ('ps{0} = new {1}<VELOCITY_TRACER, {2}, {3}>(\n' +
-                                    'fname, particle_file, {4},\n' +
-                                    'niter_part, tracers{0}_integration_steps);\n').format(
-                                            s0 + s,
-                                            class_name,
-                                            self.C_dtype,
-                                            neighbours,
-                                            interpolator[s])
-            self.particle_start += ('ps{0}->dt = dt;\n' +
-                                    'ps{0}->iteration = iteration;\n' +
-                                    'ps{0}->read();\n').format(s0 + s)
-            if not frozen_particles:
-                if type(kcut) == list:
-                    update_field = ('fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut[s]) +
-                                    'fs->ift_velocity();\n')
-                    self.particle_loop += update_field
-                self.particle_loop += '{0}->read_rFFTW(fs->rvelocity);\n'.format(interpolator[s])
-                self.particle_loop += 'ps{0}->step();\n'.format(s0 + s)
-            self.particle_stat_src += 'ps{0}->write(false);\n'.format(s0 + s)
-        self.particle_stat_src += output_vel_acc
-        self.particle_stat_src += '}\n'
-        self.particle_species += nspecies
-        return None
-    def get_cache_file_name(self):
-        return os.path.join(self.work_dir, self.simname + '_cache.h5')
-    def get_cache_file(self):
-        return h5py.File(self.get_postprocess_file_name(), 'r')
-    def get_postprocess_file_name(self):
-        return self.get_cache_file_name()
-    def get_postprocess_file(self):
-        return h5py.File(self.get_postprocess_file_name(), 'r')
-    def compute_statistics(self, iter0 = 0, iter1 = None):
-        """Run basic postprocessing on raw data.
-        The energy spectrum :math:`E(t, k)` and the enstrophy spectrum
-        :math:`\\frac{1}{2}\omega^2(t, k)` are computed from the
-
-        .. math::
-
-            \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{u_i} \\hat{u_j}^*, \\hskip .5cm
-            \sum_{k \\leq \\|\\mathbf{k}\\| \\leq k+dk}\\hat{\omega_i} \\hat{\\omega_j}^*
-
-        tensors, and the enstrophy spectrum is also used to
-        compute the dissipation :math:`\\varepsilon(t)`.
-        These basic quantities are stored in a newly created HDF5 file,
-        ``simname_cache.h5``.
-        """
-        if len(list(self.statistics.keys())) > 0:
-            return None
-        self.read_parameters()
-        with self.get_data_file() as data_file:
-            if 'moments' not in data_file['statistics'].keys():
-                return None
-            iter0 = min((data_file['statistics/moments/velocity'].shape[0] *
-                         self.parameters['niter_stat']-1),
-                        iter0)
-            if type(iter1) == type(None):
-                iter1 = data_file['iteration'].value
-            else:
-                iter1 = min(data_file['iteration'].value, iter1)
-            ii0 = iter0 // self.parameters['niter_stat']
-            ii1 = iter1 // self.parameters['niter_stat']
-            self.statistics['kshell'] = data_file['kspace/kshell'].value
-            self.statistics['kM'] = data_file['kspace/kM'].value
-            self.statistics['dk'] = data_file['kspace/dk'].value
-            computation_needed = True
-            pp_file = h5py.File(self.get_postprocess_file_name(), 'a')
-            if 'ii0' in pp_file.keys():
-                computation_needed =  not (ii0 == pp_file['ii0'].value and
-                                           ii1 == pp_file['ii1'].value)
-                if computation_needed:
-                    for k in ['t', 'vel_max(t)', 'renergy(t)',
-                              'energy(t, k)', 'enstrophy(t, k)',
-                              'ii0', 'ii1', 'iter0', 'iter1']:
-                        del pp_file[k]
-            if computation_needed:
-                pp_file['iter0'] = iter0
-                pp_file['iter1'] = iter1
-                pp_file['ii0'] = ii0
-                pp_file['ii1'] = ii1
-                pp_file['t'] = (self.parameters['dt']*
-                                self.parameters['niter_stat']*
-                                (np.arange(ii0, ii1+1).astype(np.float)))
-                pp_file['energy(t, k)'] = (
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 0, 0] +
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 1, 1] +
-                    data_file['statistics/spectra/velocity_velocity'][ii0:ii1+1, :, 2, 2])/2
-                pp_file['enstrophy(t, k)'] = (
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 0, 0] +
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 1, 1] +
-                    data_file['statistics/spectra/vorticity_vorticity'][ii0:ii1+1, :, 2, 2])/2
-                pp_file['vel_max(t)'] = data_file['statistics/moments/velocity']  [ii0:ii1+1, 9, 3]
-                pp_file['renergy(t)'] = data_file['statistics/moments/velocity'][ii0:ii1+1, 2, 3]/2
-                if 'trS2_Q_R' in data_file['statistics/moments'].keys():
-                    pp_file['mean_trS2(t)'] = data_file['statistics/moments/trS2_Q_R'][:, 1, 0]
-            for k in ['t',
-                      'energy(t, k)',
-                      'enstrophy(t, k)',
-                      'vel_max(t)',
-                      'renergy(t)',
-                      'mean_trS2(t)']:
-                if k in pp_file.keys():
-                    self.statistics[k] = pp_file[k].value
-            self.compute_time_averages()
-        return None
-    def compute_time_averages(self):
-        """Compute easy stats.
-
-        Further computation of statistics based on the contents of
-        ``simname_cache.h5``.
-        Standard quantities are as follows
-        (consistent with [Ishihara]_):
-
-        .. math::
-
-            U_{\\textrm{int}}(t) = \\sqrt{\\frac{2E(t)}{3}}, \\hskip .5cm
-            L_{\\textrm{int}}(t) = \\frac{\pi}{2U_{int}^2(t)} \\int \\frac{dk}{k} E(t, k), \\hskip .5cm
-            T_{\\textrm{int}}(t) =
-            \\frac{L_{\\textrm{int}}(t)}{U_{\\textrm{int}}(t)}
-
-            \\eta_K = \\left(\\frac{\\nu^3}{\\varepsilon}\\right)^{1/4}, \\hskip .5cm
-            \\tau_K = \\left(\\frac{\\nu}{\\varepsilon}\\right)^{1/2}, \\hskip .5cm
-            \\lambda = \\sqrt{\\frac{15 \\nu U_{\\textrm{int}}^2}{\\varepsilon}}
-
-            Re = \\frac{U_{\\textrm{int}} L_{\\textrm{int}}}{\\nu}, \\hskip
-            .5cm
-            R_{\\lambda} = \\frac{U_{\\textrm{int}} \\lambda}{\\nu}
-
-        .. [Ishihara] T. Ishihara et al,
-                      *Small-scale statistics in high-resolution direct numerical
-                      simulation of turbulence: Reynolds number dependence of
-                      one-point velocity gradient statistics*.
-                      J. Fluid Mech.,
-                      **592**, 335-366, 2007
-        """
-        for key in ['energy', 'enstrophy']:
-            self.statistics[key + '(t)'] = (self.statistics['dk'] *
-                                            np.sum(self.statistics[key + '(t, k)'], axis = 1))
-        self.statistics['Uint(t)'] = np.sqrt(2*self.statistics['energy(t)'] / 3)
-        self.statistics['Lint(t)'] = ((self.statistics['dk']*np.pi /
-                                       (2*self.statistics['Uint(t)']**2)) *
-                                      np.nansum(self.statistics['energy(t, k)'] /
-                                                self.statistics['kshell'][None, :], axis = 1))
-        for key in ['energy',
-                    'enstrophy',
-                    'vel_max',
-                    'mean_trS2',
-                    'Uint',
-                    'Lint']:
-            if key + '(t)' in self.statistics.keys():
-                self.statistics[key] = np.average(self.statistics[key + '(t)'], axis = 0)
-        for suffix in ['', '(t)']:
-            self.statistics['diss'    + suffix] = (self.parameters['nu'] *
-                                                   self.statistics['enstrophy' + suffix]*2)
-            self.statistics['etaK'    + suffix] = (self.parameters['nu']**3 /
-                                                   self.statistics['diss' + suffix])**.25
-            self.statistics['tauK'    + suffix] =  (self.parameters['nu'] /
-                                                    self.statistics['diss' + suffix])**.5
-            self.statistics['Re' + suffix] = (self.statistics['Uint' + suffix] *
-                                              self.statistics['Lint' + suffix] /
-                                              self.parameters['nu'])
-            self.statistics['lambda' + suffix] = (15 * self.parameters['nu'] *
-                                                  self.statistics['Uint' + suffix]**2 /
-                                                  self.statistics['diss' + suffix])**.5
-            self.statistics['Rlambda' + suffix] = (self.statistics['Uint' + suffix] *
-                                                   self.statistics['lambda' + suffix] /
-                                                   self.parameters['nu'])
-            self.statistics['kMeta' + suffix] = (self.statistics['kM'] *
-                                                 self.statistics['etaK' + suffix])
-            if self.parameters['dealias_type'] == 1:
-                self.statistics['kMeta' + suffix] *= 0.8
-        self.statistics['Tint'] = self.statistics['Lint'] / self.statistics['Uint']
-        self.statistics['Taylor_microscale'] = self.statistics['lambda']
-        return None
-    def set_plt_style(
-            self,
-            style = {'dashes' : (None, None)}):
-        self.style.update(style)
-        return None
-    def read_cfield(
-            self,
-            field_name = 'vorticity',
-            iteration = 0):
-        """read the Fourier representation of a vector field.
-
-        Read the binary file containing iteration ``iteration`` of the
-        field ``field_name``, and return it as a properly shaped
-        ``numpy.memmap`` object.
-        """
-        return np.memmap(
-                os.path.join(self.work_dir,
-                             self.simname + '_{0}_i{1:0>5x}'.format('c' + field_name, iteration)),
-                dtype = self.ctype,
-                mode = 'r',
-                shape = (self.parameters['ny'],
-                         self.parameters['nz'],
-                         self.parameters['nx']//2+1,
-                         3))
-    def write_par(
-            self,
-            iter0 = 0,
-            particle_ic = None):
-        _fluid_particle_base.write_par(self, iter0 = iter0)
-        with h5py.File(self.get_data_file_name(), 'r+') as ofile:
-            kspace = self.get_kspace()
-            nshells = kspace['nshell'].shape[0]
-            vec_stat_datasets = ['velocity', 'vorticity']
-            scal_stat_datasets = []
-            for k in vec_stat_datasets:
-                time_chunk = 2**20 // (
-                        self.dtype.itemsize*3*
-                        self.parameters['nx']*self.parameters['ny'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/0slices/' + k + '/real',
-                                     (1, self.parameters['ny'], self.parameters['nx'], 3),
-                                     chunks = (time_chunk, self.parameters['ny'], self.parameters['nx'], 3),
-                                     maxshape = (None, self.parameters['ny'], self.parameters['nx'], 3),
-                                     dtype = self.dtype)
-            if self.Lag_acc_stats_on:
-                vec_stat_datasets += ['Lagrangian_acceleration']
-                scal_stat_datasets += ['pressure']
-            for k in vec_stat_datasets:
-                time_chunk = 2**20//(8*3*3*nshells)
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/spectra/' + k + '_' + k,
-                                     (1, nshells, 3, 3),
-                                     chunks = (time_chunk, nshells, 3, 3),
-                                     maxshape = (None, nshells, 3, 3),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*4*10)
-                time_chunk = max(time_chunk, 1)
-                a = ofile.create_dataset('statistics/moments/' + k,
-                                     (1, 10, 4),
-                                     chunks = (time_chunk, 10, 4),
-                                     maxshape = (None, 10, 4),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*4*self.parameters['histogram_bins'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/' + k,
-                                     (1,
-                                      self.parameters['histogram_bins'],
-                                      4),
-                                     chunks = (time_chunk,
-                                               self.parameters['histogram_bins'],
-                                               4),
-                                     maxshape = (None,
-                                                 self.parameters['histogram_bins'],
-                                                 4),
-                                     dtype = np.int64)
-            for k in scal_stat_datasets:
-                time_chunk = 2**20//(8*nshells)
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/spectra/' + k + '_' + k,
-                                     (1, nshells),
-                                     chunks = (time_chunk, nshells),
-                                     maxshape = (None, nshells),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*10)
-                time_chunk = max(time_chunk, 1)
-                a = ofile.create_dataset('statistics/moments/' + k,
-                                     (1, 10),
-                                     chunks = (time_chunk, 10),
-                                     maxshape = (None, 10),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*self.parameters['histogram_bins'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/' + k,
-                                     (1,
-                                      self.parameters['histogram_bins']),
-                                     chunks = (time_chunk,
-                                               self.parameters['histogram_bins']),
-                                     maxshape = (None,
-                                                 self.parameters['histogram_bins']),
-                                     dtype = np.int64)
-            if self.QR_stats_on:
-                time_chunk = 2**20//(8*3*self.parameters['histogram_bins'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/trS2_Q_R',
-                                     (1,
-                                      self.parameters['histogram_bins'],
-                                      3),
-                                     chunks = (time_chunk,
-                                               self.parameters['histogram_bins'],
-                                               3),
-                                     maxshape = (None,
-                                                 self.parameters['histogram_bins'],
-                                                 3),
-                                     dtype = np.int64)
-                time_chunk = 2**20//(8*9*self.parameters['histogram_bins'])
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/velocity_gradient',
-                                     (1,
-                                      self.parameters['histogram_bins'],
-                                      3,
-                                      3),
-                                     chunks = (time_chunk,
-                                               self.parameters['histogram_bins'],
-                                               3,
-                                               3),
-                                     maxshape = (None,
-                                                 self.parameters['histogram_bins'],
-                                                 3,
-                                                 3),
-                                     dtype = np.int64)
-                time_chunk = 2**20//(8*3*10)
-                time_chunk = max(time_chunk, 1)
-                a = ofile.create_dataset('statistics/moments/trS2_Q_R',
-                                     (1, 10, 3),
-                                     chunks = (time_chunk, 10, 3),
-                                     maxshape = (None, 10, 3),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*9*10)
-                time_chunk = max(time_chunk, 1)
-                a = ofile.create_dataset('statistics/moments/velocity_gradient',
-                                     (1, 10, 3, 3),
-                                     chunks = (time_chunk, 10, 3, 3),
-                                     maxshape = (None, 10, 3, 3),
-                                     dtype = np.float64)
-                time_chunk = 2**20//(8*self.parameters['QR2D_histogram_bins']**2)
-                time_chunk = max(time_chunk, 1)
-                ofile.create_dataset('statistics/histograms/QR2D',
-                                     (1,
-                                      self.parameters['QR2D_histogram_bins'],
-                                      self.parameters['QR2D_histogram_bins']),
-                                     chunks = (time_chunk,
-                                               self.parameters['QR2D_histogram_bins'],
-                                               self.parameters['QR2D_histogram_bins']),
-                                     maxshape = (None,
-                                                 self.parameters['QR2D_histogram_bins'],
-                                                 self.parameters['QR2D_histogram_bins']),
-                                     dtype = np.int64)
-        if self.particle_species == 0:
-            return None
-
-        if type(particle_ic) == type(None):
-            pbase_shape = (self.parameters['nparticles'],)
-            number_of_particles = self.parameters['nparticles']
-        else:
-            pbase_shape = particle_ic.shape[:-1]
-            assert(particle_ic.shape[-1] == 3)
-            if len(pbase_shape) == 1:
-                number_of_particles = pbase_shape[0]
-            else:
-                number_of_particles = 1
-                for val in pbase_shape[1:]:
-                    number_of_particles *= val
-
-        with h5py.File(self.get_particle_file_name(), 'a') as ofile:
-            for s in range(self.particle_species):
-                ofile.create_group('tracers{0}'.format(s))
-                time_chunk = 2**20 // (8*3*number_of_particles)
-                time_chunk = max(time_chunk, 1)
-                dims = ((1,
-                         self.parameters['tracers{0}_integration_steps'.format(s)]) +
-                        pbase_shape + (3,))
-                maxshape = (h5py.h5s.UNLIMITED,) + dims[1:]
-                if len(pbase_shape) > 1:
-                    chunks = (time_chunk, 1, 1) + dims[3:]
-                else:
-                    chunks = (time_chunk, 1) + dims[2:]
-                bfps.tools.create_alloc_early_dataset(
-                        ofile,
-                        '/tracers{0}/rhs'.format(s),
-                        dims, maxshape, chunks)
-                if len(pbase_shape) > 1:
-                    chunks = (time_chunk, 1) + pbase_shape[1:] + (3,)
-                else:
-                    chunks = (time_chunk, pbase_shape[0], 3)
-                bfps.tools.create_alloc_early_dataset(
-                        ofile,
-                        '/tracers{0}/state'.format(s),
-                        (1,) + pbase_shape + (3,),
-                        (h5py.h5s.UNLIMITED,) + pbase_shape + (3,),
-                        chunks)
-                # "velocity" is sampled, single precision is enough
-                # for the results we are interested in.
-                bfps.tools.create_alloc_early_dataset(
-                        ofile,
-                        '/tracers{0}/velocity'.format(s),
-                        (1,) + pbase_shape + (3,),
-                        (h5py.h5s.UNLIMITED,) + pbase_shape + (3,),
-                        chunks,
-                        dset_dtype = h5py.h5t.IEEE_F32LE)
-                if self.parameters['tracers{0}_acc_on'.format(s)]:
-                    bfps.tools.create_alloc_early_dataset(
-                            ofile,
-                            '/tracers{0}/acceleration'.format(s),
-                            (1,) + pbase_shape + (3,),
-                            (h5py.h5s.UNLIMITED,) + pbase_shape + (3,),
-                            chunks,
-                            dset_dtype = h5py.h5t.IEEE_F32LE)
-        return None
-    def add_particle_fields(
-            self,
-            interp_type = 'spline',
-            kcut = None,
-            neighbours = 1,
-            smoothness = 1,
-            name = 'particle_field',
-            field_class = 'rFFTW_interpolator',
-            acc_field_name = 'rFFTW_acc'):
-        self.fluid_includes += '#include "{0}.hpp"\n'.format(field_class)
-        self.fluid_variables += field_class + '<{0}, {1}> *vel_{2}, *acc_{2};\n'.format(
-                self.C_dtype, neighbours, name)
-        self.parameters[name + '_type'] = interp_type
-        self.parameters[name + '_neighbours'] = neighbours
-        if interp_type == 'spline':
-            self.parameters[name + '_smoothness'] = smoothness
-            beta_name = 'beta_n{0}_m{1}'.format(neighbours, smoothness)
-        elif interp_type == 'Lagrange':
-            beta_name = 'beta_Lagrange_n{0}'.format(neighbours)
-        if field_class == 'rFFTW_interpolator':
-            self.fluid_start += ('vel_{0} = new {1}<{2}, {3}>(fs, {4}, fs->rvelocity);\n' +
-                                 'acc_{0} = new {1}<{2}, {3}>(fs, {4}, {5});\n').format(name,
-                                                                                   field_class,
-                                                                                   self.C_dtype,
-                                                                                   neighbours,
-                                                                                   beta_name,
-                                                                                   acc_field_name)
-        elif field_class == 'interpolator':
-            self.fluid_start += ('vel_{0} = new {1}<{2}, {3}>(fs, {4});\n' +
-                                 'acc_{0} = new {1}<{2}, {3}>(fs, {4});\n').format(name,
-                                                                                   field_class,
-                                                                                   self.C_dtype,
-                                                                                   neighbours,
-                                                                                   beta_name,
-                                                                                   acc_field_name)
-        self.fluid_end += ('delete vel_{0};\n' +
-                           'delete acc_{0};\n').format(name)
-        update_fields = 'fs->compute_velocity(fs->cvorticity);\n'
-        if not type(kcut) == type(None):
-            update_fields += 'fs->low_pass_Fourier(fs->cvelocity, 3, {0});\n'.format(kcut)
-        update_fields += ('fs->ift_velocity();\n' +
-                          'fs->compute_Lagrangian_acceleration(acc_{0}->field);\n').format(name)
-        self.fluid_start += update_fields
-        self.fluid_loop += update_fields
-        return None
-    def specific_parser_arguments(
-            self,
-            parser):
-        _fluid_particle_base.specific_parser_arguments(self, parser)
-        parser.add_argument(
-                '--src-wd',
-                type = str,
-                dest = 'src_work_dir',
-                default = '')
-        parser.add_argument(
-                '--src-simname',
-                type = str,
-                dest = 'src_simname',
-                default = '')
-        parser.add_argument(
-                '--src-iteration',
-                type = int,
-                dest = 'src_iteration',
-                default = 0)
-        parser.add_argument(
-               '--njobs',
-               type = int, dest = 'njobs',
-               default = 1)
-        parser.add_argument(
-               '--QR-stats',
-               action = 'store_true',
-               dest = 'QR_stats',
-               help = 'add this option if you want to compute velocity gradient and QR stats')
-        parser.add_argument(
-               '--Lag-acc-stats',
-               action = 'store_true',
-               dest = 'Lag_acc_stats',
-               help = 'add this option if you want to compute Lagrangian acceleration statistics')
-        parser.add_argument(
-               '--kMeta',
-               type = float,
-               dest = 'kMeta',
-               default = 2.0)
-        parser.add_argument(
-               '--dtfactor',
-               type = float,
-               dest = 'dtfactor',
-               default = 0.5,
-               help = 'dt is computed as DTFACTOR / N')
-        parser.add_argument(
-               '--particle-rand-seed',
-               type = int,
-               dest = 'particle_rand_seed',
-               default = None)
-        parser.add_argument(
-               '--pclouds',
-               type = int,
-               dest = 'pclouds',
-               default = 1,
-               help = ('number of particle clouds. Particle "clouds" '
-                       'consist of particles distributed according to '
-                       'pcloud-type.'))
-        parser.add_argument(
-                '--pcloud-type',
-                choices = ['random-cube',
-                           'regular-cube'],
-                dest = 'pcloud_type',
-                default = 'random-cube')
-        parser.add_argument(
-               '--particle-cloud-size',
-               type = float,
-               dest = 'particle_cloud_size',
-               default = 2*np.pi)
-        parser.add_argument(
-                '--neighbours',
-                type = int,
-                dest = 'neighbours',
-                default = 1)
-        parser.add_argument(
-                '--smoothness',
-                type = int,
-                dest = 'smoothness',
-                default = 1)
-        return None
-    def prepare_launch(
-            self,
-            args = []):
-        """Set up reasonable parameters.
-
-        With the default Lundgren forcing applied in the band [2, 4],
-        we can estimate the dissipation, therefore we can estimate
-        :math:`k_M \\eta_K` and constrain the viscosity.
-        Also, if velocity gradient statistics are computed, the
-        dissipation is used for estimating the bins of the QR histogram.
-
-        In brief, the command line parameter :math:`k_M \\eta_K` is
-        used in the following formula for :math:`\\nu` (:math:`N` is the
-        number of real space grid points per coordinate):
-
-        .. math::
-
-            \\nu = \\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/3}
-
-        With this choice, the average dissipation :math:`\\varepsilon`
-        will be close to 0.4, and the integral scale velocity will be
-        close to 0.77, yielding the approximate value for the Taylor
-        microscale and corresponding Reynolds number:
-
-        .. math::
-
-            \\lambda \\approx 4.75\\left(\\frac{2 k_M \\eta_K}{N} \\right)^{4/6}, \\hskip .5in
-            R_\\lambda \\approx 3.7 \\left(\\frac{N}{2 k_M \\eta_K} \\right)^{4/6}
-
-        """
-        opt = _code.prepare_launch(self, args = args)
-        self.QR_stats_on = opt.QR_stats
-        self.Lag_acc_stats_on = opt.Lag_acc_stats
-        self.parameters['nu'] = (opt.kMeta * 2 / opt.n)**(4./3)
-        self.parameters['dt'] = (opt.dtfactor / opt.n)
-        # custom famplitude for 288 and 576
-        if opt.n == 288:
-            self.parameters['famplitude'] = 0.45
-        elif opt.n == 576:
-            self.parameters['famplitude'] = 0.47
-        if ((self.parameters['niter_todo'] % self.parameters['niter_out']) != 0):
-            self.parameters['niter_out'] = self.parameters['niter_todo']
-        if self.QR_stats_on:
-            # max_Q_estimate and max_R_estimate are just used for the 2D pdf
-            # therefore I just want them to be small multiples of mean trS2
-            # I'm already estimating the dissipation with kMeta...
-            meantrS2 = (opt.n//2 / opt.kMeta)**4 * self.parameters['nu']**2
-            self.parameters['max_Q_estimate'] = meantrS2
-            self.parameters['max_R_estimate'] = .4*meantrS2**1.5
-            # add QR suffix to code name, since we now expect additional
-            # datasets in the .h5 file
-            self.name += '-QR'
-        if self.Lag_acc_stats_on:
-            self.name += '-Lag_acc'
-        if len(opt.src_work_dir) == 0:
-            opt.src_work_dir = os.path.realpath(opt.work_dir)
-        self.pars_from_namespace(opt)
-        return opt
-    def launch(
-            self,
-            args = [],
-            noparticles = False,
-            **kwargs):
-        opt = self.prepare_launch(args = args)
-        self.fill_up_fluid_code()
-        if noparticles:
-            opt.nparticles = 0
-        elif type(opt.nparticles) == int:
-            if opt.nparticles > 0:
-                self.name += '-particles'
-                self.add_3D_rFFTW_field(
-                        name = 'rFFTW_acc')
-                self.add_interpolator(
-                        name = 'cubic_spline',
-                        neighbours = opt.neighbours,
-                        smoothness = opt.smoothness,
-                        class_name = 'rFFTW_interpolator')
-                self.add_particles(
-                        integration_steps = [4],
-                        interpolator = 'cubic_spline',
-                        acc_name = 'rFFTW_acc',
-                        class_name = 'rFFTW_distributed_particles')
-                self.variables += 'hid_t particle_file;\n'
-                self.main_start += """
-                    if (myrank == 0)
-                    {
-                        // set caching parameters
-                        hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
-                        herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0);
-                        DEBUG_MSG("when setting cache for particles I got %d\\n", cache_err);
-                        sprintf(fname, "%s_particles.h5", simname);
-                        particle_file = H5Fopen(fname, H5F_ACC_RDWR, fapl);
-                    }
-                    """
-                self.main_end = ('if (myrank == 0)\n' +
-                                 '{\n' +
-                                 'H5Fclose(particle_file);\n' +
-                                 '}\n') + self.main_end
-        self.finalize_code()
-        self.launch_jobs(opt = opt, **kwargs)
-        return None
-    def launch_jobs(
-            self,
-            opt = None,
-            particle_initial_condition = None):
-        if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')):
-            if opt.pclouds > 1:
-                np.random.seed(opt.particle_rand_seed)
-                if opt.pcloud_type == 'random-cube':
-                    particle_initial_condition = (
-                        np.random.random((opt.pclouds, 1, 3))*2*np.pi +
-                        np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size)
-                elif opt.pcloud_type == 'regular-cube':
-                    onedarray = np.linspace(
-                            -opt.particle_cloud_size/2,
-                            opt.particle_cloud_size/2,
-                            self.parameters['nparticles'])
-                    particle_initial_condition = np.zeros(
-                            (opt.pclouds,
-                             self.parameters['nparticles'],
-                             self.parameters['nparticles'],
-                             self.parameters['nparticles'], 3),
-                            dtype = np.float64)
-                    particle_initial_condition[:] = \
-                        np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi
-                    particle_initial_condition[..., 0] += onedarray[None, None, None, :]
-                    particle_initial_condition[..., 1] += onedarray[None, None, :, None]
-                    particle_initial_condition[..., 2] += onedarray[None, :, None, None]
-            self.write_par(
-                    particle_ic = particle_initial_condition)
-            if self.parameters['nparticles'] > 0:
-                data = self.generate_tracer_state(
-                        species = 0,
-                        rseed = opt.particle_rand_seed,
-                        data = particle_initial_condition)
-                for s in range(1, self.particle_species):
-                    self.generate_tracer_state(species = s, data = data)
-            init_condition_file = os.path.join(
-                    self.work_dir,
-                    self.simname + '_cvorticity_i{0:0>5x}'.format(0))
-            if not os.path.exists(init_condition_file):
-                if len(opt.src_simname) > 0:
-                    src_file = os.path.join(
-                            os.path.realpath(opt.src_work_dir),
-                            opt.src_simname + '_cvorticity_i{0:0>5x}'.format(opt.src_iteration))
-                    os.symlink(src_file, init_condition_file)
-                else:
-                   self.generate_vector_field(
-                           write_to_file = True,
-                           spectra_slope = 2.0,
-                           amplitude = 0.05)
-        self.run(
-                nb_processes = opt.nb_processes,
-                nb_threads_per_process = opt.nb_threads_per_process,
-                njobs = opt.njobs,
-                hours = opt.minutes // 60,
-                minutes = opt.minutes % 60,
-                no_submit = opt.no_submit)
-        return None
-
diff --git a/bfps/PP.py b/bfps/PP.py
index 6e02f2aefd5db2e9790f3a16cbc2bfa3c85ab37b..914b90ef9383d986a27d22bab11d2821983631f6 100644
--- a/bfps/PP.py
+++ b/bfps/PP.py
@@ -1,26 +1,25 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
+################################################################################
+#                                                                              #
+#  Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
 
 
 
@@ -33,6 +32,7 @@ import h5py
 import math
 import numpy as np
 import warnings
+import glob
 
 import bfps
 from ._code import _code
@@ -50,12 +50,6 @@ class PP(_code):
                 self,
                 work_dir = work_dir,
                 simname = simname)
-        self.host_info = {'type'        : 'cluster',
-                          'environment' : None,
-                          'deltanprocs' : 1,
-                          'queue'       : '',
-                          'mail_address': '',
-                          'mail_events' : None}
         self.generate_default_parameters()
         return None
     def set_precision(
@@ -118,6 +112,7 @@ class PP(_code):
         return None
     def generate_default_parameters(self):
         # these parameters are relevant for all PP classes
+        self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE'
         self.parameters['dealias_type'] = int(1)
         self.parameters['dkx'] = float(1.0)
         self.parameters['dky'] = float(1.0)
@@ -139,6 +134,11 @@ class PP(_code):
             pars['max_acceleration_estimate'] = float(10)
             pars['max_velocity_estimate'] = float(1)
             pars['histogram_bins'] = int(129)
+        elif dns_type == 'resize':
+            pars['new_nx'] = int(32)
+            pars['new_ny'] = int(32)
+            pars['new_nz'] = int(32)
+            pars['new_simname'] = 'test_resized'
         return pars
     def get_data_file_name(self):
         return os.path.join(self.work_dir, self.simname + '.h5')
@@ -429,6 +429,12 @@ class PP(_code):
         self.simulation_parser_arguments(parser_native_binary_to_hdf5)
         self.job_parser_arguments(parser_native_binary_to_hdf5)
         self.parameters_to_parser_arguments(parser_native_binary_to_hdf5)
+        parser_field_single_to_double = subparsers.add_parser(
+                'field_single_to_double',
+                help = 'convert complex vorticity from single to double')
+        self.simulation_parser_arguments(parser_field_single_to_double)
+        self.job_parser_arguments(parser_field_single_to_double)
+        self.parameters_to_parser_arguments(parser_field_single_to_double)
         parser_get_rfields = subparsers.add_parser(
                 'get_rfields',
                 help = 'get real space velocity field')
@@ -444,6 +450,15 @@ class PP(_code):
         self.parameters_to_parser_arguments(
                 parser_joint_acc_vel_stats,
                 parameters = self.extra_postprocessing_parameters('joint_acc_vel_stats'))
+        parser_resize = subparsers.add_parser(
+                'resize',
+                help = 'get joint acceleration and velocity statistics')
+        self.simulation_parser_arguments(parser_resize)
+        self.job_parser_arguments(parser_resize)
+        self.parameters_to_parser_arguments(parser_resize)
+        self.parameters_to_parser_arguments(
+                parser_resize,
+                parameters = self.extra_postprocessing_parameters('resize'))
         return None
     def prepare_launch(
             self,
@@ -626,7 +641,6 @@ class PP(_code):
                 for kz in range(src_file[src_dset_name].shape[0]):
                     dst_file[dst_dset_name][kz] = src_file[src_dset_name][kz]
         else:
-            print('aloha')
             min_shape = (min(dst_shape[0], src_file[src_dset_name].shape[0]),
                          min(dst_shape[1], src_file[src_dset_name].shape[1]),
                          min(dst_shape[2], src_file[src_dset_name].shape[2]),
@@ -674,10 +688,11 @@ class PP(_code):
                 group = self.dns_type + '/parameters',
                 parameters = self.pp_parameters,
                 file_name = os.path.join(self.work_dir, self.simname + '_post.h5'))
-        histogram_bins = opt.histogram_bins
-        if (type(histogram_bins) == type(None) and
-            'histogram_bins' in self.pp_parameters.keys()):
-            histogram_bins = self.pp_parameters['histogram_bins']
+        if 'histogram_bins' in opt.__dict__.keys():
+            histogram_bins = opt.histogram_bins
+            if (type(histogram_bins) == type(None) and
+                'histogram_bins' in self.pp_parameters.keys()):
+                histogram_bins = self.pp_parameters['histogram_bins']
         with h5py.File(os.path.join(self.work_dir, self.simname + '_post.h5'), 'r+') as ofile:
             group = ofile[self.dns_type]
             group.require_group('histograms')
@@ -772,7 +787,7 @@ class PP(_code):
                             dtype = np.float64)
                 df.close()
         return None
-    def prepare_field_file(self):
+    def prepare_field_file(self, iter0 = 0):
         df = self.get_data_file()
         if 'field_dtype' in df.keys():
             # we don't need to do anything, raw binary files are used
@@ -783,28 +798,22 @@ class PP(_code):
         with h5py.File(os.path.join(self.work_dir, self.simname + '_fields.h5'), 'a') as ff:
             ff.require_group('vorticity')
             ff.require_group('vorticity/complex')
-            checkpoint = 0
-            while True:
-                cpf_name = os.path.join(
-                        self.work_dir,
-                        self.simname + '_checkpoint_{0}.h5'.format(checkpoint))
-                if os.path.exists(cpf_name):
-                    cpf = h5py.File(cpf_name, 'r')
-                    for iter_name in cpf['vorticity/complex'].keys():
-                        if iter_name not in ff['vorticity/complex'].keys():
-                            ff['vorticity/complex/' + iter_name] = h5py.ExternalLink(
-                                    cpf_name,
-                                    'vorticity/complex/' + iter_name)
-                    checkpoint += 1
-                else:
-                    break
+            checkpoint_file_list = glob.glob(self.simname + '_checkpoint_*.h5')
+            for cpf_name in checkpoint_file_list:
+                cpf = h5py.File(cpf_name, 'r')
+                for iter_name in cpf['vorticity/complex'].keys():
+                    if iter_name not in ff['vorticity/complex'].keys():
+                        ff['vorticity/complex/' + iter_name] = h5py.ExternalLink(
+                                cpf_name,
+                                'vorticity/complex/' + iter_name)
+                cpf.close()
         return None
     def launch_jobs(
             self,
             opt = None,
             particle_initial_condition = None):
         self.prepare_post_file(opt)
-        self.prepare_field_file()
+        self.prepare_field_file(iter0 = opt.iter0)
         self.run(
                 nb_processes = opt.nb_processes,
                 nb_threads_per_process = opt.nb_threads_per_process,
diff --git a/bfps/TEST.py b/bfps/TEST.py
index 5f5734030344f15c7b23d7849fede80105e11fc6..2edcdfe46a8cf47360ac5a7dae28b72d1e81978d 100644
--- a/bfps/TEST.py
+++ b/bfps/TEST.py
@@ -1,26 +1,25 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
+################################################################################
+#                                                                              #
+#  Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
 
 
 
@@ -37,6 +36,7 @@ import warnings
 import bfps
 from ._code import _code
 from bfps import tools
+from bfps import DNS
 
 class TEST(_code):
     """This class is meant to stitch together the C++ code into a final source file,
@@ -50,12 +50,6 @@ class TEST(_code):
                 self,
                 work_dir = work_dir,
                 simname = simname)
-        self.host_info = {'type'        : 'cluster',
-                          'environment' : None,
-                          'deltanprocs' : 1,
-                          'queue'       : '',
-                          'mail_address': '',
-                          'mail_events' : None}
         self.generate_default_parameters()
         return None
     def set_precision(
@@ -118,12 +112,24 @@ class TEST(_code):
         return None
     def generate_default_parameters(self):
         # these parameters are relevant for all TEST classes
+        self.parameters['fftw_plan_rigor'] = 'FFTW_ESTIMATE'
         self.parameters['dealias_type'] = int(1)
         self.parameters['dkx'] = float(1.0)
         self.parameters['dky'] = float(1.0)
         self.parameters['dkz'] = float(1.0)
         self.parameters['filter_length'] = float(1.0)
+        self.parameters['random_seed'] = int(1)
         return None
+    def generate_extra_parameters(
+            self,
+            dns_type = None):
+        pars = {}
+        if dns_type == 'test_interpolation':
+            pars['nparticles'] = 3
+            pars['tracers0_integration_steps'] = int(4)
+            pars['tracers0_neighbours'] = int(1)
+            pars['tracers0_smoothness'] = int(1)
+        return pars
     def get_kspace(self):
         kspace = {}
         if self.parameters['dealias_type'] == 1:
@@ -254,9 +260,28 @@ class TEST(_code):
         parser_filter_test = subparsers.add_parser(
                 'filter_test',
                 help = 'plain filter test')
-        self.simulation_parser_arguments(parser_filter_test)
-        self.job_parser_arguments(parser_filter_test)
-        self.parameters_to_parser_arguments(parser_filter_test)
+        parser_field_test = subparsers.add_parser(
+                'field_test',
+                help = 'plain field test')
+        parser_symmetrize_test = subparsers.add_parser(
+                'symmetrize_test',
+                help = 'plain symmetrize test')
+        parser_field_output_test = subparsers.add_parser(
+                'field_output_test',
+                help = 'plain field output test')
+        parser_test_interpolation = subparsers.add_parser(
+                'test_interpolation',
+                help = 'test velocity gradient interpolation')
+        for parser in ['parser_filter_test',
+                       'parser_field_test',
+                       'parser_symmetrize_test',
+                       'parser_field_output_test',
+                       'parser_test_interpolation']:
+            eval('self.simulation_parser_arguments(' + parser + ')')
+            eval('self.job_parser_arguments(' + parser + ')')
+            eval('self.parameters_to_parser_arguments(' + parser + ')')
+            eval('self.parameters_to_parser_arguments(' + parser + ',' +
+                    'parameters = self.generate_extra_parameters(dns_type = \'' + parser + '\'))')
         return None
     def prepare_launch(
             self,
@@ -273,6 +298,8 @@ class TEST(_code):
             args = [],
             **kwargs):
         opt = self.prepare_launch(args = args)
+        self.parameters.update(
+                self.generate_extra_parameters(dns_type = self.dns_type))
         self.launch_jobs(opt = opt, **kwargs)
         return None
     def launch_jobs(
@@ -281,7 +308,62 @@ class TEST(_code):
             particle_initial_condition = None):
         if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')):
             self.write_par(
-                    particle_ic = None)
+                    particle_ic = particle_initial_condition)
+            if self.dns_type == 'test_interpolation':
+                if type(particle_initial_condition) == type(None):
+                    pbase_shape = (self.parameters['nparticles'],)
+                    number_of_particles = self.parameters['nparticles']
+                else:
+                    pbase_shape = particle_initial_condition.shape[:-1]
+                    assert(particle_initial_condition.shape[-1] == 3)
+                    number_of_particles = 1
+                    for val in pbase_shape[1:]:
+                        number_of_particles *= val
+                ncomponents = 3
+                with h5py.File(os.path.join(self.work_dir, self.simname + '_input.h5'), 'a') as ofile:
+                    s = 0
+                    ofile.create_group('tracers{0}'.format(s))
+                    ofile.create_group('tracers{0}/rhs'.format(s))
+                    ofile.create_group('tracers{0}/state'.format(s))
+                    ofile['tracers{0}/rhs'.format(s)].create_dataset(
+                            '0',
+                            shape = (
+                                (self.parameters['tracers{0}_integration_steps'.format(s)],) +
+                                pbase_shape +
+                                (ncomponents,)),
+                            dtype = np.float)
+                    ofile['tracers{0}/state'.format(s)].create_dataset(
+                            '0',
+                            shape = (
+                                pbase_shape +
+                                (ncomponents,)),
+                            dtype = np.float)
+                    if type(particle_initial_condition) == type(None):
+                        ofile['tracers0/state/0'][:] = np.random.random(pbase_shape + (ncomponents,))*2*np.pi
+                    else:
+                        ofile['tracers0/state/0'][:] = particle_initial_condition
+                with h5py.File(os.path.join(self.work_dir, self.simname + '_input.h5'), 'a') as ofile:
+                    data = DNS.generate_vector_field(self,
+                           write_to_file = False,
+                           spectra_slope = 1.0,
+                           amplitude = 0.05)
+                    #data[:] = 0.0
+                    ## ABC
+                    #data[0, 0, 1, 1] = -0.5*(1j)
+                    #data[0, 0, 1, 2] =  0.5*(1j)
+                    #data[0, 1, 0, 0]                         = -0.5*(1j)
+                    #data[0, self.parameters['nz'] - 1, 0, 0] =  0.5*(1j)
+                    #data[0, 1, 0, 1]                         =  0.5
+                    #data[0, self.parameters['nz'] - 1, 0, 1] =  0.5
+                    #data[1, 0, 0, 0]                         =  0.5
+                    #data[self.parameters['ny'] - 1, 0, 0, 0] =  0.5
+                    #data[1, 0, 0, 2]                         = -0.5*(1j)
+                    #data[self.parameters['ny'] - 1, 0, 0, 2] =  0.5*(1j)
+                    ofile['vorticity/complex/{0}'.format(0)] = data
+                with h5py.File(os.path.join(self.work_dir, self.simname + '_output.h5'), 'a') as ofile:
+                    ofile.require_group('tracers0')
+                    for kk in ['position', 'velocity', 'vorticity', 'velocity_gradient']:
+                        ofile['tracers0'].require_group(kk)
         self.run(
                 nb_processes = opt.nb_processes,
                 nb_threads_per_process = opt.nb_threads_per_process,
diff --git a/bfps/__init__.py b/bfps/__init__.py
index 6c220e69d877670206e411c5a0f1f1ae78c04d33..9595bee4d6885aaa4be4cfc252f605be835e7e64 100644
--- a/bfps/__init__.py
+++ b/bfps/__init__.py
@@ -1,26 +1,25 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
+################################################################################
+#                                                                              #
+#  Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
 
 
 
@@ -38,18 +37,11 @@ here = os.path.normcase(__file__)
 header_dir = os.path.join(os.path.join(dist_loc, 'bfps'), 'cpp')
 lib_dir = os.path.join(dist_loc, 'bfps')
 
-install_info = pickle.load(
-        open(os.path.join(os.path.dirname(here), 'install_info.pickle'), 'rb'))
-
 homefolder = os.path.expanduser('~')
-bfpsfolder = os.path.join(homefolder, '.config/', 'bfps')
-sys.path.append(bfpsfolder)
-from host_information import host_info
+from .host_info import host_info
 
 from .DNS import DNS
-from .FluidConvert import FluidConvert
-from .FluidResize import FluidResize
-from .NavierStokes import NavierStokes
-from .NSVorticityEquation import NSVorticityEquation
+from .PP import PP
+from .TEST import TEST
 
 #import test
diff --git a/bfps/__main__.py b/bfps/__main__.py
index c41a6ffb67f91983f7969f40bc048a2e36e23afe..187171d049580498c82ea8551fa5728515c69845 100644
--- a/bfps/__main__.py
+++ b/bfps/__main__.py
@@ -1,26 +1,25 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
+################################################################################
+#                                                                              #
+#  Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
 
 
 
@@ -31,48 +30,16 @@ import bfps
 from .DNS import DNS
 from .PP import PP
 from .TEST import TEST
-from .NavierStokes import NavierStokes
-from .NSVorticityEquation import NSVorticityEquation
-from .FluidResize import FluidResize
-from .FluidConvert import FluidConvert
-from .NSManyParticles import NSManyParticles
 
 def main():
-    parser = argparse.ArgumentParser(prog = 'bfps')
+    parser = argparse.ArgumentParser(prog = 'bfps', conflict_handler = 'resolve')
     parser.add_argument(
             '-v', '--version',
             action = 'version',
             version = '%(prog)s ' + bfps.__version__)
-    NSoptions = ['NavierStokes',
-                 'NavierStokes-single',
-                 'NavierStokes-double',
-                 'NS',
-                 'NS-single',
-                 'NS-double']
-    NSVEoptions = ['NSVorticityEquation',
-                 'NSVorticityEquation-single',
-                 'NSVorticityEquation-double',
-                 'NSVE',
-                 'NSVE-single',
-                 'NSVE-double']
-    FRoptions = ['FluidResize',
-                 'FluidResize-single',
-                 'FluidResize-double',
-                 'FR',
-                 'FR-single',
-                 'FR-double']
-    FCoptions = ['FluidConvert']
-    NSMPopt = ['NSManyParticles',
-               'NSManyParticles-single',
-               'NSManyParticles-double']
     parser.add_argument(
             'base_class',
-            choices = ['DNS', 'PP', 'TEST'] +
-                      NSoptions +
-                      NSVEoptions +
-                      FRoptions +
-                      FCoptions +
-                      NSMPopt,
+            choices = ['DNS', 'PP', 'TEST'],
             type = str)
     # first option is the choice of base class or -h or -v
     # all other options are passed on to the base_class instance
@@ -81,31 +48,10 @@ def main():
     # cannot be executed by mistake.
     if opt.base_class == 'DNS':
         c = DNS()
-        c.launch(args = sys.argv[2:])
-        return None
     if opt.base_class == 'PP':
         c = PP()
-        c.launch(args = sys.argv[2:])
-        return None
     if opt.base_class == 'TEST':
         c = TEST()
-        c.launch(args = sys.argv[2:])
-        return None
-    if 'double' in opt.base_class:
-        precision = 'double'
-    else:
-        precision = 'single'
-    if opt.base_class in NSoptions:
-        base_class = NavierStokes
-    if opt.base_class in NSVEoptions:
-        base_class = NSVorticityEquation
-    elif opt.base_class in FRoptions:
-        base_class = FluidResize
-    elif opt.base_class in FCoptions:
-        base_class = FluidConvert
-    elif opt.base_class in NSMPopt:
-        base_class = NSManyParticles
-    c = base_class(fluid_precision = precision)
     c.launch(args = sys.argv[2:])
     return None
 
diff --git a/bfps/_base.py b/bfps/_base.py
index 037261d3f1c6ea7af7fc58b79484ed461f84a28b..15a3c7a22cc783c2f471d96f01a30b4a379cd0dc 100644
--- a/bfps/_base.py
+++ b/bfps/_base.py
@@ -1,26 +1,25 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
+################################################################################
+#                                                                              #
+#  Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
 
 
 
@@ -28,7 +27,6 @@ import os
 import sys
 import numpy as np
 import h5py
-from bfps import install_info
 from bfps import __version__
 
 class _base(object):
@@ -97,7 +95,7 @@ class _base(object):
                     'char fname[256];\n' +
                     'hsize_t dims[1];\n' +
                     'char *string_data;\n' +
-                    'sprintf(fname, "%s.h5", {0});\n'.format(simname_variable) +
+                    'snprintf(fname, 255, "%s.h5", {0});\n'.format(simname_variable) +
                     'parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);\n')
         key_prefix = ''
         if prepend_this:
@@ -106,18 +104,24 @@ class _base(object):
             src_txt += 'dset = H5Dopen(parameter_file, "/{0}/{1}", H5P_DEFAULT);\n'.format(
                     file_group, key[i])
             if (type(parameters[key[i]]) == int and parameters[key[i]] >= 1<<30):
-                src_txt += 'H5Dread(dset, H5T_NATIVE_LLONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key_prefix + key[i])
+                src_txt += ('if (dset > 0) H5Dread(dset, H5T_NATIVE_LLONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'
+                          + 'else {0} = 0;\n').format(key_prefix + key[i])
             elif type(parameters[key[i]]) == int:
-                src_txt += 'H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key_prefix + key[i])
+                src_txt += ('if (dset > 0) H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'
+                          + 'else {0} = 0;\n').format(key_prefix + key[i])
             elif type(parameters[key[i]]) == str:
-                src_txt += ('space = H5Dget_space(dset);\n' +
+                src_txt += ('if (dset > 0)\n' +
+                            '{\n'
+                            'space = H5Dget_space(dset);\n' +
                             'memtype = H5Dget_type(dset);\n' +
                             'string_data = (char*)malloc(256);\n' +
                             'H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);\n' +
-                            'sprintf({0}, "%s", string_data);\n'.format(key_prefix + key[i]) +
+                            'snprintf({0}, 255, "%s", string_data);\n'.format(key_prefix + key[i]) +
                             'free(string_data);\n'
                             'H5Sclose(space);\n' +
-                            'H5Tclose(memtype);\n')
+                            'H5Tclose(memtype);\n' +
+                            '}\n' +
+                            'else printf({0}, "NULL");\n'.format(key_prefix + key[i]))
             elif type(parameters[key[i]]) == np.ndarray:
                 if parameters[key[i]].dtype in [np.int, np.int64, np.int32]:
                     template_par = 'int'
@@ -126,10 +130,11 @@ class _base(object):
                 src_txt += '{0} = hdf5_tools::read_vector<{1}>(parameter_file, "/{2}/{0}");\n'.format(
                         key_prefix + key[i], template_par, file_group)
             else:
-                src_txt += 'H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key_prefix + key[i])
+                src_txt += ('if (dset > 0) H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n' +
+                            'else {0} = 0.0;\n').format(key_prefix + key[i])
             src_txt += 'H5Dclose(dset);\n'
         src_txt += 'H5Fclose(parameter_file);\n'
-        src_txt += 'return 0;\n}\n' # finishing read_parameters
+        src_txt += 'return EXIT_SUCCESS;\n}\n' # finishing read_parameters
         return src_txt
     def cprint_pars(self):
         key = sorted(list(self.parameters.keys()))
@@ -168,8 +173,7 @@ class _base(object):
                 ofile['parameters/' + k] = self.parameters[k]
         ofile['iteration'] = int(iter0)
         ofile['bfps_info/solver_class'] = type(self).__name__
-        for k in install_info.keys():
-            ofile['bfps_info/' + k] = str(install_info[k])
+        ofile['bfps_info/VERSION'] = __version__
         ofile.close()
         return None
     def rewrite_par(
@@ -217,8 +221,10 @@ class _base(object):
                     ofile[group + '/' + k][...] = parameters[k]
         ofile.close()
         return None
-    def read_parameters(self):
-        with h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r') as data_file:
+    def read_parameters(self, fname = None):
+        if type(fname) == type(None):
+            fname = os.path.join(self.work_dir, self.simname + '.h5')
+        with h5py.File(fname, 'r') as data_file:
             for k in data_file['parameters'].keys():
                 if k in self.parameters.keys():
                     if type(self.parameters[k]) in [int, str, float]:
diff --git a/bfps/_code.py b/bfps/_code.py
index 22bcd9101ff6591e00f0455c1de1af2698c5f842..250be61daf1c89cddb9e461748469fd636796eff 100644
--- a/bfps/_code.py
+++ b/bfps/_code.py
@@ -1,26 +1,25 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
+################################################################################
+#                                                                              #
+#  Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
 
 
 
@@ -35,6 +34,7 @@ import math
 import warnings
 
 import bfps
+from bfps.host_info import host_info
 from ._base import _base
 
 class _code(_base):
@@ -62,7 +62,7 @@ class _code(_base):
                 #include <string>
                 #include <cstring>
                 #include <fftw3-mpi.h>
-				#include <omp.h>
+                #include <omp.h>
                 #include <fenv.h>
                 #include <cstdlib>
                 //endcpp
@@ -116,7 +116,7 @@ class _code(_base):
                     }
                 #endif
                     strcpy(simname, argv[1]);
-                    sprintf(fname, "%s.h5", simname);
+                    snprintf(fname, 255, "%s.h5", simname);
                     parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
                     Cdset = H5Dopen(parameter_file, "iteration", H5P_DEFAULT);
                     H5Dread(
@@ -174,12 +174,7 @@ class _code(_base):
                 }
                 //endcpp
                 """
-        self.host_info = {'type'        : 'cluster',
-                          'environment' : None,
-                          'deltanprocs' : 1,
-                          'queue'       : '',
-                          'mail_address': '',
-                          'mail_events' : None}
+        self.host_info = host_info
         self.main = ''
         return None
     def write_src(self):
@@ -194,33 +189,54 @@ class _code(_base):
             outfile.write(self.main)
             outfile.write(self.main_end)
         return None
-    def compile_code(self):
+    def compile_code(
+            self,
+            no_debug = True):
+        if os.path.exists(os.path.join(self.work_dir, self.name)):
+            return 0
         # compile code
-        if not os.path.isfile(os.path.join(bfps.header_dir, 'base.hpp')):
-            raise IOError('header not there:\n' +
-                          '{0}\n'.format(os.path.join(bfps.header_dir, 'base.hpp')) +
-                          '{0}\n'.format(bfps.dist_loc))
-        libraries = ['bfps']
-        libraries += bfps.install_info['libraries']
-
-        command_strings = [bfps.install_info['compiler']]
-        command_strings += [self.name + '.cpp', '-o', self.name]
-        command_strings += bfps.install_info['extra_compile_args']
-        command_strings += ['-I' + idir for idir in bfps.install_info['include_dirs']]
-        command_strings.append('-I' + bfps.header_dir)
-        command_strings += ['-L' + ldir for ldir in bfps.install_info['library_dirs']]
-        command_strings += ['-Wl,-rpath=' + ldir for ldir in bfps.install_info['library_dirs']]
-        command_strings.append('-L' + bfps.lib_dir)
-        command_strings.append('-Wl,-rpath=' + bfps.lib_dir)
-
-        for libname in libraries:
-            command_strings += ['-l' + libname]
-
-        command_strings += ['-fopenmp']
-
+        build_dir = 'bfps_build_' + self.name
+        os.makedirs(build_dir, exist_ok = True)
+        os.chdir(build_dir)
         self.write_src()
-        print('compiling code with command\n' + ' '.join(command_strings))
-        return subprocess.call(command_strings)
+        with open('CMakeLists.txt', 'w') as outfile:
+            outfile.write('cmake_minimum_required(VERSION 3.10)\n')
+            outfile.write('cmake_policy(VERSION 3.12)\n')
+            outfile.write('if (DEFINED ENV{MPICXX})\n')
+            outfile.write('    message(STATUS "Using CMAKE_CXX_COMPILER=MPICXX")\n')
+            outfile.write('    set(CMAKE_CXX_COMPILER $ENV{MPICXX})\n')
+            outfile.write('else()\n')
+            outfile.write('    message(STATUS "MPICXX environment variable undefined, trying to find MPI")\n')
+            outfile.write('    set(MPI_STATIC ON)\n')
+            outfile.write('    find_package(MPI REQUIRED)\n')
+            outfile.write('endif()\n')
+            outfile.write('if (DEFINED ENV{MPICC})\n')
+            outfile.write('    set(CMAKE_C_COMPILER $ENV{MPICC})\n')
+            outfile.write('    message(STATUS "Using CMAKE_C_COMPILER=MPICC")\n')
+            outfile.write('endif()\n')
+            #ideally we should use something like the following 2 lines
+            #outfile.write('set(CMAKE_CXX_COMPILER ${BFPS_CXX_COMPILER})\n')
+            #outfile.write('set(CMAKE_C_COMPILER ${BFPS_C_COMPILER})\n')
+            outfile.write('project(project_{0} LANGUAGES CXX)\n'.format(self.name))
+            outfile.write('find_package(BFPS REQUIRED)\n')
+            outfile.write('set(CMAKE_CXX_STANDARD 11)\n')
+            outfile.write('set(CMAKE_CXX_STANDARD_REQUIRED ON)\n')
+            outfile.write('set(CMAKE_CXX_COMPILE_FLAGS "${CMAKE_CXX_COMPILE_FLAGS} ${BFPS_CXX_COMPILE_FLAGS}")\n')
+            outfile.write('set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${BFPS_EXE_LINKER_FLAGS}")\n')
+            outfile.write('include_directories(${BFPS_INCLUDE_DIRECTORIES} ${BFPS_INCLUDE_DIR}/bfps)\n')
+            outfile.write('link_directories(${BFPS_LINK_DIRECTORIES} ${BFPS_LIBRARIES_DIR})\n')
+            outfile.write('find_library(BFPS_STATIC_LIBRARY bfps)\n')
+            outfile.write('add_executable({0} {0}.cpp)\n'.format(self.name))
+            outfile.write('target_link_libraries(' + self.name + ' ${BFPS_STATIC_LIBRARY})\n')
+            outfile.write('target_link_libraries(' + self.name + ' ${BFPS_LIBS})\n')
+        subprocess.check_call(['cmake', '.'])
+        current_environment = os.environ
+        if not no_debug:
+            current_environment['VERBOSE'] = '1'
+        make_result = subprocess.check_call(['make'], env = current_environment)
+        os.chdir('..')
+        shutil.copy2(os.path.join(build_dir, self.name), os.path.join(self.work_dir, self.name))
+        return make_result
     def set_host_info(
             self,
             host_info = {}):
@@ -234,30 +250,29 @@ class _code(_base):
             hours = 0,
             minutes = 10,
             njobs = 1,
-            no_submit = False):
+            no_submit = False,
+            no_debug = True):
         self.read_parameters()
         with h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r') as data_file:
             iter0 = data_file['iteration'].value
         if not os.path.isdir(self.work_dir):
             os.makedirs(self.work_dir)
-        if not os.path.exists(os.path.join(self.work_dir, self.name)):
-            need_to_compile = True
-        else:
-            need_to_compile = (datetime.fromtimestamp(os.path.getctime(os.path.join(self.work_dir, self.name))) <
-                               bfps.install_info['install_date'])
-        if need_to_compile:
-            assert(self.compile_code() == 0)
-            if self.work_dir != os.path.realpath(os.getcwd()):
-                shutil.copy(self.name, self.work_dir)
+        assert (self.compile_code(no_debug = no_debug) == 0)
         if 'niter_todo' not in self.parameters.keys():
             self.parameters['niter_todo'] = 1
         current_dir = os.getcwd()
         os.chdir(self.work_dir)
         os.chdir(current_dir)
+        if not 'MPI' in self.host_info.keys():
+            self.host_info['MPI'] = 'openmpi'
+        if self.host_info['MPI'] == 'openmpi':
+            mpirun_environment_set = 'x'
+        else:
+            mpirun_environment_set = 'env'
         command_atoms = ['mpirun',
                          '-np',
                          '{0}'.format(nb_processes),
-                         '-x',
+                         '-' + mpirun_environment_set,
                          'OMP_NUM_THREADS={0}'.format(nb_threads_per_process),
                          './' + self.name,
                          self.simname]
@@ -268,20 +283,22 @@ class _code(_base):
                 qsub_script_name = 'run_' + suffix + '.sh'
                 self.write_sge_file(
                     file_name     = os.path.join(self.work_dir, qsub_script_name),
-                    nprocesses    = nb_processes*nb_threads_per_process,
+                    nprocesses    = nb_processes,
                     name_of_run   = suffix,
                     command_atoms = command_atoms[5:],
                     hours         = hours,
                     minutes       = minutes,
                     out_file      = out_file + '_' + suffix,
-                    err_file      = err_file + '_' + suffix)
+                    err_file      = err_file + '_' + suffix,
+                    nb_threads_per_process = nb_threads_per_process)
                 os.chdir(self.work_dir)
                 qsub_atoms = ['qsub']
-                if len(job_name_list) >= 1:
-                    qsub_atoms += ['-hold_jid', job_name_list[-1]]
-                subprocess.call(qsub_atoms + [qsub_script_name])
-                os.chdir(current_dir)
-                job_name_list.append(suffix)
+                if not no_submit:
+                    if len(job_name_list) >= 1:
+                        qsub_atoms += ['-hold_jid', job_name_list[-1]]
+                    subprocess.check_call(qsub_atoms + [qsub_script_name])
+                    os.chdir(current_dir)
+                    job_name_list.append(suffix)
         if self.host_info['type'] == 'SLURM':
             job_id_list = []
             for j in range(njobs):
@@ -296,7 +313,7 @@ class _code(_base):
                     out_file      = out_file + '_' + suffix,
                     err_file      = err_file + '_' + suffix,
                     nb_mpi_processes = nb_processes,
-			        nb_threads_per_process = nb_threads_per_process)
+                    nb_threads_per_process = nb_threads_per_process)
                 os.chdir(self.work_dir)
                 qsub_atoms = ['sbatch']
 
@@ -313,6 +330,14 @@ class _code(_base):
         elif self.host_info['type'] == 'IBMLoadLeveler':
             suffix = self.simname + '_{0}'.format(iter0)
             job_script_name = 'run_' + suffix + '.sh'
+            energy_policy_tag = (
+                    'bfps'
+                    + '_np{0}_ntpp{1}'.format(
+                        nb_processes, nb_threads_per_process)
+                    + '_Nx{0}_Ny{1}_Nz{2}'.format(
+                        self.parameters['nx'], self.parameters['ny'], self.parameters['nz']))
+            if 'nparticles' in self.parameters.keys():
+                energy_policy_tag += '_nparticles{0}'.format(self.parameters['nparticles'])
             if (njobs == 1):
                 self.write_IBMLoadLeveler_file_single_job(
                     file_name     = os.path.join(self.work_dir, job_script_name),
@@ -323,7 +348,8 @@ class _code(_base):
                     out_file      = out_file + '_' + suffix,
                     err_file      = err_file + '_' + suffix,
                     nb_mpi_processes = nb_processes,
-			        nb_threads_per_process = nb_threads_per_process)
+                    nb_threads_per_process = nb_threads_per_process,
+                    energy_policy_tag = energy_policy_tag)
             else:
                 self.write_IBMLoadLeveler_file_many_job(
                     file_name     = os.path.join(self.work_dir, job_script_name),
@@ -335,21 +361,19 @@ class _code(_base):
                     err_file      = err_file + '_' + suffix,
                     njobs = njobs,
                     nb_mpi_processes = nb_processes,
-			        nb_threads_per_process = nb_threads_per_process)
+                    nb_threads_per_process = nb_threads_per_process,
+                    energy_policy_tag = energy_policy_tag)
             submit_atoms = ['llsubmit']
 
             if not no_submit:
-                subprocess.call(submit_atoms + [os.path.join(self.work_dir, job_script_name)])
+                subprocess.check_call(submit_atoms + [os.path.join(self.work_dir, job_script_name)])
 
         elif self.host_info['type'] == 'pc':
             os.chdir(self.work_dir)
-            if os.getenv('LD_LIBRARY_PATH') != None:
-                os.environ['LD_LIBRARY_PATH'] += ':{0}'.format(bfps.lib_dir)
-                print('added to LD_LIBRARY_PATH the location {0}'.format(bfps.lib_dir))
             for j in range(njobs):
                 suffix = self.simname + '_{0}'.format(iter0 + j*self.parameters['niter_todo'])
                 print('running code with command\n' + ' '.join(command_atoms))
-                subprocess.call(command_atoms,
+                subprocess.check_call(command_atoms,
                                 stdout = open(out_file + '_' + suffix, 'w'),
                                 stderr = open(err_file + '_' + suffix, 'w'))
             os.chdir(current_dir)
@@ -364,8 +388,9 @@ class _code(_base):
             minutes = None,
             out_file = None,
             err_file = None,
-			nb_mpi_processes = None,
-			nb_threads_per_process = None):
+            nb_mpi_processes = None,
+            nb_threads_per_process = None,
+            energy_policy_tag = 'bfps'):
 
         script_file = open(file_name, 'w')
         script_file.write('# @ shell=/bin/bash\n')
@@ -380,19 +405,22 @@ class _code(_base):
 
         # If Ibm is used should be : script_file.write('# @ job_type = parallel\n')
         script_file.write('# @ job_type = MPICH\n')
+        assert(type(self.host_info['environment']) != type(None))
+        script_file.write('# @ class = {0}\n'.format(self.host_info['environment']))
 
         script_file.write('# @ node_usage = not_shared\n')
         script_file.write('# @ notification = complete\n')
-        script_file.write('# @ notify_user = $(user)@rzg.mpg.de\n')
+        script_file.write('# @ notify_user = {0}\n'.format(self.host_info['mail_address']))
 
         nb_cpus_per_node = self.host_info['deltanprocs']
-        assert(isinstance(nb_cpus_per_node, int) and nb_cpus_per_node >= 1,
-                'nb_cpus_per_node is {}'.format(nb_cpus_per_node))
+        assert isinstance(nb_cpus_per_node, int) and \
+               nb_cpus_per_node >= 1, \
+               'nb_cpus_per_node is {}'.format(nb_cpus_per_node)
 
         # No more threads than the number of cores
-        assert(nb_threads_per_process <= nb_cpus_per_node,
+        assert nb_threads_per_process <= nb_cpus_per_node, \
                "Cannot use more threads ({} asked) than the number of cores ({})".format(
-                   nb_threads_per_process, nb_cpus_per_node))
+                   nb_threads_per_process, nb_cpus_per_node)
         # Warn if some core will not be ued
         if nb_cpus_per_node%nb_threads_per_process != 0:
             warnings.warn("The number of threads is smaller than the number of cores (machine will be underused)",
@@ -410,10 +438,11 @@ class _code(_base):
             nb_processes_per_node = int(nb_cpus_per_node // nb_threads_per_process)
             first_node_tasks = int(nb_mpi_processes - (nb_nodes-1)*nb_processes_per_node)
 
+        script_file.write('# @ energy_policy_tag = {0}\n'.format(energy_policy_tag))
+        script_file.write('# @ minimize_time_to_solution = yes\n')
         script_file.write('# @ resources = ConsumableCpus({})\n'.format(nb_threads_per_process))
         script_file.write('# @ network.MPI = sn_all,not_shared,us\n')
         script_file.write('# @ wall_clock_limit = {0}:{1:0>2d}:00\n'.format(hours, minutes))
-        assert(type(self.host_info['environment']) != type(None))
         script_file.write('# @ node = {0}\n'.format(nb_nodes))
         script_file.write('# @ tasks_per_node = {0}\n'.format(nb_processes_per_node))
         if (first_node_tasks > 0):
@@ -425,9 +454,6 @@ class _code(_base):
         script_file.write('module li\n')
         script_file.write('export OMP_NUM_THREADS={}\n'.format(nb_threads_per_process))
 
-        script_file.write('LD_LIBRARY_PATH=' +
-                          ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) +
-                          ':${LD_LIBRARY_PATH}\n')
         script_file.write('echo "Start time is `date`"\n')
         script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1]))
         script_file.write('cd ' + self.work_dir + '\n')
@@ -437,7 +463,7 @@ class _code(_base):
         script_file.write('mpiexec.hydra '
             + ' -np {} '.format(nb_mpi_processes)
             + ' -ppn {} '.format(nb_processes_per_node)
-            + ' -ordered-output -prepend-rank '
+            #+ ' -ordered-output -prepend-rank '
             + os.path.join(
                 self.work_dir,
                 command_atoms[0]) +
@@ -460,8 +486,9 @@ class _code(_base):
             out_file = None,
             err_file = None,
             njobs = 2,
-			nb_mpi_processes = None,
-			nb_threads_per_process = None):
+            nb_mpi_processes = None,
+            nb_threads_per_process = None,
+            energy_policy_tag = 'bfps'):
         assert(type(self.host_info['environment']) != type(None))
         script_file = open(file_name, 'w')
         script_file.write('# @ shell=/bin/bash\n')
@@ -475,16 +502,23 @@ class _code(_base):
         script_file.write('# @ output = ' + os.path.join(self.work_dir, out_file) + '\n')
         # If Ibm is used should be : script_file.write('# @ job_type = parallel\n')
         script_file.write('# @ job_type = MPICH\n')
+        assert(type(self.host_info['environment']) != type(None))
+        script_file.write('# @ class = {0}\n'.format(self.host_info['environment']))
         script_file.write('# @ node_usage = not_shared\n')
+
+        script_file.write('# @ notification = error\n')
+        script_file.write('# @ notify_user = {0}\n'.format(self.host_info['mail_address']))
         script_file.write('#\n')
 
         nb_cpus_per_node = self.host_info['deltanprocs']
-        assert(isinstance(nb_cpus_per_node, int) and nb_cpus_per_node >= 1, 'nb_cpus_per_node is {}'.format(nb_cpus_per_node))
+        assert isinstance(nb_cpus_per_node, int) and \
+               nb_cpus_per_node >= 1, \
+               'nb_cpus_per_node is {}'.format(nb_cpus_per_node)
 
         # No more threads than the number of cores
-        assert(nb_threads_per_process <= nb_cpus_per_node,
+        assert nb_threads_per_process <= nb_cpus_per_node, \
                "Cannot use more threads ({} asked) than the number of cores ({})".format(
-                   nb_threads_per_process, nb_cpus_per_node))
+                   nb_threads_per_process, nb_cpus_per_node)
         # Warn if some core will not be ued
         if nb_cpus_per_node%nb_threads_per_process != 0:
             warnings.warn("The number of threads is smaller than the number of cores (machine will be underused)",
@@ -503,11 +537,15 @@ class _code(_base):
             first_node_tasks = int(nb_mpi_processes - (nb_nodes-1)*nb_processes_per_node)
 
         for job in range(njobs):
-            script_file.write('# @ step_name = {0}.$(stepid)\n'.format(self.simname))
+            script_file.write('# @ step_name = {0}.{1}\n'.format(self.simname, job))
+            if job > 0:
+                script_file.write('# @ dependency = {0}.{1} == 0\n'.format(self.simname, job - 1))
             script_file.write('# @ resources = ConsumableCpus({})\n'.format(nb_threads_per_process))
             script_file.write('# @ network.MPI = sn_all,not_shared,us\n')
             script_file.write('# @ wall_clock_limit = {0}:{1:0>2d}:00\n'.format(hours, minutes))
-            assert(type(self.host_info['environment']) != type(None))
+            script_file.write('# @ energy_policy_tag = {0}\n'.format(energy_policy_tag))
+            script_file.write('# @ minimize_time_to_solution = yes\n')
+            assert type(self.host_info['environment']) != type(None)
             script_file.write('# @ node = {0}\n'.format(nb_nodes))
             script_file.write('# @ tasks_per_node = {0}\n'.format(nb_processes_per_node))
             if (first_node_tasks > 0):
@@ -518,9 +556,6 @@ class _code(_base):
         script_file.write('module li\n')
         script_file.write('export OMP_NUM_THREADS={}\n'.format(nb_threads_per_process))
 
-        script_file.write('LD_LIBRARY_PATH=' +
-                          ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) +
-                          ':${LD_LIBRARY_PATH}\n')
         script_file.write('echo "Start time is `date`"\n')
         script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1]))
         script_file.write('cd ' + self.work_dir + '\n')
@@ -552,7 +587,8 @@ class _code(_base):
             hours = None,
             minutes = None,
             out_file = None,
-            err_file = None):
+            err_file = None,
+            nb_threads_per_process = 1):
         script_file = open(file_name, 'w')
         script_file.write('#!/bin/bash\n')
         # export all environment variables
@@ -568,18 +604,17 @@ class _code(_base):
         if not type(out_file) == type(None):
             script_file.write('#$ -o ' + out_file + '\n')
         if not type(self.host_info['environment']) == type(None):
-            envprocs = self.host_info['deltanprocs'] * int(math.ceil((nprocesses *1.0/ self.host_info['deltanprocs'])))
+            envprocs = nb_threads_per_process * nprocesses
             script_file.write('#$ -pe {0} {1}\n'.format(
                     self.host_info['environment'],
                     envprocs))
         script_file.write('echo "got $NSLOTS slots."\n')
         script_file.write('echo "Start time is `date`"\n')
-        script_file.write('mpiexec -machinefile $TMPDIR/machines ' +
-                          '-genv LD_LIBRARY_PATH ' +
-                          '"' +
-                          ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) +
-                          '" ' +
-                          '-n {0} {1}\n'.format(nprocesses, ' '.join(command_atoms)))
+        script_file.write('mpiexec \\\n' +
+                          '\t-machinefile $TMPDIR/machines \\\n' +
+                          '\t-genv OMP_NUM_THREADS={0} \\\n'.format(nb_threads_per_process) +
+                          '\t-genv OMP_PLACES=cores \\\n' +
+                          '\t-n {0} \\\n\t{1}\n'.format(nprocesses, ' '.join(command_atoms)))
         script_file.write('echo "End time is `date`"\n')
         script_file.write('exit 0\n')
         script_file.close()
@@ -593,8 +628,8 @@ class _code(_base):
             minutes = None,
             out_file = None,
             err_file = None,
-			nb_mpi_processes = None,
-			nb_threads_per_process = None):
+            nb_mpi_processes = None,
+            nb_threads_per_process = None):
         script_file = open(file_name, 'w')
         script_file.write('#!/bin/bash -l\n')
         # job name
@@ -611,13 +646,14 @@ class _code(_base):
                 self.host_info['environment']))
 
         nb_cpus_per_node = self.host_info['deltanprocs']
-        assert(isinstance(nb_cpus_per_node, int) and nb_cpus_per_node >= 1,
-               'nb_cpus_per_node is {}'.format(nb_cpus_per_node))
+        assert isinstance(nb_cpus_per_node, int) \
+               and nb_cpus_per_node >= 1, \
+               'nb_cpus_per_node is {}'.format(nb_cpus_per_node)
 
         # No more threads than the number of cores
-        assert(nb_threads_per_process <= nb_cpus_per_node,
+        assert nb_threads_per_process <= nb_cpus_per_node, \
                "Cannot use more threads ({} asked) than the number of cores ({})".format(
-                   nb_threads_per_process, nb_cpus_per_node))
+                   nb_threads_per_process, nb_cpus_per_node)
         # Warn if some core will not be ued
         if nb_cpus_per_node%nb_threads_per_process != 0:
             warnings.warn(
@@ -646,9 +682,6 @@ class _code(_base):
             script_file.write('export OMP_NUM_THREADS={0}\n'.format(nb_threads_per_process))
             script_file.write('export OMP_PLACES=cores\n')
 
-        script_file.write('LD_LIBRARY_PATH=' +
-                          ':'.join([bfps.lib_dir] + bfps.install_info['library_dirs']) +
-                          ':${LD_LIBRARY_PATH}\n')
         script_file.write('echo "Start time is `date`"\n')
         script_file.write('cd ' + self.work_dir + '\n')
         script_file.write('export HTMLOUTPUT={}.html\n'.format(command_atoms[-1]))
diff --git a/bfps/_fluid_base.py b/bfps/_fluid_base.py
deleted file mode 100644
index 757e6cb81e6c605cbcb3c2e9d19bd7487add115f..0000000000000000000000000000000000000000
--- a/bfps/_fluid_base.py
+++ /dev/null
@@ -1,503 +0,0 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-from ._code import _code
-from bfps import tools
-
-import os
-import numpy as np
-import h5py
-
-class _fluid_particle_base(_code):
-    """This class is meant to put together all common code between the
-    different C++ solvers/postprocessing tools, so that development of
-    specific functionalities is not overwhelming.
-    """
-    def __init__(
-            self,
-            name = 'solver',
-            work_dir = './',
-            simname = 'test',
-            dtype = np.float32,
-            use_fftw_wisdom = True):
-        _code.__init__(
-                self,
-                work_dir = work_dir,
-                simname = simname)
-        self.use_fftw_wisdom = use_fftw_wisdom
-        self.name = name
-        self.particle_species = 0
-        if dtype in [np.float32, np.float64]:
-            self.dtype = dtype
-        elif dtype in ['single', 'double']:
-            if dtype == 'single':
-                self.dtype = np.dtype(np.float32)
-            elif dtype == 'double':
-                self.dtype = np.dtype(np.float64)
-        self.rtype = self.dtype
-        if self.rtype == np.float32:
-            self.ctype = np.dtype(np.complex64)
-            self.C_dtype = 'float'
-        elif self.rtype == np.float64:
-            self.ctype = np.dtype(np.complex128)
-            self.C_dtype = 'double'
-        self.parameters['dealias_type'] = 1
-        self.parameters['dkx'] = 1.0
-        self.parameters['dky'] = 1.0
-        self.parameters['dkz'] = 1.0
-        self.parameters['niter_todo'] = 8
-        self.parameters['niter_part'] = 1
-        self.parameters['niter_stat'] = 1
-        self.parameters['niter_out'] = 1024
-        self.parameters['nparticles'] = 0
-        self.parameters['dt'] = 0.01
-        self.fluid_includes = '#include "fluid_solver.hpp"\n'
-        self.fluid_includes = '#include "field.hpp"\n'
-        self.fluid_variables = ''
-        self.fluid_definitions = ''
-        self.fluid_start = ''
-        self.fluid_loop = ''
-        self.fluid_end  = ''
-        self.fluid_output = ''
-        self.stat_src = ''
-        self.particle_includes = ''
-        self.particle_variables = ''
-        self.particle_definitions = ''
-        self.particle_start = ''
-        self.particle_loop = ''
-        self.particle_output = ''
-        self.particle_end  = ''
-        self.particle_stat_src = ''
-        self.file_datasets_grow   = ''
-        self.store_kspace = """
-                //begincpp
-                if (myrank == 0 && iteration == 0)
-                {
-                    TIMEZONE("fuild_base::store_kspace");
-                    hsize_t dims[4];
-                    hid_t space, dset;
-                    // store kspace information
-                    hid_t parameter_file = stat_file;
-                    //char fname[256];
-                    //sprintf(fname, "%s.h5", simname);
-                    //parameter_file = H5Fopen(fname, H5F_ACC_RDWR, H5P_DEFAULT);
-                    dset = H5Dopen(parameter_file, "/kspace/kshell", H5P_DEFAULT);
-                    space = H5Dget_space(dset);
-                    H5Sget_simple_extent_dims(space, dims, NULL);
-                    H5Sclose(space);
-                    if (fs->nshells != dims[0])
-                    {
-                        DEBUG_MSG(
-                            "ERROR: computed nshells %d not equal to data file nshells %d\\n",
-                            fs->nshells, dims[0]);
-                    }
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, fs->kshell);
-                    H5Dclose(dset);
-                    dset = H5Dopen(parameter_file, "/kspace/nshell", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_INT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, fs->nshell);
-                    H5Dclose(dset);
-                    dset = H5Dopen(parameter_file, "/kspace/kM", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->kMspec);
-                    H5Dclose(dset);
-                    dset = H5Dopen(parameter_file, "/kspace/dk", H5P_DEFAULT);
-                    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &fs->dk);
-                    H5Dclose(dset);
-                    //H5Fclose(parameter_file);
-                }
-                //endcpp
-                """
-        return None
-    def get_data_file_name(self):
-        return os.path.join(self.work_dir, self.simname + '.h5')
-    def get_data_file(self):
-        return h5py.File(self.get_data_file_name(), 'r')
-    def get_particle_file_name(self):
-        return os.path.join(self.work_dir, self.simname + '_particles.h5')
-    def get_particle_file(self):
-        return h5py.File(self.get_particle_file_name(), 'r')
-    def finalize_code(
-            self,
-            postprocess_mode = False):
-        self.includes   += self.fluid_includes
-        self.includes   += '#include <ctime>\n'
-        self.variables  += self.fluid_variables
-        self.definitions += ('int grow_single_dataset(hid_t dset, int tincrement)\n{\n' +
-                             'int ndims;\n' +
-                             'hsize_t space;\n' +
-                             'space = H5Dget_space(dset);\n' +
-                             'ndims = H5Sget_simple_extent_ndims(space);\n' +
-                             'hsize_t *dims = new hsize_t[ndims];\n' +
-                             'H5Sget_simple_extent_dims(space, dims, NULL);\n' +
-                             'dims[0] += tincrement;\n' +
-                             'H5Dset_extent(dset, dims);\n' +
-                             'H5Sclose(space);\n' +
-                             'delete[] dims;\n' +
-                             'return EXIT_SUCCESS;\n}\n')
-        self.definitions+= self.fluid_definitions
-        if self.particle_species > 0:
-            self.includes    += self.particle_includes
-            self.variables   += self.particle_variables
-            self.definitions += self.particle_definitions
-        self.definitions += ('herr_t grow_statistics_dataset(hid_t o_id, const char *name, const H5O_info_t *info, void *op_data)\n{\n' +
-                             'if (info->type == H5O_TYPE_DATASET)\n{\n' +
-                             'hsize_t dset = H5Dopen(o_id, name, H5P_DEFAULT);\n' +
-                             'grow_single_dataset(dset, niter_todo/niter_stat);\n'
-                             'H5Dclose(dset);\n}\n' +
-                             'return 0;\n}\n')
-        self.definitions += ('herr_t grow_particle_datasets(hid_t g_id, const char *name, const H5L_info_t *info, void *op_data)\n{\n' +
-                             'hsize_t dset;\n')
-        for key in ['state', 'velocity', 'acceleration']:
-            self.definitions += ('if (H5Lexists(g_id, "{0}", H5P_DEFAULT))\n'.format(key) +
-                                 '{\n' +
-                                 'dset = H5Dopen(g_id, "{0}", H5P_DEFAULT);\n'.format(key) +
-                                 'grow_single_dataset(dset, niter_todo/niter_part);\n' +
-                                 'H5Dclose(dset);\n}\n')
-        self.definitions += ('if (H5Lexists(g_id, "rhs", H5P_DEFAULT))\n{\n' +
-                             'dset = H5Dopen(g_id, "rhs", H5P_DEFAULT);\n' +
-                             'grow_single_dataset(dset, 1);\n' +
-                             'H5Dclose(dset);\n}\n' +
-                             'return 0;\n}\n')
-        self.definitions += ('int grow_file_datasets()\n{\n' +
-                             'int file_problems = 0;\n' +
-                             self.file_datasets_grow +
-                             'return file_problems;\n'
-                             '}\n')
-        self.definitions += 'void do_stats()\n{\n' + self.stat_src + '}\n'
-        self.definitions += 'void do_particle_stats()\n{\n' + self.particle_stat_src + '}\n'
-        # take care of wisdom
-        if self.use_fftw_wisdom:
-            if self.dtype == np.float32:
-                fftw_prefix = 'fftwf_'
-            elif self.dtype == np.float64:
-                fftw_prefix = 'fftw_'
-            self.main_start += """
-                        //begincpp
-                        if (myrank == 0)
-                        {{
-                            char fname[256];
-                            sprintf(fname, "%s_fftw_wisdom.txt", simname);
-                            {0}import_wisdom_from_filename(fname);
-                        }}
-                        {0}mpi_broadcast_wisdom(MPI_COMM_WORLD);
-                        //endcpp
-                        """.format(fftw_prefix)
-            self.main_end = """
-                        //begincpp
-                        {0}mpi_gather_wisdom(MPI_COMM_WORLD);
-                        MPI_Barrier(MPI_COMM_WORLD);
-                        if (myrank == 0)
-                        {{
-                            char fname[256];
-                            sprintf(fname, "%s_fftw_wisdom.txt", simname);
-                            {0}export_wisdom_to_filename(fname);
-                        }}
-                        //endcpp
-                        """.format(fftw_prefix) + self.main_end
-        self.main        = """
-                           //begincpp
-                           int data_file_problem;
-                           clock_t time0, time1;
-                           double time_difference, local_time_difference;
-                           time0 = clock();
-                           if (myrank == 0) data_file_problem = grow_file_datasets();
-                           MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, MPI_COMM_WORLD);
-                           if (data_file_problem > 0)
-                           {
-                               std::cerr << data_file_problem << " problems growing file datasets.\\ntrying to exit now." << std::endl;
-                               MPI_Finalize();
-                               return EXIT_SUCCESS;
-                           }
-                           //endcpp
-                           """
-        self.main       += self.fluid_start
-        if self.particle_species > 0:
-            self.main   += self.particle_start
-        output_time_difference = ('time1 = clock();\n' +
-                                  'local_time_difference = ((unsigned int)(time1 - time0))/((double)CLOCKS_PER_SEC);\n' +
-                                  'time_difference = 0.0;\n' +
-                                  'MPI_Allreduce(&local_time_difference, &time_difference, ' +
-                                      '1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);\n' +
-                                  'if (myrank == 0) std::cout << "iteration " ' +
-                                      '<< {0} << " took " ' +
-                                      '<< time_difference/nprocs << " seconds" << std::endl;\n' +
-                                  'if (myrank == 0) std::cerr << "iteration " ' +
-                                      '<< {0} << " took " ' +
-                                      '<< time_difference/nprocs << " seconds" << std::endl;\n' +
-                                  'time0 = time1;\n')
-        if not postprocess_mode:
-            self.main       += 'for (int max_iter = iteration+niter_todo-iteration%niter_todo; iteration < max_iter; iteration++)\n'
-            self.main       += '{\n'
-
-            self.main       += """
-                                #ifdef USE_TIMINGOUTPUT
-                                const std::string loopLabel = "code::main_start::loop-" + std::to_string(iteration);
-                                TIMEZONE(loopLabel.c_str());
-                                #endif
-                                """
-            self.main       += 'if (iteration % niter_stat == 0) do_stats();\n'
-            if self.particle_species > 0:
-                self.main       += 'if (iteration % niter_part == 0) do_particle_stats();\n'
-                self.main   += self.particle_loop
-            self.main       += self.fluid_loop
-            self.main       += output_time_difference.format('iteration')
-            self.main       += '}\n'
-            self.main       += 'do_stats();\n'
-            self.main       += 'do_particle_stats();\n'
-            self.main       += output_time_difference.format('iteration')
-        else:
-            self.main       += 'for (int frame_index = iter0; frame_index <= iter1; frame_index += niter_out)\n'
-            self.main       += '{\n'
-            self.main       += """
-                                #ifdef USE_TIMINGOUTPUT
-                                const std::string loopLabel = "code::main_start::loop-" + std::to_string(frame_index);
-                                TIMEZONE(loopLabel.c_str());
-                                #endif
-                                """
-            if self.particle_species > 0:
-                self.main   += self.particle_loop
-            self.main       += self.fluid_loop
-            self.main       += output_time_difference.format('frame_index')
-            self.main       += '}\n'
-        self.main       += self.fluid_end
-        if self.particle_species > 0:
-            self.main   += self.particle_end
-        return None
-    def read_rfield(
-            self,
-            field = 'velocity',
-            iteration = 0,
-            filename = None):
-        """
-            :note: assumes field is a vector field
-        """
-        if type(filename) == type(None):
-            filename = os.path.join(
-                    self.work_dir,
-                    self.simname + '_r' + field + '_i{0:0>5x}'.format(iteration))
-        return np.memmap(
-                filename,
-                dtype = self.dtype,
-                mode = 'r',
-                shape = (self.parameters['nz'],
-                         self.parameters['ny'],
-                         self.parameters['nx'], 3))
-    def transpose_frame(
-            self,
-            field = 'velocity',
-            iteration = 0,
-            filename = None,
-            ofile = None):
-        Rdata = self.read_rfield(
-                field = field,
-                iteration = iteration,
-                filename = filename)
-        new_data = np.zeros(
-                (3,
-                 self.parameters['nz'],
-                 self.parameters['ny'],
-                 self.parameters['nx']),
-                dtype = self.dtype)
-        for i in range(3):
-            new_data[i] = Rdata[..., i]
-        if type(ofile) == type(None):
-            ofile = os.path.join(
-                    self.work_dir,
-                    self.simname + '_r' + field + '_i{0:0>5x}_3xNZxNYxNX'.format(iteration))
-        else:
-            new_data.tofile(ofile)
-        return new_data
-    def plot_vel_cut(
-            self,
-            axis,
-            field = 'velocity',
-            iteration = 0,
-            yval = 13,
-            filename = None):
-        axis.set_axis_off()
-        Rdata0 = self.read_rfield(field = field, iteration = iteration, filename = filename)
-        energy = np.sum(Rdata0[:, yval, :, :]**2, axis = 2)*.5
-        axis.imshow(energy, interpolation='none')
-        axis.set_title('{0}'.format(np.average(Rdata0[..., 0]**2 +
-                                               Rdata0[..., 1]**2 +
-                                               Rdata0[..., 2]**2)*.5))
-        return Rdata0
-    def generate_vector_field(
-            self,
-            rseed = 7547,
-            spectra_slope = 1.,
-            amplitude = 1.,
-            iteration = 0,
-            field_name = 'vorticity',
-            write_to_file = False,
-            # to switch to constant field, use generate_data_3D_uniform
-            # for scalar_generator
-            scalar_generator = tools.generate_data_3D):
-        """generate vector field.
-
-        The generated field is not divergence free, but it has the proper
-        shape.
-
-        :param rseed: seed for random number generator
-        :param spectra_slope: spectrum of field will look like k^(-p)
-        :param amplitude: all amplitudes are multiplied with this value
-        :param iteration: the field is written at this iteration
-        :param field_name: the name of the field being generated
-        :param write_to_file: should we write the field to file?
-        :param scalar_generator: which function to use for generating the
-            individual components.
-            Possible values: bfps.tools.generate_data_3D,
-            bfps.tools.generate_data_3D_uniform
-        :type rseed: int
-        :type spectra_slope: float
-        :type amplitude: float
-        :type iteration: int
-        :type field_name: str
-        :type write_to_file: bool
-        :type scalar_generator: function
-
-        :returns: ``Kdata``, a complex valued 4D ``numpy.array`` that uses the
-            transposed FFTW layout.
-            Kdata[ky, kz, kx, i] is the amplitude of mode (kx, ky, kz) for
-            the i-th component of the field.
-            (i.e. x is the fastest index and z the slowest index in the
-            real-space representation).
-        """
-        np.random.seed(rseed)
-        Kdata00 = scalar_generator(
-                self.parameters['nz']//2,
-                self.parameters['ny']//2,
-                self.parameters['nx']//2,
-                p = spectra_slope,
-                amplitude = amplitude).astype(self.ctype)
-        Kdata01 = scalar_generator(
-                self.parameters['nz']//2,
-                self.parameters['ny']//2,
-                self.parameters['nx']//2,
-                p = spectra_slope,
-                amplitude = amplitude).astype(self.ctype)
-        Kdata02 = scalar_generator(
-                self.parameters['nz']//2,
-                self.parameters['ny']//2,
-                self.parameters['nx']//2,
-                p = spectra_slope,
-                amplitude = amplitude).astype(self.ctype)
-        Kdata0 = np.zeros(
-                Kdata00.shape + (3,),
-                Kdata00.dtype)
-        Kdata0[..., 0] = Kdata00
-        Kdata0[..., 1] = Kdata01
-        Kdata0[..., 2] = Kdata02
-        Kdata1 = tools.padd_with_zeros(
-                Kdata0,
-                self.parameters['nz'],
-                self.parameters['ny'],
-                self.parameters['nx'])
-        if write_to_file:
-            Kdata1.tofile(
-                    os.path.join(self.work_dir,
-                                 self.simname + "_c{0}_i{1:0>5x}".format(field_name, iteration)))
-        return Kdata1
-    def generate_tracer_state(
-            self,
-            rseed = None,
-            iteration = 0,
-            species = 0,
-            write_to_file = False,
-            ncomponents = 3,
-            testing = False,
-            data = None):
-        if (type(data) == type(None)):
-            if not type(rseed) == type(None):
-                np.random.seed(rseed)
-            #point with problems: 5.37632864e+00,   6.10414710e+00,   6.25256493e+00]
-            data = np.zeros(self.parameters['nparticles']*ncomponents).reshape(-1, ncomponents)
-            data[:, :3] = np.random.random((self.parameters['nparticles'], 3))*2*np.pi
-        if testing:
-            #data[0] = np.array([3.26434, 4.24418, 3.12157])
-            data[0] = np.array([ 0.72086101,  2.59043666,  6.27501953])
-        with h5py.File(self.get_particle_file_name(), 'r+') as data_file:
-            data_file['tracers{0}/state'.format(species)][0] = data
-        if write_to_file:
-            data.tofile(
-                    os.path.join(
-                        self.work_dir,
-                        "tracers{0}_state_i{1:0>5x}".format(species, iteration)))
-        return data
-    def generate_initial_condition(self):
-        self.generate_vector_field(write_to_file = True)
-        for species in range(self.particle_species):
-            self.generate_tracer_state(
-                    species = species,
-                    write_to_file = False)
-        return None
-    def get_kspace(self):
-        kspace = {}
-        if self.parameters['dealias_type'] == 1:
-            kMx = self.parameters['dkx']*(self.parameters['nx']//2 - 1)
-            kMy = self.parameters['dky']*(self.parameters['ny']//2 - 1)
-            kMz = self.parameters['dkz']*(self.parameters['nz']//2 - 1)
-        else:
-            kMx = self.parameters['dkx']*(self.parameters['nx']//3 - 1)
-            kMy = self.parameters['dky']*(self.parameters['ny']//3 - 1)
-            kMz = self.parameters['dkz']*(self.parameters['nz']//3 - 1)
-        kspace['kM'] = max(kMx, kMy, kMz)
-        kspace['dk'] = min(self.parameters['dkx'],
-                           self.parameters['dky'],
-                           self.parameters['dkz'])
-        nshells = int(kspace['kM'] / kspace['dk']) + 2
-        kspace['nshell'] = np.zeros(nshells, dtype = np.int64)
-        kspace['kshell'] = np.zeros(nshells, dtype = np.float64)
-        kspace['kx'] = np.arange( 0,
-                                  self.parameters['nx']//2 + 1).astype(np.float64)*self.parameters['dkx']
-        kspace['ky'] = np.arange(-self.parameters['ny']//2 + 1,
-                                  self.parameters['ny']//2 + 1).astype(np.float64)*self.parameters['dky']
-        kspace['ky'] = np.roll(kspace['ky'], self.parameters['ny']//2+1)
-        kspace['kz'] = np.arange(-self.parameters['nz']//2 + 1,
-                                  self.parameters['nz']//2 + 1).astype(np.float64)*self.parameters['dkz']
-        kspace['kz'] = np.roll(kspace['kz'], self.parameters['nz']//2+1)
-        return kspace
-    def write_par(self, iter0 = 0):
-        assert (self.parameters['niter_todo'] % self.parameters['niter_stat'] == 0)
-        assert (self.parameters['niter_todo'] % self.parameters['niter_out']  == 0)
-        assert (self.parameters['niter_todo'] % self.parameters['niter_part'] == 0)
-        assert (self.parameters['niter_out']  % self.parameters['niter_stat'] == 0)
-        assert (self.parameters['niter_out']  % self.parameters['niter_part'] == 0)
-        _code.write_par(self, iter0 = iter0)
-        with h5py.File(os.path.join(self.work_dir, self.simname + '.h5'), 'r+') as ofile:
-            ofile['bfps_info/exec_name'] = self.name
-            ofile['field_dtype'] = np.dtype(self.dtype).str
-            kspace = self.get_kspace()
-            for k in kspace.keys():
-                ofile['kspace/' + k] = kspace[k]
-            nshells = kspace['nshell'].shape[0]
-            ofile.close()
-        return None
-    def specific_parser_arguments(
-            self,
-            parser):
-        _code.specific_parser_arguments(self, parser)
-        return None
-
diff --git a/bfps/cpp/distributed_particles.cpp b/bfps/cpp/distributed_particles.cpp
deleted file mode 100644
index 73fd0275d8138d41bb4ee7fbc28e2d41e8017661..0000000000000000000000000000000000000000
--- a/bfps/cpp/distributed_particles.cpp
+++ /dev/null
@@ -1,472 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-//#define NDEBUG
-
-#include <cmath>
-#include <cassert>
-#include <cstring>
-#include <string>
-#include <sstream>
-#include <array>
-
-#include "base.hpp"
-#include "distributed_particles.hpp"
-#include "fftw_tools.hpp"
-#include "scope_timer.hpp"
-
-
-extern int myrank, nprocs;
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_particles(
-        const char *NAME,
-        const hid_t data_file_id,
-        interpolator<rnumber, interp_neighbours> *VEL,
-        const int TRAJ_SKIP,
-        const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
-            NAME,
-            TRAJ_SKIP,
-            data_file_id,
-            VEL->descriptor->comm)
-{
-    assert((INTEGRATION_STEPS <= 6) &&
-           (INTEGRATION_STEPS >= 1));
-    this->vel = VEL;
-    this->rhs.resize(INTEGRATION_STEPS);
-    this->integration_steps = INTEGRATION_STEPS;
-    this->state.reserve(2*this->nparticles / this->nprocs);
-    for (unsigned int i=0; i<this->rhs.size(); i++)
-        this->rhs[i].reserve(2*this->nparticles / this->nprocs);
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-distributed_particles<particle_type, rnumber, interp_neighbours>::~distributed_particles()
-{
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
-        interpolator<rnumber, interp_neighbours> *field,
-        const std::unordered_map<int, single_particle_state<particle_type>> &x,
-        std::unordered_map<int, single_particle_state<POINT3D>> &y)
-{
-    std::array<double, 3> yy;
-    y.clear();
-    for (auto &pp: x)
-    {
-        (*field)(pp.second.data, &yy.front());
-        y[pp.first] = &yy.front();
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs(
-        const std::unordered_map<int, single_particle_state<particle_type>> &x,
-        std::unordered_map<int, single_particle_state<particle_type>> &y)
-{
-    std::unordered_map<int, single_particle_state<POINT3D>> yy;
-    switch(particle_type)
-    {
-        case VELOCITY_TRACER:
-            this->sample(this->vel, this->state, yy);
-            y.clear();
-            for (auto &pp: x)
-                y[pp.first] = yy[pp.first].data;
-            break;
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
-        interpolator<rnumber, interp_neighbours> *field,
-        const char *dset_name)
-{
-    std::unordered_map<int, single_particle_state<POINT3D>> y;
-    this->sample(field, this->state, y);
-    this->write(dset_name, y);
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
-{
-    for (int i=this->integration_steps-2; i>=0; i--)
-        rhs[i+1] = rhs[i];
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute(
-        std::unordered_map<int, single_particle_state<particle_type>> &x,
-        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals)
-{
-    TIMEZONE("distributed_particles::redistribute");
-    //DEBUG_MSG("entered redistribute\n");
-    /* neighbouring rank offsets */
-    int ro[2];
-    ro[0] = -1;
-    ro[1] = 1;
-    /* neighbouring ranks */
-    int nr[2];
-    nr[0] = MOD(this->myrank+ro[0], this->nprocs);
-    nr[1] = MOD(this->myrank+ro[1], this->nprocs);
-    /* particles to send, particles to receive */
-    std::vector<int> ps[2], pr[2];
-    /* number of particles to send, number of particles to receive */
-    int nps[2], npr[2];
-    int rsrc, rdst;
-    /* get list of id-s to send */
-    for (auto &pp: x)
-        for (unsigned int i=0; i<2; i++)
-            if (this->vel->get_rank(pp.second.data[2]) == nr[i])
-                ps[i].push_back(pp.first);
-    /* prepare data for send recv */
-    for (unsigned int i=0; i<2; i++)
-        nps[i] = ps[i].size();
-    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
-        for (unsigned int i=0; i<2; i++)
-        {
-            rdst = MOD(rsrc+ro[i], this->nprocs);
-            if (this->myrank == rsrc)
-                MPI_Send(
-                        nps+i,
-                        1,
-                        MPI_INTEGER,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst)+i,
-                        this->comm);
-            if (this->myrank == rdst)
-                MPI_Recv(
-                        npr+1-i,
-                        1,
-                        MPI_INTEGER,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst)+i,
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-        }
-    //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]);
-    //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]);
-    for (unsigned int i=0; i<2; i++)
-        pr[i].resize(npr[i]);
-
-    int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1];
-    buffer_size = (buffer_size > npr[0])? buffer_size : npr[0];
-    buffer_size = (buffer_size > npr[1])? buffer_size : npr[1];
-    //DEBUG_MSG("buffer size is %d\n", buffer_size);
-    double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())];
-    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
-        for (unsigned int i=0; i<2; i++)
-        {
-            rdst = MOD(rsrc+ro[i], this->nprocs);
-            if (this->myrank == rsrc && nps[i] > 0)
-            {
-                MPI_Send(
-                        &ps[i].front(),
-                        nps[i],
-                        MPI_INTEGER,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst),
-                        this->comm);
-                int pcounter = 0;
-                for (int p: ps[i])
-                {
-                    std::copy(x[p].data,
-                              x[p].data + state_dimension(particle_type),
-                              buffer + pcounter*(1+vals.size())*state_dimension(particle_type));
-                    x.erase(p);
-                    for (unsigned int tindex=0; tindex<vals.size(); tindex++)
-                    {
-                        std::copy(vals[tindex][p].data,
-                                  vals[tindex][p].data + state_dimension(particle_type),
-                                  buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type));
-                        vals[tindex].erase(p);
-                    }
-                    pcounter++;
-                }
-                MPI_Send(
-                        buffer,
-                        nps[i]*(1+vals.size())*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst)+1,
-                        this->comm);
-            }
-            if (this->myrank == rdst && npr[1-i] > 0)
-            {
-                MPI_Recv(
-                        &pr[1-i].front(),
-                        npr[1-i],
-                        MPI_INTEGER,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst),
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-                MPI_Recv(
-                        buffer,
-                        npr[1-i]*(1+vals.size())*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst)+1,
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-                unsigned int pcounter = 0;
-                for (int p: pr[1-i])
-                {
-                    x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type);
-                    for (unsigned int tindex=0; tindex<vals.size(); tindex++)
-                    {
-                        vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type);
-                    }
-                    pcounter++;
-                }
-            }
-        }
-    delete[] buffer;
-
-
-#ifndef NDEBUG
-    /* check that all particles at x are local */
-    for (auto &pp: x)
-        if (this->vel->get_rank(pp.second.data[2]) != this->myrank)
-        {
-            DEBUG_MSG("found particle %d with rank %d\n",
-                    pp.first,
-                    this->vel->get_rank(pp.second.data[2]));
-            assert(false);
-        }
-#endif
-    //DEBUG_MSG("exiting redistribute\n");
-}
-
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
-        const int nsteps)
-{
-    this->get_rhs(this->state, this->rhs[0]);
-    for (auto &pp: this->state)
-        for (unsigned int i=0; i<state_dimension(particle_type); i++)
-            switch(nsteps)
-            {
-                case 1:
-                    pp.second[i] += this->dt*this->rhs[0][pp.first][i];
-                    break;
-                case 2:
-                    pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i]
-                                            -   this->rhs[1][pp.first][i])/2;
-                    break;
-                case 3:
-                    pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i]
-                                            - 16*this->rhs[1][pp.first][i]
-                                            +  5*this->rhs[2][pp.first][i])/12;
-                    break;
-                case 4:
-                    pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i]
-                                            - 59*this->rhs[1][pp.first][i]
-                                            + 37*this->rhs[2][pp.first][i]
-                                            -  9*this->rhs[3][pp.first][i])/24;
-                    break;
-                case 5:
-                    pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i]
-                                            - 2774*this->rhs[1][pp.first][i]
-                                            + 2616*this->rhs[2][pp.first][i]
-                                            - 1274*this->rhs[3][pp.first][i]
-                                            +  251*this->rhs[4][pp.first][i])/720;
-                    break;
-                case 6:
-                    pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i]
-                                            - 7923*this->rhs[1][pp.first][i]
-                                            + 9982*this->rhs[2][pp.first][i]
-                                            - 7298*this->rhs[3][pp.first][i]
-                                            + 2877*this->rhs[4][pp.first][i]
-                                            -  475*this->rhs[5][pp.first][i])/1440;
-                    break;
-            }
-    this->redistribute(this->state, this->rhs);
-    this->roll_rhs();
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::step()
-{
-    TIMEZONE("distributed_particles::step");
-    this->AdamsBashforth((this->iteration < this->integration_steps) ?
-                            this->iteration+1 :
-                            this->integration_steps);
-    this->iteration++;
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::read()
-{
-    double *temp = new double[this->chunk_size*state_dimension(particle_type)];
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        //read state
-        if (this->myrank == 0)
-            this->read_state_chunk(cindex, temp);
-        MPI_Bcast(
-                temp,
-                this->chunk_size*state_dimension(particle_type),
-                MPI_DOUBLE,
-                0,
-                this->comm);
-        for (unsigned int p=0; p<this->chunk_size; p++)
-        {
-            if (this->vel->get_rank(temp[state_dimension(particle_type)*p+2]) == this->myrank)
-                this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
-        }
-        //read rhs
-        if (this->iteration > 0)
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                if (this->myrank == 0)
-                    this->read_rhs_chunk(cindex, i, temp);
-                MPI_Bcast(
-                        temp,
-                        this->chunk_size*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        0,
-                        this->comm);
-                for (unsigned int p=0; p<this->chunk_size; p++)
-                {
-                    auto pp = this->state.find(p+cindex*this->chunk_size);
-                    if (pp != this->state.end())
-                        this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
-                }
-            }
-    }
-    DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size());
-    delete[] temp;
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::write(
-        const char *dset_name,
-        std::unordered_map<int, single_particle_state<POINT3D>> &y)
-{
-    TIMEZONE("distributed_particles::write");
-    double *data = new double[this->nparticles*3];
-    double *yy = new double[this->nparticles*3];
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        std::fill_n(yy, this->chunk_size*3, 0);
-        for (unsigned int p=0; p<this->chunk_size; p++)
-        {
-            auto pp = y.find(p+cindex*this->chunk_size);
-            if (pp != y.end())
-                std::copy(pp->second.data,
-                          pp->second.data + 3,
-                          yy + pp->first*3);
-        }
-        MPI_Allreduce(
-                yy,
-                data,
-                3*this->nparticles,
-                MPI_DOUBLE,
-                MPI_SUM,
-                this->comm);
-        if (this->myrank == 0)
-            this->write_point3D_chunk(dset_name, cindex, data);
-    }
-    delete[] yy;
-    delete[] data;
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void distributed_particles<particle_type, rnumber, interp_neighbours>::write(
-        const bool write_rhs)
-{
-    TIMEZONE("distributed_particles::write2");
-    double *temp0 = new double[this->chunk_size*state_dimension(particle_type)];
-    double *temp1 = new double[this->chunk_size*state_dimension(particle_type)];
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        //write state
-        std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
-        for (unsigned int p=0; p<this->chunk_size; p++)
-        {
-            auto pp = this->state.find(p + cindex*this->chunk_size);
-            if (pp != this->state.end())
-                std::copy(pp->second.data,
-                          pp->second.data + state_dimension(particle_type),
-                          temp0 + p*state_dimension(particle_type));
-        }
-        MPI_Allreduce(
-                temp0,
-                temp1,
-                state_dimension(particle_type)*this->chunk_size,
-                MPI_DOUBLE,
-                MPI_SUM,
-                this->comm);
-        if (this->myrank == 0)
-            this->write_state_chunk(cindex, temp1);
-        //write rhs
-        if (write_rhs)
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
-                for (unsigned int p=0; p<this->chunk_size; p++)
-                {
-                    auto pp = this->rhs[i].find(p + cindex*this->chunk_size);
-                    if (pp != this->rhs[i].end())
-                        std::copy(pp->second.data,
-                                  pp->second.data + state_dimension(particle_type),
-                                  temp0 + p*state_dimension(particle_type));
-                }
-                MPI_Allreduce(
-                        temp0,
-                        temp1,
-                        state_dimension(particle_type)*this->chunk_size,
-                        MPI_DOUBLE,
-                        MPI_SUM,
-                        this->comm);
-                if (this->myrank == 0)
-                    this->write_rhs_chunk(cindex, i, temp1);
-            }
-    }
-    delete[] temp0;
-    delete[] temp1;
-}
-
-
-/*****************************************************************************/
-template class distributed_particles<VELOCITY_TRACER, float, 1>;
-template class distributed_particles<VELOCITY_TRACER, float, 2>;
-template class distributed_particles<VELOCITY_TRACER, float, 3>;
-template class distributed_particles<VELOCITY_TRACER, float, 4>;
-template class distributed_particles<VELOCITY_TRACER, float, 5>;
-template class distributed_particles<VELOCITY_TRACER, float, 6>;
-template class distributed_particles<VELOCITY_TRACER, double, 1>;
-template class distributed_particles<VELOCITY_TRACER, double, 2>;
-template class distributed_particles<VELOCITY_TRACER, double, 3>;
-template class distributed_particles<VELOCITY_TRACER, double, 4>;
-template class distributed_particles<VELOCITY_TRACER, double, 5>;
-template class distributed_particles<VELOCITY_TRACER, double, 6>;
-/*****************************************************************************/
diff --git a/bfps/cpp/distributed_particles.hpp b/bfps/cpp/distributed_particles.hpp
deleted file mode 100644
index cf6e124a7744c049b6fcf0c84c1618a0a214c30e..0000000000000000000000000000000000000000
--- a/bfps/cpp/distributed_particles.hpp
+++ /dev/null
@@ -1,105 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <iostream>
-#include <unordered_map>
-#include <vector>
-#include <hdf5.h>
-#include "base.hpp"
-#include "particles_base.hpp"
-#include "fluid_solver_base.hpp"
-#include "interpolator.hpp"
-
-#ifndef DISTRIBUTED_PARTICLES
-
-#define DISTRIBUTED_PARTICLES
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-class distributed_particles: public particles_io_base<particle_type>
-{
-    private:
-        std::unordered_map<int, single_particle_state<particle_type> > state;
-        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs;
-
-    public:
-        int integration_steps;
-        // this class only works with buffered interpolator
-        interpolator<rnumber, interp_neighbours> *vel;
-
-        /* simulation parameters */
-        double dt;
-
-        /* methods */
-
-        /* constructor and destructor.
-         * allocate and deallocate:
-         *  this->state
-         *  this->rhs
-         * */
-        distributed_particles(
-                const char *NAME,
-                const hid_t data_file_id,
-                interpolator<rnumber, interp_neighbours> *FIELD,
-                const int TRAJ_SKIP,
-                const int INTEGRATION_STEPS = 2);
-        ~distributed_particles();
-
-        void sample(
-                interpolator<rnumber, interp_neighbours> *field,
-                const char *dset_name);
-        void sample(
-                interpolator<rnumber, interp_neighbours> *field,
-                const std::unordered_map<int, single_particle_state<particle_type>> &x,
-                std::unordered_map<int, single_particle_state<POINT3D>> &y);
-        void get_rhs(
-                const std::unordered_map<int, single_particle_state<particle_type>> &x,
-                std::unordered_map<int, single_particle_state<particle_type>> &y);
-
-        void redistribute(
-                std::unordered_map<int, single_particle_state<particle_type>> &x,
-                std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals);
-
-
-        /* input/output */
-        void read();
-        void write(
-                const char *dset_name,
-                std::unordered_map<int, single_particle_state<POINT3D>> &y);
-        void write(
-                const char *dset_name,
-                std::unordered_map<int, single_particle_state<particle_type>> &y);
-        void write(const bool write_rhs = true);
-
-        /* solvers */
-        void step();
-        void roll_rhs();
-        void AdamsBashforth(const int nsteps);
-};
-
-#endif//DISTRIBUTED_PARTICLES
-
diff --git a/bfps/cpp/fftw_interface.hpp b/bfps/cpp/fftw_interface.hpp
deleted file mode 100644
index 495ec9fa3712153df4d31faf7dfb3046637b5483..0000000000000000000000000000000000000000
--- a/bfps/cpp/fftw_interface.hpp
+++ /dev/null
@@ -1,173 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-#ifndef FFTW_INTERFACE_HPP
-#define FFTW_INTERFACE_HPP
-
-#include <fftw3-mpi.h>
-
-#ifdef USE_FFTWESTIMATE
-#define DEFAULT_FFTW_FLAG FFTW_ESTIMATE
-#warning You are using FFTW estimate
-#else
-#define DEFAULT_FFTW_FLAG FFTW_PATIENT
-#endif
-
-template <class realtype>
-class fftw_interface;
-
-template <>
-class fftw_interface<float>
-{
-public:
-    using real = float;
-    using complex = fftwf_complex;
-    using plan = fftwf_plan;
-    using iodim = fftwf_iodim;
-
-    static complex* alloc_complex(const size_t in_size){
-        return fftwf_alloc_complex(in_size);
-    }
-
-    static real* alloc_real(const size_t in_size){
-        return fftwf_alloc_real(in_size);
-    }
-
-    static void free(void* ptr){
-        fftwf_free(ptr);
-    }
-
-    static void execute(plan in_plan){
-        fftwf_execute(in_plan);
-    }
-
-    static void destroy_plan(plan in_plan){
-        fftwf_destroy_plan(in_plan);
-    }
-
-    template <class ... Params>
-    static plan mpi_plan_transpose(Params ... params){
-        return fftwf_mpi_plan_transpose(params...);
-    }
-
-    template <class ... Params>
-    static plan mpi_plan_many_transpose(Params ... params){
-        return fftwf_mpi_plan_many_transpose(params...);
-    }
-
-    template <class ... Params>
-    static plan plan_guru_r2r(Params ... params){
-        return fftwf_plan_guru_r2r(params...);
-    }
-
-    template <class ... Params>
-    static plan plan_guru_dft(Params ... params){
-        return fftwf_plan_guru_dft(params...);
-    }
-
-    template <class ... Params>
-    static plan mpi_plan_many_dft_c2r(Params ... params){
-        return fftwf_mpi_plan_many_dft_c2r(params...);
-    }
-
-    template <class ... Params>
-    static plan mpi_plan_many_dft_r2c(Params ... params){
-        return fftwf_mpi_plan_many_dft_r2c(params...);
-    }
-
-    template <class ... Params>
-    static plan mpi_plan_dft_c2r_3d(Params ... params){
-        return fftwf_mpi_plan_dft_c2r_3d(params...);
-    }
-};
-
-template <>
-class fftw_interface<double>
-{
-public:
-    using real = double;
-    using complex = fftw_complex;
-    using plan = fftw_plan;
-    using iodim = fftw_iodim;
-
-    static complex* alloc_complex(const size_t in_size){
-        return fftw_alloc_complex(in_size);
-    }
-
-    static real* alloc_real(const size_t in_size){
-        return fftw_alloc_real(in_size);
-    }
-
-    static void free(void* ptr){
-        fftw_free(ptr);
-    }
-
-    static void execute(plan in_plan){
-        fftw_execute(in_plan);
-    }
-
-    static void destroy_plan(plan in_plan){
-        fftw_destroy_plan(in_plan);
-    }
-
-    template <class ... Params>
-    static plan mpi_plan_transpose(Params ... params){
-        return fftw_mpi_plan_transpose(params...);
-    }
-
-    template <class ... Params>
-    static plan mpi_plan_many_transpose(Params ... params){
-        return fftw_mpi_plan_many_transpose(params...);
-    }
-
-    template <class ... Params>
-    static plan plan_guru_r2r(Params ... params){
-        return fftw_plan_guru_r2r(params...);
-    }
-
-    template <class ... Params>
-    static plan plan_guru_dft(Params ... params){
-        return fftw_plan_guru_dft(params...);
-    }
-
-    template <class ... Params>
-    static plan mpi_plan_many_dft_c2r(Params ... params){
-        return fftw_mpi_plan_many_dft_c2r(params...);
-    }
-
-    template <class ... Params>
-    static plan mpi_plan_many_dft_r2c(Params ... params){
-        return fftw_mpi_plan_many_dft_r2c(params...);
-    }
-
-    template <class ... Params>
-    static plan mpi_plan_dft_c2r_3d(Params ... params){
-        return fftw_mpi_plan_dft_c2r_3d(params...);
-    }
-};
-
-
-
-#endif // FFTW_INTERFACE_HPP
-
diff --git a/bfps/cpp/fftw_tools.cpp b/bfps/cpp/fftw_tools.cpp
deleted file mode 100644
index 61e03d292f81aed1fa4b2dfcab880fb7105b676e..0000000000000000000000000000000000000000
--- a/bfps/cpp/fftw_tools.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-#include <stdlib.h>
-#include <algorithm>
-#include <iostream>
-#include "base.hpp"
-#include "fftw_tools.hpp"
-#include "fftw_interface.hpp"
-
-#define NDEBUG
-
-template <class rnumber>
-int clip_zero_padding(
-        field_descriptor<rnumber> *f,
-        rnumber *a,
-        int howmany)
-{
-    if (f->ndims < 3)
-        return EXIT_FAILURE;
-    rnumber *b = a;
-    ptrdiff_t copy_size = f->sizes[2] * howmany;
-    ptrdiff_t skip_size = copy_size + 2*howmany;
-    for (int i0 = 0; i0 < f->subsizes[0]; i0++)
-        for (int i1 = 0; i1 < f->sizes[1]; i1++)
-        {
-            std::copy(a, a + copy_size, b);
-            a += skip_size;
-            b += copy_size;
-        }
-    return EXIT_SUCCESS;
-}
-
-template
-int clip_zero_padding<float>(
-        field_descriptor<float> *f,
-        float *a,
-        int howmany);
-
-template
-int clip_zero_padding<double>(
-        field_descriptor<double> *f,
-        double *a,
-        int howmany);
-
-
-
-template <class rnumber>
-int copy_complex_array(
-        field_descriptor<rnumber> *fi,
-        rnumber (*ai)[2],
-field_descriptor<rnumber> *fo,
-rnumber (*ao)[2],
-int howmany)
-{
-    DEBUG_MSG("entered copy_complex_array\n");
-    typename fftw_interface<rnumber>::complex *buffer;
-    buffer = fftw_interface<rnumber>::alloc_complex(fi->slice_size*howmany);
-
-    int min_fast_dim;
-    min_fast_dim =
-            (fi->sizes[2] > fo->sizes[2]) ?
-                fo->sizes[2] : fi->sizes[2];
-
-    /* clean up destination, in case we're padding with zeros
-       (even if only for one dimension) */
-    std::fill_n((rnumber*)ao, fo->local_size*2, 0.0);
-
-    int64_t ii0, ii1;
-    int64_t oi0, oi1;
-    int64_t delta1, delta0;
-    int irank, orank;
-    delta0 = (fo->sizes[0] - fi->sizes[0]);
-    delta1 = (fo->sizes[1] - fi->sizes[1]);
-    for (ii0=0; ii0 < fi->sizes[0]; ii0++)
-    {
-        if (ii0 <= fi->sizes[0]/2)
-        {
-            oi0 = ii0;
-            if (oi0 > fo->sizes[0]/2)
-                continue;
-        }
-        else
-        {
-            oi0 = ii0 + delta0;
-            if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2))
-                continue;
-        }
-        irank = fi->rank[ii0];
-        orank = fo->rank[oi0];
-        if ((irank == orank) &&
-                (irank == fi->myrank))
-        {
-            std::copy(
-                        (rnumber*)(ai + (ii0 - fi->starts[0]    )*fi->slice_size),
-                    (rnumber*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size),
-                    (rnumber*)buffer);
-        }
-        else
-        {
-            if (fi->myrank == irank)
-            {
-                MPI_Send(
-                            (void*)(ai + (ii0-fi->starts[0])*fi->slice_size),
-                        fi->slice_size,
-                        mpi_real_type<rnumber>::complex(),
-                        orank,
-                        ii0,
-                        fi->comm);
-            }
-            if (fi->myrank == orank)
-            {
-                MPI_Recv(
-                            (void*)(buffer),
-                            fi->slice_size,
-                            mpi_real_type<rnumber>::complex(),
-                            irank,
-                            ii0,
-                            fi->comm,
-                            MPI_STATUS_IGNORE);
-            }
-        }
-        if (fi->myrank == orank)
-        {
-            for (ii1 = 0; ii1 < fi->sizes[1]; ii1++)
-            {
-                if (ii1 <= fi->sizes[1]/2)
-                {
-                    oi1 = ii1;
-                    if (oi1 > fo->sizes[1]/2)
-                        continue;
-                }
-                else
-                {
-                    oi1 = ii1 + delta1;
-                    if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2))
-                        continue;
-                }
-                std::copy(
-                            (rnumber*)(buffer + (ii1*fi->sizes[2]*howmany)),
-                        (rnumber*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany),
-                        (rnumber*)(ao +
-                                   ((oi0 - fo->starts[0])*fo->sizes[1] +
-                        oi1)*fo->sizes[2]*howmany));
-            }
-        }
-    }
-    fftw_interface<rnumber>::free(buffer);
-    MPI_Barrier(fi->comm);
-
-    DEBUG_MSG("exiting copy_complex_array\n");
-    return EXIT_SUCCESS;
-}
-
-template
-int copy_complex_array<float>(
-        field_descriptor<float> *fi,
-        float (*ai)[2],
-        field_descriptor<float> *fo,
-        float (*ao)[2],
-        int howmany);
-
-template
-int copy_complex_array<double>(
-        field_descriptor<double> *fi,
-        double (*ai)[2],
-        field_descriptor<double> *fo,
-        double (*ao)[2],
-        int howmany);
-
-
-template <class rnumber>
-int get_descriptors_3D(
-        int n0, int n1, int n2,
-        field_descriptor<rnumber> **fr,
-        field_descriptor<rnumber> **fc)
-{
-    int ntmp[3];
-    ntmp[0] = n0;
-    ntmp[1] = n1;
-    ntmp[2] = n2;
-    *fr = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD);
-    ntmp[0] = n0;
-    ntmp[1] = n1;
-    ntmp[2] = n2/2+1;
-    *fc = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::complex(), MPI_COMM_WORLD);
-    return EXIT_SUCCESS;
-}
-
-template
-int get_descriptors_3D<float>(
-        int n0, int n1, int n2,
-        field_descriptor<float> **fr,
-        field_descriptor<float> **fc);
-
-template
-int get_descriptors_3D<double>(
-        int n0, int n1, int n2,
-        field_descriptor<double> **fr,
-        field_descriptor<double> **fc);
-
diff --git a/bfps/cpp/field_descriptor.cpp b/bfps/cpp/field_descriptor.cpp
deleted file mode 100644
index 20c634262dbb45ad4c2bb5a1b5640b6df23d4d2c..0000000000000000000000000000000000000000
--- a/bfps/cpp/field_descriptor.cpp
+++ /dev/null
@@ -1,543 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <stdlib.h>
-#include <algorithm>
-#include <iostream>
-#include "base.hpp"
-#include "field_descriptor.hpp"
-#include "fftw_interface.hpp"
-#include "scope_timer.hpp"
-
-/*****************************************************************************/
-/* macro for specializations to numeric types compatible with FFTW           */
-
-
-template <class rnumber>
-field_descriptor<rnumber>::field_descriptor(
-        int ndims,
-        int *n,
-        MPI_Datatype element_type,
-        MPI_Comm COMM_TO_USE)
-{
-    TIMEZONE("field_descriptor");
-    DEBUG_MSG("entered field_descriptor::field_descriptor\n");
-    this->comm = COMM_TO_USE;
-    MPI_Comm_rank(this->comm, &this->myrank);
-    MPI_Comm_size(this->comm, &this->nprocs);
-    this->ndims = ndims;
-    this->sizes    = new int[ndims];
-    this->subsizes = new int[ndims];
-    this->starts   = new int[ndims];
-    int tsizes   [ndims];
-    int tsubsizes[ndims];
-    int tstarts  [ndims];
-    std::vector<ptrdiff_t> nfftw;
-    nfftw.resize(ndims);
-    ptrdiff_t local_n0, local_0_start;
-    for (int i = 0; i < this->ndims; i++)
-        nfftw[i] = n[i];
-    this->local_size = fftw_mpi_local_size_many(
-                this->ndims,
-                &nfftw.front(),
-                1,
-                FFTW_MPI_DEFAULT_BLOCK,
-                this->comm,
-                &local_n0,
-                &local_0_start);
-    this->sizes[0] = n[0];
-    this->subsizes[0] = (int)local_n0;
-    this->starts[0] = (int)local_0_start;
-    DEBUG_MSG_WAIT(
-                this->comm,
-                "first subsizes[0] = %d %d %d\n",
-                this->subsizes[0],
-            tsubsizes[0],
-            (int)local_n0);
-    tsizes[0] = n[0];
-    tsubsizes[0] = (int)local_n0;
-    tstarts[0] = (int)local_0_start;
-    DEBUG_MSG_WAIT(
-                this->comm,
-                "second subsizes[0] = %d %d %d\n",
-                this->subsizes[0],
-            tsubsizes[0],
-            (int)local_n0);
-    this->mpi_dtype = element_type;
-    this->slice_size = 1;
-    this->full_size = this->sizes[0];
-    for (int i = 1; i < this->ndims; i++)
-    {
-        this->sizes[i] = n[i];
-        this->subsizes[i] = n[i];
-        this->starts[i] = 0;
-        this->slice_size *= this->subsizes[i];
-        this->full_size *= this->sizes[i];
-        tsizes[i] = this->sizes[i];
-        tsubsizes[i] = this->subsizes[i];
-        tstarts[i] = this->starts[i];
-    }
-    tsizes[ndims-1] *= sizeof(rnumber);
-    tsubsizes[ndims-1] *= sizeof(rnumber);
-    tstarts[ndims-1] *= sizeof(rnumber);
-    if (this->mpi_dtype == mpi_real_type<rnumber>::complex())
-    {
-        tsizes[ndims-1] *= 2;
-        tsubsizes[ndims-1] *= 2;
-        tstarts[ndims-1] *= 2;
-    }
-    int local_zero_array[this->nprocs], zero_array[this->nprocs];
-    for (int i=0; i<this->nprocs; i++)
-        local_zero_array[i] = 0;
-    local_zero_array[this->myrank] = (this->subsizes[0] == 0) ? 1 : 0;
-    MPI_Allreduce(
-                local_zero_array,
-                zero_array,
-                this->nprocs,
-                MPI_INT,
-                MPI_SUM,
-                this->comm);
-    int no_of_excluded_ranks = 0;
-    for (int i = 0; i<this->nprocs; i++)
-        no_of_excluded_ranks += zero_array[i];
-    DEBUG_MSG_WAIT(
-                this->comm,
-                "subsizes[0] = %d %d\n",
-                this->subsizes[0],
-            tsubsizes[0]);
-    if (no_of_excluded_ranks == 0)
-    {
-        this->io_comm = this->comm;
-        this->io_nprocs = this->nprocs;
-        this->io_myrank = this->myrank;
-    }
-    else
-    {
-        int excluded_rank[no_of_excluded_ranks];
-        for (int i=0, j=0; i<this->nprocs; i++)
-            if (zero_array[i])
-            {
-                excluded_rank[j] = i;
-                j++;
-            }
-        MPI_Group tgroup0, tgroup;
-        MPI_Comm_group(this->comm, &tgroup0);
-        MPI_Group_excl(tgroup0, no_of_excluded_ranks, excluded_rank, &tgroup);
-        MPI_Comm_create(this->comm, tgroup, &this->io_comm);
-        MPI_Group_free(&tgroup0);
-        MPI_Group_free(&tgroup);
-        if (this->subsizes[0] > 0)
-        {
-            MPI_Comm_rank(this->io_comm, &this->io_myrank);
-            MPI_Comm_size(this->io_comm, &this->io_nprocs);
-        }
-        else
-        {
-            this->io_myrank = MPI_PROC_NULL;
-            this->io_nprocs = -1;
-        }
-    }
-    DEBUG_MSG_WAIT(
-                this->comm,
-                "inside field_descriptor constructor, about to call "
-                "MPI_Type_create_subarray "
-                "%d %d %d\n",
-                this->sizes[0],
-            this->subsizes[0],
-            this->starts[0]);
-    for (int i=0; i<this->ndims; i++)
-        DEBUG_MSG_WAIT(
-                    this->comm,
-                    "tsizes "
-                    "%d %d %d\n",
-                    tsizes[i],
-                    tsubsizes[i],
-                    tstarts[i]);
-    if (this->subsizes[0] > 0)
-    {
-        DEBUG_MSG("creating subarray\n");
-        MPI_Type_create_subarray(
-                    ndims,
-                    tsizes,
-                    tsubsizes,
-                    tstarts,
-                    MPI_ORDER_C,
-                    MPI_UNSIGNED_CHAR,
-                    &this->mpi_array_dtype);
-        MPI_Type_commit(&this->mpi_array_dtype);
-    }
-    this->rank = new int[this->sizes[0]];
-    int *local_rank = new int[this->sizes[0]];
-    std::fill_n(local_rank, this->sizes[0], 0);
-    for (int i = 0; i < this->sizes[0]; i++)
-        if (i >= this->starts[0] && i < this->starts[0] + this->subsizes[0])
-            local_rank[i] = this->myrank;
-    MPI_Allreduce(
-                local_rank,
-                this->rank,
-                this->sizes[0],
-            MPI_INT,
-            MPI_SUM,
-            this->comm);
-    delete[] local_rank;
-    this->all_start0 = new int[this->nprocs];
-    int *local_start0 = new int[this->nprocs];
-    std::fill_n(local_start0, this->nprocs, 0);
-    for (int i = 0; i < this->nprocs; i++)
-        if (this->myrank == i)
-            local_start0[i] = this->starts[0];
-    MPI_Allreduce(
-                local_start0,
-                this->all_start0,
-                this->nprocs,
-                MPI_INT,
-                MPI_SUM,
-                this->comm);
-    delete[] local_start0;
-    this->all_size0  = new int[this->nprocs];
-    int *local_size0 = new int[this->nprocs];
-    std::fill_n(local_size0, this->nprocs, 0);
-    for (int i = 0; i < this->nprocs; i++)
-        if (this->myrank == i)
-            local_size0[i] = this->subsizes[0];
-    MPI_Allreduce(
-                local_size0,
-                this->all_size0,
-                this->nprocs,
-                MPI_INT,
-                MPI_SUM,
-                this->comm);
-    delete[] local_size0;
-    DEBUG_MSG("exiting field_descriptor constructor\n");
-}
-
-template <class rnumber>
-int field_descriptor<rnumber>::read(
-        const char *fname,
-        void *buffer)
-{
-    TIMEZONE("field_descriptor::read");
-    DEBUG_MSG("entered field_descriptor::read\n");
-    char representation[] = "native";
-    if (this->subsizes[0] > 0)
-    {
-        MPI_Info info;
-        MPI_Info_create(&info);
-        MPI_File f;
-        ptrdiff_t read_size = this->local_size*sizeof(rnumber);
-        DEBUG_MSG("read size is %ld\n", read_size);
-        char ffname[200];
-        if (this->mpi_dtype == mpi_real_type<rnumber>::complex())
-            read_size *= 2;
-        DEBUG_MSG("read size is %ld\n", read_size);
-        sprintf(ffname, "%s", fname);
-
-        MPI_File_open(
-                    this->io_comm,
-                    ffname,
-                    MPI_MODE_RDONLY,
-                    info,
-                    &f);
-        DEBUG_MSG("opened file\n");
-        MPI_File_set_view(
-                    f,
-                    0,
-                    MPI_UNSIGNED_CHAR,
-                    this->mpi_array_dtype,
-                    representation,
-                    info);
-        DEBUG_MSG("view is set\n");
-        MPI_File_read_all(
-                    f,
-                    buffer,
-                    read_size,
-                    MPI_UNSIGNED_CHAR,
-                    MPI_STATUS_IGNORE);
-        DEBUG_MSG("info is read\n");
-        MPI_File_close(&f);
-    }
-    DEBUG_MSG("finished with field_descriptor::read\n");
-    return EXIT_SUCCESS;
-}
-
-template <class rnumber>
-int field_descriptor<rnumber>::write(
-        const char *fname,
-        void *buffer)
-{
-    TIMEZONE("field_descriptor::write");
-    char representation[] = "native";
-    if (this->subsizes[0] > 0)
-    {
-        MPI_Info info;
-        MPI_Info_create(&info);
-        MPI_File f;
-        ptrdiff_t read_size = this->local_size*sizeof(rnumber);
-        char ffname[200];
-        if (this->mpi_dtype == mpi_real_type<rnumber>::complex())
-            read_size *= 2;
-        sprintf(ffname, "%s", fname);
-
-        MPI_File_open(
-                    this->io_comm,
-                    ffname,
-                    MPI_MODE_CREATE | MPI_MODE_WRONLY,
-                    info,
-                    &f);
-        MPI_File_set_view(
-                    f,
-                    0,
-                    MPI_UNSIGNED_CHAR,
-                    this->mpi_array_dtype,
-                    representation,
-                    info);
-        MPI_File_write_all(
-                    f,
-                    buffer,
-                    read_size,
-                    MPI_UNSIGNED_CHAR,
-                    MPI_STATUS_IGNORE);
-        MPI_File_close(&f);
-    }
-
-    return EXIT_SUCCESS;
-}
-
-template <class rnumber>
-int field_descriptor<rnumber>::transpose(
-        rnumber *input,
-        rnumber *output)
-{
-    TIMEZONE("field_descriptor::transpose");
-    /* IMPORTANT NOTE:
-     for 3D transposition, the input data is messed up */
-    typename fftw_interface<rnumber>::plan tplan;
-    if (this->ndims == 3)
-    {
-        /* transpose the two local dimensions 1 and 2 */
-        rnumber *atmp;
-        atmp = fftw_interface<rnumber>::alloc_real(this->slice_size);
-        for (int k = 0; k < this->subsizes[0]; k++)
-        {
-            /* put transposed slice in atmp */
-            for (int j = 0; j < this->sizes[1]; j++)
-                for (int i = 0; i < this->sizes[2]; i++)
-                    atmp[i*this->sizes[1] + j] =
-                            input[(k*this->sizes[1] + j)*this->sizes[2] + i];
-            /* copy back transposed slice */
-            std::copy(
-                        atmp,
-                        atmp + this->slice_size,
-                        input + k*this->slice_size);
-        }
-        fftw_interface<rnumber>::free(atmp);
-    }
-    tplan = fftw_interface<rnumber>::mpi_plan_transpose(
-                this->sizes[0], this->slice_size,
-            input, output,
-            this->comm,
-            DEFAULT_FFTW_FLAG);
-    fftw_interface<rnumber>::execute(tplan);
-    fftw_interface<rnumber>::destroy_plan(tplan);
-    return EXIT_SUCCESS;
-}
-
-template <class rnumber>
-int field_descriptor<rnumber>::transpose(
-        typename fftw_interface<rnumber>::complex *input,
-        typename fftw_interface<rnumber>::complex *output)
-{
-    TIMEZONE("field_descriptor::transpose2");
-    switch (this->ndims)
-    {
-    case 2:
-        /* do a global transpose over the 2 dimensions */
-        if (output == NULL)
-        {
-            std::cerr << "bad arguments for transpose.\n" << std::endl;
-            return EXIT_FAILURE;
-        }
-        typename fftw_interface<rnumber>::plan tplan;
-        tplan = fftw_interface<rnumber>::mpi_plan_many_transpose(
-                    this->sizes[0], this->sizes[1], 2,
-                FFTW_MPI_DEFAULT_BLOCK,
-                FFTW_MPI_DEFAULT_BLOCK,
-                (rnumber*)input, (rnumber*)output,
-                this->comm,
-                DEFAULT_FFTW_FLAG);
-        fftw_interface<rnumber>::execute(tplan);
-        fftw_interface<rnumber>::destroy_plan(tplan);
-        break;
-    case 3:
-        /* transpose the two local dimensions 1 and 2 */
-        typename fftw_interface<rnumber>::complex *atmp;
-        atmp = fftw_interface<rnumber>::alloc_complex(this->slice_size);
-        for (int k = 0; k < this->subsizes[0]; k++)
-        {
-            /* put transposed slice in atmp */
-            for (int j = 0; j < this->sizes[1]; j++)
-                for (int i = 0; i < this->sizes[2]; i++)
-                {
-                    atmp[i*this->sizes[1] + j][0] =
-                            input[(k*this->sizes[1] + j)*this->sizes[2] + i][0];
-                    atmp[i*this->sizes[1] + j][1] =
-                            input[(k*this->sizes[1] + j)*this->sizes[2] + i][1];
-                }
-            /* copy back transposed slice */
-            std::copy(
-                        (rnumber*)(atmp),
-                        (rnumber*)(atmp + this->slice_size),
-                        (rnumber*)(input + k*this->slice_size));
-        }
-        fftw_interface<rnumber>::free(atmp);
-        break;
-    default:
-        return EXIT_FAILURE;
-        break;
-    }
-    return EXIT_SUCCESS;
-}
-
-template <class rnumber>
-int field_descriptor<rnumber>::interleave(
-        rnumber *a,
-        int dim)
-{
-     TIMEZONE("field_descriptor::interleav");
-    /* the following is copied from
- * http://agentzlerich.blogspot.com/2010/01/using-fftw-for-in-place-matrix.html
- * */
-    typename fftw_interface<rnumber>::iodim howmany_dims[2];
-    howmany_dims[0].n  = dim;
-    howmany_dims[0].is = this->local_size;
-    howmany_dims[0].os = 1;
-    howmany_dims[1].n  = this->local_size;
-    howmany_dims[1].is = 1;
-    howmany_dims[1].os = dim;
-    const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]);
-
-    typename fftw_interface<rnumber>::plan tmp = fftw_interface<rnumber>::plan_guru_r2r(
-                /*rank*/0,
-                /*dims*/nullptr,
-                howmany_rank,
-                howmany_dims,
-                a,
-                a,
-                /*kind*/nullptr,
-                DEFAULT_FFTW_FLAG);
-    fftw_interface<rnumber>::execute(tmp);
-    fftw_interface<rnumber>::destroy_plan(tmp);
-    return EXIT_SUCCESS;
-}
-
-template <class rnumber>
-int field_descriptor<rnumber>::interleave(
-        typename fftw_interface<rnumber>::complex *a,
-        int dim)
-{
-     TIMEZONE("field_descriptor::interleave2");
-    typename fftw_interface<rnumber>::iodim howmany_dims[2];
-    howmany_dims[0].n  = dim;
-    howmany_dims[0].is = this->local_size;
-    howmany_dims[0].os = 1;
-    howmany_dims[1].n  = this->local_size;
-    howmany_dims[1].is = 1;
-    howmany_dims[1].os = dim;
-    const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]);
-
-    typename fftw_interface<rnumber>::plan tmp = fftw_interface<rnumber>::plan_guru_dft(
-                /*rank*/0,
-                /*dims*/nullptr,
-                howmany_rank,
-                howmany_dims,
-                a,
-                a,
-                +1,
-                DEFAULT_FFTW_FLAG);
-    fftw_interface<rnumber>::execute(tmp);
-    fftw_interface<rnumber>::destroy_plan(tmp);
-    return EXIT_SUCCESS;
-}
-
-template <class rnumber>
-field_descriptor<rnumber>* field_descriptor<rnumber>::get_transpose()
-{
-    TIMEZONE("field_descriptor::get_transpose");
-    int n[this->ndims];
-    for (int i=0; i<this->ndims; i++)
-        n[i] = this->sizes[this->ndims - i - 1];
-    return new field_descriptor<rnumber>(this->ndims, n, this->mpi_dtype, this->comm);
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-
-
-
-/*****************************************************************************/
-/* destructor looks the same for both float and double                       */
-template <class rnumber>
-field_descriptor<rnumber>::~field_descriptor()
-{
-    DEBUG_MSG_WAIT(
-                MPI_COMM_WORLD,
-                this->io_comm == MPI_COMM_NULL ? "null\n" : "not null\n");
-    DEBUG_MSG_WAIT(
-                MPI_COMM_WORLD,
-                "subsizes[0] = %d \n", this->subsizes[0]);
-    if (this->subsizes[0] > 0)
-    {
-        DEBUG_MSG_WAIT(
-                    this->io_comm,
-                    "deallocating mpi_array_dtype\n");
-        MPI_Type_free(&this->mpi_array_dtype);
-    }
-    if (this->nprocs != this->io_nprocs && this->io_myrank != MPI_PROC_NULL)
-    {
-        DEBUG_MSG_WAIT(
-                    this->io_comm,
-                    "freeing io_comm\n");
-        MPI_Comm_free(&this->io_comm);
-    }
-    delete[] this->sizes;
-    delete[] this->subsizes;
-    delete[] this->starts;
-    delete[] this->rank;
-    delete[] this->all_start0;
-    delete[] this->all_size0;
-}
-/*****************************************************************************/
-
-
-
-/*****************************************************************************/
-/* finally, force generation of code                                         */
-template class field_descriptor<float>;
-template class field_descriptor<double>;
-/*****************************************************************************/
-
diff --git a/bfps/cpp/field_descriptor.hpp b/bfps/cpp/field_descriptor.hpp
deleted file mode 100644
index 2fb491bca7c130704fc5de5d22c3393cb196eec7..0000000000000000000000000000000000000000
--- a/bfps/cpp/field_descriptor.hpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include <mpi.h>
-#include <fftw3-mpi.h>
-#include "fftw_interface.hpp"
-
-#ifndef FIELD_DESCRIPTOR
-
-#define FIELD_DESCRIPTOR
-
-extern int myrank, nprocs;
-
-template <class rnumber>
-class field_descriptor
-{
-    private:
-        typedef rnumber cnumber[2];
-    public:
-
-        /* data */
-        int *sizes;
-        int *subsizes;
-        int *starts;
-        int ndims;
-        int *rank;
-        int *all_start0;
-        int *all_size0;
-        ptrdiff_t slice_size, local_size, full_size;
-        MPI_Datatype mpi_array_dtype, mpi_dtype;
-        int myrank, nprocs, io_myrank, io_nprocs;
-        MPI_Comm comm, io_comm;
-
-
-        /* methods */
-        field_descriptor(
-                int ndims,
-                int *n,
-                MPI_Datatype element_type,
-                MPI_Comm COMM_TO_USE);
-        ~field_descriptor();
-
-        /* io is performed using MPI_File stuff, and our
-         * own mpi_array_dtype that was defined in the constructor.
-         * */
-        int read(
-                const char *fname,
-                void *buffer);
-        int write(
-                const char *fname,
-                void *buffer);
-
-        /* a function that generates the transposed descriptor.
-         * don't forget to delete the result once you're done with it.
-         * the transposed descriptor is useful for io operations.
-         * */
-        field_descriptor<rnumber> *get_transpose();
-
-        /* we don't actually need the transposed descriptor to perform
-         * the transpose operation: we only need the in/out fields.
-         * */
-        int transpose(
-                rnumber *input,
-                rnumber *output);
-        int transpose(
-                typename fftw_interface<rnumber>::complex *input,
-                typename fftw_interface<rnumber>::complex *output = NULL);
-
-        int interleave(
-                rnumber *input,
-                int dim);
-        int interleave(
-                typename fftw_interface<rnumber>::complex *input,
-                int dim);
-};
-
-
-inline float btle(const float be)
-     {
-         float le;
-         char *befloat = (char *) & be;
-         char *lefloat = (char *) & le;
-         lefloat[0] = befloat[3];
-         lefloat[1] = befloat[2];
-         lefloat[2] = befloat[1];
-         lefloat[3] = befloat[0];
-         return le;
-     }
-
-#endif//FIELD_DESCRIPTOR
-
diff --git a/bfps/cpp/fluid_solver.cpp b/bfps/cpp/fluid_solver.cpp
deleted file mode 100644
index 319186103797f8135d4d3e2244ed5e3a8f271b00..0000000000000000000000000000000000000000
--- a/bfps/cpp/fluid_solver.cpp
+++ /dev/null
@@ -1,1057 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-//#define NDEBUG
-
-#include <cassert>
-#include <cmath>
-#include <cstring>
-#include "fluid_solver.hpp"
-#include "fftw_tools.hpp"
-#include "scope_timer.hpp"
-#include "shared_array.hpp"
-
-
-template <class rnumber>
-void fluid_solver<rnumber>::impose_zero_modes()
-{
-    if (this->cd->myrank == this->cd->rank[0])
-    {
-        std::fill_n((rnumber*)(this->cu), 6, 0.0);
-        std::fill_n((rnumber*)(this->cv[0]), 6, 0.0);
-        std::fill_n((rnumber*)(this->cv[1]), 6, 0.0);
-        std::fill_n((rnumber*)(this->cv[2]), 6, 0.0);
-    }
-}
-/*****************************************************************************/
-/* macro for specializations to numeric types compatible with FFTW           */
-
-template <class rnumber>
-fluid_solver<rnumber>::fluid_solver(
-        const char *NAME,
-        int nx,
-        int ny,
-        int nz,
-        double DKX,
-        double DKY,
-        double DKZ,
-        int DEALIAS_TYPE,
-        unsigned FFTW_PLAN_RIGOR) : fluid_solver_base<rnumber>(
-                                        NAME,
-                                        nx , ny , nz,
-                                        DKX, DKY, DKZ,
-                                        DEALIAS_TYPE,
-                                        FFTW_PLAN_RIGOR)
-{
-    TIMEZONE("fluid_solver::fluid_solver");
-    this->cvorticity = fftw_interface<rnumber>::alloc_complex(this->cd->local_size);
-    this->cvelocity  = fftw_interface<rnumber>::alloc_complex(this->cd->local_size);
-    this->rvorticity = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2);
-    /*this->rvelocity  = (rnumber*)(this->cvelocity);*/
-    this->rvelocity  = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2);
-
-    this->ru = this->rvelocity;
-    this->cu = this->cvelocity;
-
-    this->rv[0] = this->rvorticity;
-    this->rv[3] = this->rvorticity;
-    this->cv[0] = this->cvorticity;
-    this->cv[3] = this->cvorticity;
-
-    this->cv[1] = fftw_interface<rnumber>::alloc_complex(this->cd->local_size);
-    this->cv[2] = this->cv[1];
-    this->rv[1] = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2);
-    this->rv[2] = this->rv[1];
-
-    this->c2r_vorticity = new typename fftw_interface<rnumber>::plan;
-    this->r2c_vorticity = new typename fftw_interface<rnumber>::plan;
-    this->c2r_velocity  = new typename fftw_interface<rnumber>::plan;
-    this->r2c_velocity  = new typename fftw_interface<rnumber>::plan;
-
-    ptrdiff_t sizes[] = {nz,
-                         ny,
-                         nx};
-
-    *this->c2r_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->cvorticity, this->rvorticity,
-                MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-
-    *this->r2c_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->rvorticity, this->cvorticity,
-                MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
-
-    *this->c2r_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->cvelocity, this->rvelocity,
-                MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-
-    *this->r2c_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->rvelocity, this->cvelocity,
-                MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
-
-    this->uc2r = this->c2r_velocity;
-    this->ur2c = this->r2c_velocity;
-    this->vc2r[0] = this->c2r_vorticity;
-    this->vr2c[0] = this->r2c_vorticity;
-
-    this->vc2r[1] = new typename fftw_interface<rnumber>::plan;
-    this->vr2c[1] = new typename fftw_interface<rnumber>::plan;
-    this->vc2r[2] = new typename fftw_interface<rnumber>::plan;
-    this->vr2c[2] = new typename fftw_interface<rnumber>::plan;
-
-    *(this->vc2r[1]) = fftw_interface<rnumber>::mpi_plan_many_dft_c2r(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->cv[1], this->rv[1],
-            MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-
-    *this->vc2r[2] = fftw_interface<rnumber>::mpi_plan_many_dft_c2r(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->cv[2], this->rv[2],
-            MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-
-    *this->vr2c[1] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->rv[1], this->cv[1],
-            MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
-
-    *this->vr2c[2] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c(
-                3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
-                this->rv[2], this->cv[2],
-            MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
-
-    /* ``physical'' parameters etc, initialized here just in case */
-
-    this->nu = 0.1;
-    this->fmode = 1;
-    this->famplitude = 1.0;
-    this->fk0  = 0;
-    this->fk1 = 3.0;
-    /* initialization of fields must be done AFTER planning */
-    std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0);
-    std::fill_n((rnumber*)this->cvelocity, this->cd->local_size*2, 0.0);
-    std::fill_n(this->rvelocity, this->cd->local_size*2, 0.0);
-    std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0);
-    std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0);
-    std::fill_n(this->rv[1], this->cd->local_size*2, 0.0);
-    std::fill_n(this->rv[2], this->cd->local_size*2, 0.0);
-}
-
-template <class rnumber>
-fluid_solver<rnumber>::~fluid_solver()
-{
-    fftw_interface<rnumber>::destroy_plan(*this->c2r_vorticity);
-    fftw_interface<rnumber>::destroy_plan(*this->r2c_vorticity);
-    fftw_interface<rnumber>::destroy_plan(*this->c2r_velocity );
-    fftw_interface<rnumber>::destroy_plan(*this->r2c_velocity );
-    fftw_interface<rnumber>::destroy_plan(*this->vc2r[1]);
-    fftw_interface<rnumber>::destroy_plan(*this->vr2c[1]);
-    fftw_interface<rnumber>::destroy_plan(*this->vc2r[2]);
-    fftw_interface<rnumber>::destroy_plan(*this->vr2c[2]);
-
-    delete this->c2r_vorticity;
-    delete this->r2c_vorticity;
-    delete this->c2r_velocity ;
-    delete this->r2c_velocity ;
-    delete this->vc2r[1];
-    delete this->vr2c[1];
-    delete this->vc2r[2];
-    delete this->vr2c[2];
-
-    fftw_interface<rnumber>::free(this->cv[1]);
-    fftw_interface<rnumber>::free(this->rv[1]);
-    fftw_interface<rnumber>::free(this->cvorticity);
-    fftw_interface<rnumber>::free(this->rvorticity);
-    fftw_interface<rnumber>::free(this->cvelocity);
-    fftw_interface<rnumber>::free(this->rvelocity);
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_vorticity()
-{
-    TIMEZONE("fluid_solver::compute_vorticity");
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        // cindex indexing is thread safe (and tindex too) + it is a write
-        ptrdiff_t tindex = 3*cindex;
-        if (k2 <= this->kM2)
-        {
-            this->cvorticity[tindex+0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]);
-            this->cvorticity[tindex+1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]);
-            this->cvorticity[tindex+2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]);
-            this->cvorticity[tindex+0][1] =  (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]);
-            this->cvorticity[tindex+1][1] =  (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]);
-            this->cvorticity[tindex+2][1] =  (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]);
-        }
-        else{
-            std::fill_n((rnumber*)(this->cvorticity+tindex), 6, 0.0);
-        }
-    }
-    );
-    this->symmetrize(this->cvorticity, 3);
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_velocity(rnumber (*__restrict__ vorticity)[2])
-{
-    TIMEZONE("fluid_solver::compute_velocity");
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        // cindex indexing is thread safe (and tindex too) + it is a write
-        ptrdiff_t tindex = 3*cindex;
-        if (k2 <= this->kM2 && k2 > 0)
-        {
-            this->cu[tindex+0][0] = -(this->ky[yindex]*vorticity[tindex+2][1] - this->kz[zindex]*vorticity[tindex+1][1]) / k2;
-            this->cu[tindex+1][0] = -(this->kz[zindex]*vorticity[tindex+0][1] - this->kx[xindex]*vorticity[tindex+2][1]) / k2;
-            this->cu[tindex+2][0] = -(this->kx[xindex]*vorticity[tindex+1][1] - this->ky[yindex]*vorticity[tindex+0][1]) / k2;
-            this->cu[tindex+0][1] =  (this->ky[yindex]*vorticity[tindex+2][0] - this->kz[zindex]*vorticity[tindex+1][0]) / k2;
-            this->cu[tindex+1][1] =  (this->kz[zindex]*vorticity[tindex+0][0] - this->kx[xindex]*vorticity[tindex+2][0]) / k2;
-            this->cu[tindex+2][1] =  (this->kx[xindex]*vorticity[tindex+1][0] - this->ky[yindex]*vorticity[tindex+0][0]) / k2;
-        }
-        else
-            std::fill_n((rnumber*)(this->cu+tindex), 6, 0.0);
-    }
-    );
-    /*this->symmetrize(this->cu, 3);*/
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::ift_velocity()
-{
-    TIMEZONE("fluid_solver::ift_velocity");
-    fftw_interface<rnumber>::execute(*(this->c2r_velocity ));
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::ift_vorticity()
-{
-    TIMEZONE("fluid_solver::ift_vorticity");
-    std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0);
-    fftw_interface<rnumber>::execute(*(this->c2r_vorticity ));
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::dft_velocity()
-{
-    TIMEZONE("fluid_solver::dft_velocity");
-    fftw_interface<rnumber>::execute(*(this->r2c_velocity ));
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::dft_vorticity()
-{
-    TIMEZONE("fluid_solver::dft_vorticity");
-    std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0);
-    fftw_interface<rnumber>::execute(*(this->r2c_vorticity ));
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::add_forcing(
-        rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor)
-{
-    TIMEZONE("fluid_solver::add_forcing");
-    if (strcmp(this->forcing_type, "none") == 0)
-        return;
-    if (strcmp(this->forcing_type, "Kolmogorov") == 0)
-    {
-        ptrdiff_t cindex;
-        if (this->cd->myrank == this->cd->rank[this->fmode])
-        {
-            cindex = ((this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3;
-            acc_field[cindex+2][0] -= this->famplitude*factor/2;
-        }
-        if (this->cd->myrank == this->cd->rank[this->cd->sizes[0] - this->fmode])
-        {
-            cindex = ((this->cd->sizes[0] - this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3;
-            acc_field[cindex+2][0] -= this->famplitude*factor/2;
-        }
-        return;
-    }
-    if (strcmp(this->forcing_type, "linear") == 0)
-    {
-        CLOOP(
-                    this,
-                    [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){
-            // cindex indexing is thread safe (and cindex*3+c too)
-            double knorm = sqrt(this->kx[xindex]*this->kx[xindex] +
-                         this->ky[yindex]*this->ky[yindex] +
-                         this->kz[zindex]*this->kz[zindex]);
-            if ((this->fk0 <= knorm) && (this->fk1 >= knorm))
-                for (int c=0; c<3; c++)
-                    for (int i=0; i<2; i++)
-                        acc_field[cindex*3+c][i] += this->famplitude*vort_field[cindex*3+c][i]*factor;
-        }
-        );
-        return;
-    }
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::omega_nonlin(
-        int src)
-{
-    TIMEZONE("fluid_solver::omega_nonlin");
-    assert(src >= 0 && src < 3);
-    this->compute_velocity(this->cv[src]);
-    /* get fields from Fourier space to real space */
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::fftw");
-        fftw_interface<rnumber>::execute(*(this->c2r_velocity ));
-        fftw_interface<rnumber>::execute(*(this->vc2r[src]));
-    }
-    /* compute cross product $u \times \omega$, and normalize */
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::RLOOP");
-        RLOOP (
-                    this,
-                    [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-            ptrdiff_t tindex = 3*rindex;
-            rnumber tmp[3][2];
-            for (int cc=0; cc<3; cc++)
-                tmp[cc][0] = (this->ru[tindex+(cc+1)%3]*this->rv[src][tindex+(cc+2)%3] -
-                        this->ru[tindex+(cc+2)%3]*this->rv[src][tindex+(cc+1)%3]);
-            // Access to rindex is thread safe so there is no overlap between threads
-            for (int cc=0; cc<3; cc++)
-                this->ru[(3*rindex)+cc] = tmp[cc][0] / this->normalization_factor;
-        }
-        );
-    }
-    /* go back to Fourier space */
-    this->clean_up_real_space(this->ru, 3);
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::fftw-2");
-        fftw_interface<rnumber>::execute(*(this->r2c_velocity ));
-    }
-    this->dealias(this->cu, 3);
-    /* $\imath k \times Fourier(u \times \omega)$ */
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::CLOOP");
-        CLOOP(
-                    this,
-                    [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){
-            rnumber tmp[3][2];
-            ptrdiff_t tindex = 3*cindex;
-            {
-                tmp[0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]);
-                tmp[1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]);
-                tmp[2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]);
-                tmp[0][1] =  (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]);
-                tmp[1][1] =  (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]);
-                tmp[2][1] =  (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]);
-            }
-            // cindex indexing is thread safe so it is 3*cindex so there is no overlap between threads
-            for (int cc=0; cc<3; cc++)
-                for (int i=0; i<2; i++)
-                    this->cu[tindex+cc][i] = tmp[cc][i];
-        }
-        );
-    }
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::add_forcing");
-        this->add_forcing(this->cu, this->cv[src], 1.0);
-    }
-    {
-        TIMEZONE("fluid_solver::omega_nonlin::force_divfree");
-        this->force_divfree(this->cu);
-    }
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::step(double dt)
-{
-    TIMEZONE("fluid_solver::step");
-    std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0);
-    this->omega_nonlin(0);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){
-        if (k2 <= this->kM2)
-        {
-            double factor0 = exp(-this->nu * k2 * dt);
-            // cindex indexing is thread safe so there is no overlap between threads
-            for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
-                this->cv[1][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i] +
-                    dt*this->cu[3*cindex+cc][i])*factor0;
-        }
-    }
-    );
-
-    this->omega_nonlin(1);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){
-        if (k2 <= this->kM2)
-        {
-            double factor0 = exp(-this->nu * k2 * dt/2);
-            double factor1 = exp( this->nu * k2 * dt/2);
-            // cindex indexing is thread safe so there is no overlap between threads
-            for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
-                this->cv[2][3*cindex+cc][i] = (3*this->cv[0][3*cindex+cc][i]*factor0 +
-                    (this->cv[1][3*cindex+cc][i] +
-                    dt*this->cu[3*cindex+cc][i])*factor1)*0.25;
-        }
-    }
-    );
-
-    this->omega_nonlin(2);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){
-        if (k2 <= this->kM2)
-        {
-            double factor0 = exp(-this->nu * k2 * dt * 0.5);
-            // cindex indexing is thread safe so there is no overlap between threads
-            for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
-                this->cv[3][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i]*factor0 +
-                    2*(this->cv[2][3*cindex+cc][i] +
-                    dt*this->cu[3*cindex+cc][i]))*factor0/3;
-        }
-    }
-    );
-
-    this->force_divfree(this->cvorticity);
-    this->symmetrize(this->cvorticity, 3);
-    this->iteration++;
-}
-
-template <class rnumber>
-int fluid_solver<rnumber>::read(char field, char representation)
-{
-    TIMEZONE("fluid_solver::read");
-    char fname[512];
-    int read_result;
-    if (field == 'v')
-    {
-        if (representation == 'c')
-        {
-            this->fill_up_filename("cvorticity", fname);
-            read_result = this->cd->read(fname, (void*)this->cvorticity);
-            if (read_result != EXIT_SUCCESS)
-                return read_result;
-        }
-        if (representation == 'r')
-        {
-            read_result = this->read_base("rvorticity", this->rvorticity);
-            if (read_result != EXIT_SUCCESS)
-                return read_result;
-            else
-                fftw_interface<rnumber>::execute(*(this->r2c_vorticity ));
-        }
-        this->low_pass_Fourier(this->cvorticity, 3, this->kM);
-        this->force_divfree(this->cvorticity);
-        this->symmetrize(this->cvorticity, 3);
-        return EXIT_SUCCESS;
-    }
-    if ((field == 'u') && (representation == 'c'))
-    {
-        read_result = this->read_base("cvelocity", this->cvelocity);
-        this->low_pass_Fourier(this->cvelocity, 3, this->kM);
-        this->force_divfree(this->cvorticity);
-        this->symmetrize(this->cvorticity, 3);
-        return read_result;
-    }
-    if ((field == 'u') && (representation == 'r'))
-        return this->read_base("rvelocity", this->rvelocity);
-    return EXIT_FAILURE;
-}
-
-template <class rnumber>
-int fluid_solver<rnumber>::write(char field, char representation)
-{
-    TIMEZONE("fluid_solver::write");
-    char fname[512];
-    if ((field == 'v') && (representation == 'c'))
-    {
-        this->fill_up_filename("cvorticity", fname);
-        return this->cd->write(fname, (void*)this->cvorticity);
-    }
-    if ((field == 'v') && (representation == 'r'))
-    {
-        fftw_interface<rnumber>::execute(*(this->c2r_vorticity ));
-        clip_zero_padding<rnumber>(this->rd, this->rvorticity, 3);
-        this->fill_up_filename("rvorticity", fname);
-        return this->rd->write(fname, this->rvorticity);
-    }
-    this->compute_velocity(this->cvorticity);
-    if ((field == 'u') && (representation == 'c'))
-    {
-        this->fill_up_filename("cvelocity", fname);
-        return this->cd->write(fname, this->cvelocity);
-    }
-    if ((field == 'u') && (representation == 'r'))
-    {
-        this->ift_velocity();
-        clip_zero_padding<rnumber>(this->rd, this->rvelocity, 3);
-        this->fill_up_filename("rvelocity", fname);
-        return this->rd->write(fname, this->rvelocity);
-    }
-    return EXIT_FAILURE;
-}
-
-template <class rnumber>
-int fluid_solver<rnumber>::write_rTrS2()
-{
-    TIMEZONE("fluid_solver::write_rTrS2");
-    char fname[512];
-    this->fill_up_filename("rTrS2", fname);
-    typename fftw_interface<rnumber>::complex *ca;
-    rnumber *ra;
-    ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3);
-    ra = (rnumber*)(ca);
-    this->compute_velocity(this->cvorticity);
-    this->compute_vector_gradient(ca, this->cvelocity);
-    for (int cc=0; cc<3; cc++)
-    {
-        std::copy(
-                    (rnumber*)(ca + cc*this->cd->local_size),
-                    (rnumber*)(ca + (cc+1)*this->cd->local_size),
-                    (rnumber*)this->cv[1]);
-        fftw_interface<rnumber>::execute(*(this->vc2r[1]));
-        std::copy(
-                    this->rv[1],
-                this->rv[1] + this->cd->local_size*2,
-                ra + cc*this->cd->local_size*2);
-    }
-    /* velocity gradient is now stored, in real space, in ra */
-    rnumber *dx_u, *dy_u, *dz_u;
-    dx_u = ra;
-    dy_u = ra + 2*this->cd->local_size;
-    dz_u = ra + 4*this->cd->local_size;
-    rnumber *trS2 = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2);
-    shared_array<double> average_local(1, [&](double* data){
-        data[0] = 0;
-    });
-
-    RLOOP(
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        rnumber AxxAxx;
-        rnumber AyyAyy;
-        rnumber AzzAzz;
-        rnumber Sxy;
-        rnumber Syz;
-        rnumber Szx;
-        ptrdiff_t tindex = 3*rindex;
-        AxxAxx = dx_u[tindex+0]*dx_u[tindex+0];
-        AyyAyy = dy_u[tindex+1]*dy_u[tindex+1];
-        AzzAzz = dz_u[tindex+2]*dz_u[tindex+2];
-        Sxy = dx_u[tindex+1]+dy_u[tindex+0];
-        Syz = dy_u[tindex+2]+dz_u[tindex+1];
-        Szx = dz_u[tindex+0]+dx_u[tindex+2];
-        // rindex is thread safe + No overlap between thread it is a write
-        trS2[rindex] = (AxxAxx + AyyAyy + AzzAzz +
-                        (Sxy*Sxy + Syz*Syz + Szx*Szx)/2);
-        average_local.getMine()[0] += trS2[rindex];
-    }
-    );
-    average_local.mergeParallel();
-    double average;
-    MPI_Allreduce(
-                average_local.getMasterData(),
-                &average,
-                1,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    DEBUG_MSG("average TrS2 is %g\n", average);
-    fftw_interface<rnumber>::free(ca);
-    /* output goes here */
-    int ntmp[3];
-    ntmp[0] = this->rd->sizes[0];
-    ntmp[1] = this->rd->sizes[1];
-    ntmp[2] = this->rd->sizes[2];
-    field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm);
-    clip_zero_padding<rnumber>(scalar_descriptor, trS2, 1);
-    int return_value = scalar_descriptor->write(fname, trS2);
-    delete scalar_descriptor;
-    fftw_interface<rnumber>::free(trS2);
-    return return_value;
-}
-
-template <class rnumber>
-int fluid_solver<rnumber>::write_renstrophy()
-{
-    TIMEZONE("fluid_solver::write_renstrophy");
-    char fname[512];
-    this->fill_up_filename("renstrophy", fname);
-    rnumber *enstrophy = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2);
-    this->ift_vorticity();
-    shared_array<double> average_local(1, [&](double* data){
-        data[0] = 0;
-    });
-
-    RLOOP(
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        ptrdiff_t tindex = 3*rindex;
-        // rindex indexing is thread safe so there is no overlap between threads
-        enstrophy[rindex] = (
-                    this->rvorticity[tindex+0]*this->rvorticity[tindex+0] +
-                this->rvorticity[tindex+1]*this->rvorticity[tindex+1] +
-                this->rvorticity[tindex+2]*this->rvorticity[tindex+2]
-                )/2;
-        average_local.getMine()[0] += enstrophy[rindex];
-    }
-    );
-    average_local.mergeParallel();
-    double average;
-    MPI_Allreduce(
-                average_local.getMasterData(),
-                &average,
-                1,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    DEBUG_MSG("average enstrophy is %g\n", average);
-    /* output goes here */
-    int ntmp[3];
-    ntmp[0] = this->rd->sizes[0];
-    ntmp[1] = this->rd->sizes[1];
-    ntmp[2] = this->rd->sizes[2];
-    field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm);
-    clip_zero_padding<rnumber>(scalar_descriptor, enstrophy, 1);
-    int return_value = scalar_descriptor->write(fname, enstrophy);
-    delete scalar_descriptor;
-    fftw_interface<rnumber>::free(enstrophy);
-    return return_value;
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_pressure(rnumber (*__restrict__ pressure)[2])
-{
-    TIMEZONE("fluid_solver::compute_pressure");
-    /* assume velocity is already in real space representation */
-    /* diagonal terms 11 22 33 */
-    RLOOP (
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        // rindex indexing is thread safe so there is no overlap between threads
-        ptrdiff_t tindex = 3*rindex;
-        for (int cc=0; cc<3; cc++)
-            this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc];
-    }
-    );
-    this->clean_up_real_space(this->rv[1], 3);
-    {
-        TIMEZONE("fftw_interface<rnumber>::execute");
-        fftw_interface<rnumber>::execute(*(this->vr2c[1]));
-    }
-    this->dealias(this->cv[1], 3);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2 && k2 > 0)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            for (int i=0; i<2; i++)
-            {
-                pressure[cindex][i] = -(this->kx[xindex]*this->kx[xindex]*this->cv[1][tindex+0][i] +
-                        this->ky[yindex]*this->ky[yindex]*this->cv[1][tindex+1][i] +
-                        this->kz[zindex]*this->kz[zindex]*this->cv[1][tindex+2][i]);
-            }
-        }
-        else
-            std::fill_n((rnumber*)(pressure+cindex), 2, 0.0);
-    }
-    );
-    /* off-diagonal terms 12 23 31 */
-    RLOOP (
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        // rindex indexing is thread safe so there is no overlap between threads
-        ptrdiff_t tindex = 3*rindex;
-        for (int cc=0; cc<3; cc++)
-            this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3];
-    }
-    );
-    this->clean_up_real_space(this->rv[1], 3);
-    {
-        TIMEZONE("fftw_interface<rnumber>::execute");
-        fftw_interface<rnumber>::execute(*(this->vr2c[1]));
-    }
-    this->dealias(this->cv[1], 3);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2 && k2 > 0)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            for (int i=0; i<2; i++)
-            {
-                pressure[cindex][i] -= 2*(this->kx[xindex]*this->ky[yindex]*this->cv[1][tindex+0][i] +
-                        this->ky[yindex]*this->kz[zindex]*this->cv[1][tindex+1][i] +
-                        this->kz[zindex]*this->kx[xindex]*this->cv[1][tindex+2][i]);
-                pressure[cindex][i] /= this->normalization_factor*k2;
-            }
-        }
-    }
-    );
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_gradient_statistics(
-        rnumber (*__restrict__ vec)[2],
-double *gradu_moments,
-double *trS2QR_moments,
-ptrdiff_t *gradu_hist,
-ptrdiff_t *trS2QR_hist,
-ptrdiff_t *QR2D_hist,
-double trS2QR_max_estimates[],
-double gradu_max_estimates[],
-int nbins,
-int QR2D_nbins)
-{
-    TIMEZONE("fluid_solver::compute_gradient_statistics");
-    typename fftw_interface<rnumber>::complex *ca;
-    rnumber *ra;
-    ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3);
-    ra = (rnumber*)(ca);
-    this->compute_vector_gradient(ca, vec);
-    for (int cc=0; cc<3; cc++)
-    {
-        std::copy(
-                    (rnumber*)(ca + cc*this->cd->local_size),
-                    (rnumber*)(ca + (cc+1)*this->cd->local_size),
-                    (rnumber*)this->cv[1]);
-        fftw_interface<rnumber>::execute(*(this->vc2r[1]));
-        std::copy(
-                    this->rv[1],
-                this->rv[1] + this->cd->local_size*2,
-                ra + cc*this->cd->local_size*2);
-    }
-    /* velocity gradient is now stored, in real space, in ra */
-    std::fill_n(this->rv[1], 2*this->cd->local_size, 0.0);
-    rnumber *dx_u, *dy_u, *dz_u;
-    dx_u = ra;
-    dy_u = ra + 2*this->cd->local_size;
-    dz_u = ra + 4*this->cd->local_size;
-    double binsize[2];
-    double tmp_max_estimate[3];
-    tmp_max_estimate[0] = trS2QR_max_estimates[0];
-    tmp_max_estimate[1] = trS2QR_max_estimates[1];
-    tmp_max_estimate[2] = trS2QR_max_estimates[2];
-    binsize[0] = 2*tmp_max_estimate[2] / QR2D_nbins;
-    binsize[1] = 2*tmp_max_estimate[1] / QR2D_nbins;
-    ptrdiff_t *local_hist = new ptrdiff_t[QR2D_nbins*QR2D_nbins];
-    std::fill_n(local_hist, QR2D_nbins*QR2D_nbins, 0);
-    RLOOP(
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        rnumber AxxAxx;
-        rnumber AyyAyy;
-        rnumber AzzAzz;
-        rnumber AxyAyx;
-        rnumber AyzAzy;
-        rnumber AzxAxz;
-        rnumber Sxy;
-        rnumber Syz;
-        rnumber Szx;
-        // rindex indexing is thread safe so there is no overlap between threads
-        // tindex[0:2] is thread safe too
-        ptrdiff_t tindex = 3*rindex;
-        AxxAxx = dx_u[tindex+0]*dx_u[tindex+0];
-        AyyAyy = dy_u[tindex+1]*dy_u[tindex+1];
-        AzzAzz = dz_u[tindex+2]*dz_u[tindex+2];
-        AxyAyx = dx_u[tindex+1]*dy_u[tindex+0];
-        AyzAzy = dy_u[tindex+2]*dz_u[tindex+1];
-        AzxAxz = dz_u[tindex+0]*dx_u[tindex+2];
-        this->rv[1][tindex+1] = - (AxxAxx + AyyAyy + AzzAzz)/2 - AxyAyx - AyzAzy - AzxAxz;
-        this->rv[1][tindex+2] = - (dx_u[tindex+0]*(AxxAxx/3 + AxyAyx + AzxAxz) +
-                dy_u[tindex+1]*(AyyAyy/3 + AxyAyx + AyzAzy) +
-                dz_u[tindex+2]*(AzzAzz/3 + AzxAxz + AyzAzy) +
-                dx_u[tindex+1]*dy_u[tindex+2]*dz_u[tindex+0] +
-                dx_u[tindex+2]*dy_u[tindex+0]*dz_u[tindex+1]);
-        int bin0 = int(floor((this->rv[1][tindex+2] + tmp_max_estimate[2]) / binsize[0]));
-        int bin1 = int(floor((this->rv[1][tindex+1] + tmp_max_estimate[1]) / binsize[1]));
-        if ((bin0 >= 0 && bin0 < QR2D_nbins) &&
-                (bin1 >= 0 && bin1 < QR2D_nbins))
-            local_hist[bin1*QR2D_nbins + bin0]++;
-        Sxy = dx_u[tindex+1]+dy_u[tindex+0];
-        Syz = dy_u[tindex+2]+dz_u[tindex+1];
-        Szx = dz_u[tindex+0]+dx_u[tindex+2];
-        this->rv[1][tindex] = (AxxAxx + AyyAyy + AzzAzz +
-                               (Sxy*Sxy + Syz*Syz + Szx*Szx)/2);
-    }
-    );
-    MPI_Allreduce(
-                local_hist,
-                QR2D_hist,
-                QR2D_nbins * QR2D_nbins,
-                MPI_INT64_T, MPI_SUM, this->cd->comm);
-    delete[] local_hist;
-    this->compute_rspace_stats3(
-                this->rv[1],
-            trS2QR_moments,
-            trS2QR_hist,
-            tmp_max_estimate,
-            nbins);
-    double *tmp_moments = new double[10*3];
-    ptrdiff_t *tmp_hist = new ptrdiff_t[nbins*3];
-    for (int cc=0; cc<3; cc++)
-    {
-        tmp_max_estimate[0] = gradu_max_estimates[cc*3 + 0];
-        tmp_max_estimate[1] = gradu_max_estimates[cc*3 + 1];
-        tmp_max_estimate[2] = gradu_max_estimates[cc*3 + 2];
-        this->compute_rspace_stats3(
-                    dx_u + cc*2*this->cd->local_size,
-                    tmp_moments,
-                    tmp_hist,
-                    tmp_max_estimate,
-                    nbins);
-        for (int n = 0; n < 10; n++)
-            for (int i = 0; i < 3 ; i++)
-            {
-                gradu_moments[(n*3 + cc)*3 + i] = tmp_moments[n*3 + i];
-            }
-        for (int n = 0; n < nbins; n++)
-            for (int i = 0; i < 3; i++)
-            {
-                gradu_hist[(n*3 + cc)*3 + i] = tmp_hist[n*3 + i];
-            }
-    }
-    delete[] tmp_moments;
-    delete[] tmp_hist;
-    fftw_interface<rnumber>::free(ca);
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber (*acceleration)[2])
-{
-    TIMEZONE("fluid_solver::compute_Lagrangian_acceleration");
-    typename fftw_interface<rnumber>::complex *pressure;
-    pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3);
-    this->compute_velocity(this->cvorticity);
-    this->ift_velocity();
-    this->compute_pressure(pressure);
-    this->compute_velocity(this->cvorticity);
-    std::fill_n((rnumber*)this->cv[1], 2*this->cd->local_size, 0.0);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            for (int cc=0; cc<3; cc++)
-                for (int i=0; i<2; i++)
-                    this->cv[1][tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i];
-            if (strcmp(this->forcing_type, "linear") == 0)
-            {
-                double knorm = sqrt(k2);
-                if ((this->fk0 <= knorm) &&
-                        (this->fk1 >= knorm))
-                    for (int c=0; c<3; c++)
-                        for (int i=0; i<2; i++)
-                            this->cv[1][tindex+c][i] += this->famplitude*this->cu[tindex+c][i];
-            }
-            this->cv[1][tindex+0][0] += this->kx[xindex]*pressure[cindex][1];
-            this->cv[1][tindex+1][0] += this->ky[yindex]*pressure[cindex][1];
-            this->cv[1][tindex+2][0] += this->kz[zindex]*pressure[cindex][1];
-            this->cv[1][tindex+0][1] -= this->kx[xindex]*pressure[cindex][0];
-            this->cv[1][tindex+1][1] -= this->ky[yindex]*pressure[cindex][0];
-            this->cv[1][tindex+2][1] -= this->kz[zindex]*pressure[cindex][0];
-        }
-    }
-    );
-    std::copy(
-                (rnumber*)this->cv[1],
-            (rnumber*)(this->cv[1] + this->cd->local_size),
-            (rnumber*)acceleration);
-    fftw_interface<rnumber>::free(pressure);
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_Eulerian_acceleration(rnumber (*__restrict__ acceleration)[2])
-{
-    TIMEZONE("fluid_solver::compute_Eulerian_acceleration");
-    std::fill_n((rnumber*)(acceleration), 2*this->cd->local_size, 0.0);
-    this->compute_velocity(this->cvorticity);
-    /* put in linear terms */
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/, double k2){
-        if (k2 <= this->kM2)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            for (int cc=0; cc<3; cc++)
-                for (int i=0; i<2; i++)
-                    acceleration[tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i];
-            if (strcmp(this->forcing_type, "linear") == 0)
-            {
-                double knorm = sqrt(k2);
-                if ((this->fk0 <= knorm) &&
-                        (this->fk1 >= knorm))
-                {
-                    for (int c=0; c<3; c++)
-                        for (int i=0; i<2; i++)
-                            acceleration[tindex+c][i] += this->famplitude*this->cu[tindex+c][i];
-                }
-            }
-        }
-    }
-    );
-    this->ift_velocity();
-    /* compute uu */
-    /* 11 22 33 */
-    RLOOP (
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        // cindex indexing is thread safe so there is no overlap between threads
-        ptrdiff_t tindex = 3*rindex;
-        for (int cc=0; cc<3; cc++)
-            this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc] / this->normalization_factor;
-    }
-    );
-    this->clean_up_real_space(this->rv[1], 3);
-    fftw_interface<rnumber>::execute(*(this->vr2c[1]));
-    this->dealias(this->cv[1], 3);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            acceleration[tindex+0][0] +=
-                    this->kx[xindex]*this->cv[1][tindex+0][1];
-            acceleration[tindex+0][1] +=
-                    -this->kx[xindex]*this->cv[1][tindex+0][0];
-            acceleration[tindex+1][0] +=
-                    this->ky[yindex]*this->cv[1][tindex+1][1];
-            acceleration[tindex+1][1] +=
-                    -this->ky[yindex]*this->cv[1][tindex+1][0];
-            acceleration[tindex+2][0] +=
-                    this->kz[zindex]*this->cv[1][tindex+2][1];
-            acceleration[tindex+2][1] +=
-                    -this->kz[zindex]*this->cv[1][tindex+2][0];
-        }
-    }
-    );
-    /* 12 23 31 */
-    RLOOP (
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        // cindex indexing is thread safe so there is no overlap between threads
-        ptrdiff_t tindex = 3*rindex;
-        for (int cc=0; cc<3; cc++)
-            this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3] / this->normalization_factor;
-    }
-    );
-    this->clean_up_real_space(this->rv[1], 3);
-    fftw_interface<rnumber>::execute(*(this->vr2c[1]));
-    this->dealias(this->cv[1], 3);
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2)
-        {
-            // cindex indexing is thread safe so there is no overlap between threads
-            ptrdiff_t tindex = 3*cindex;
-            acceleration[tindex+0][0] +=
-                    (this->ky[yindex]*this->cv[1][tindex+0][1] +
-                    this->kz[zindex]*this->cv[1][tindex+2][1]);
-            acceleration[tindex+0][1] +=
-                    - (this->ky[yindex]*this->cv[1][tindex+0][0] +
-                    this->kz[zindex]*this->cv[1][tindex+2][0]);
-            acceleration[tindex+1][0] +=
-                    (this->kz[zindex]*this->cv[1][tindex+1][1] +
-                    this->kx[xindex]*this->cv[1][tindex+0][1]);
-            acceleration[tindex+1][1] +=
-                    - (this->kz[zindex]*this->cv[1][tindex+1][0] +
-                    this->kx[xindex]*this->cv[1][tindex+0][0]);
-            acceleration[tindex+2][0] +=
-                    (this->kx[xindex]*this->cv[1][tindex+2][1] +
-                    this->ky[yindex]*this->cv[1][tindex+1][1]);
-            acceleration[tindex+2][1] +=
-                    - (this->kx[xindex]*this->cv[1][tindex+2][0] +
-                    this->ky[yindex]*this->cv[1][tindex+1][0]);
-        }
-    }
-    );
-    if (this->cd->myrank == this->cd->rank[0])
-        std::fill_n((rnumber*)(acceleration), 6, 0.0);
-    this->force_divfree(acceleration);
-}
-
-template <class rnumber>
-void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber *__restrict__ acceleration)
-{
-    TIMEZONE("fluid_solver::compute_Lagrangian_acceleration");
-    this->compute_Lagrangian_acceleration((typename fftw_interface<rnumber>::complex*)acceleration);
-    fftw_interface<rnumber>::execute(*(this->vc2r[1]));
-    std::copy(
-                this->rv[1],
-            this->rv[1] + 2*this->cd->local_size,
-            acceleration);
-}
-
-template <class rnumber>
-int fluid_solver<rnumber>::write_rpressure()
-{
-    TIMEZONE("fluid_solver::write_rpressure");
-    char fname[512];
-    typename fftw_interface<rnumber>::complex *pressure;
-    pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3);
-    this->compute_velocity(this->cvorticity);
-    this->ift_velocity();
-    this->compute_pressure(pressure);
-    this->fill_up_filename("rpressure", fname);
-    rnumber *rpressure = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2);
-    typename fftw_interface<rnumber>::plan c2r;
-    c2r = fftw_interface<rnumber>::mpi_plan_dft_c2r_3d(
-                this->rd->sizes[0], this->rd->sizes[1], this->rd->sizes[2],
-            pressure, rpressure, this->cd->comm,
-            this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
-    fftw_interface<rnumber>::execute(c2r);
-    /* output goes here */
-    int ntmp[3];
-    ntmp[0] = this->rd->sizes[0];
-    ntmp[1] = this->rd->sizes[1];
-    ntmp[2] = this->rd->sizes[2];
-    field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm);
-    clip_zero_padding<rnumber>(scalar_descriptor, rpressure, 1);
-    int return_value = scalar_descriptor->write(fname, rpressure);
-    delete scalar_descriptor;
-    fftw_interface<rnumber>::destroy_plan(c2r);
-    fftw_interface<rnumber>::free(pressure);
-    fftw_interface<rnumber>::free(rpressure);
-    return return_value;
-}
-
-/*****************************************************************************/
-
-
-
-
-/*****************************************************************************/
-/* finally, force generation of code for single precision                    */
-template class fluid_solver<float>;
-template class fluid_solver<double>;
-/*****************************************************************************/
-
diff --git a/bfps/cpp/fluid_solver.hpp b/bfps/cpp/fluid_solver.hpp
deleted file mode 100644
index 4cc75cee4385353f64dc9bc9e7d34c6efba9ad48..0000000000000000000000000000000000000000
--- a/bfps/cpp/fluid_solver.hpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <iostream>
-#include "field_descriptor.hpp"
-#include "fluid_solver_base.hpp"
-
-#ifndef FLUID_SOLVER
-
-#define FLUID_SOLVER
-
-extern int myrank, nprocs;
-
-
-/* container for field descriptor, fields themselves, parameters, etc
- * using the same big macro idea that they're using in fftw3.h
- * I feel like I should quote:  Ugh.
- * */
-
-template <class rnumber>
-class fluid_solver:public fluid_solver_base<rnumber>
-{
-    public:
-        /* fields */
-        rnumber *rvorticity;
-        rnumber *rvelocity ;
-        typename fluid_solver_base<rnumber>::cnumber *cvorticity;
-        typename fluid_solver_base<rnumber>::cnumber *cvelocity ;
-
-        /* short names for velocity, and 4 vorticity fields */
-        rnumber *ru, *rv[4];
-        typename fluid_solver_base<rnumber>::cnumber *cu, *cv[4];
-
-        /* plans */
-        typename fftw_interface<rnumber>::plan *c2r_vorticity;
-        typename fftw_interface<rnumber>::plan *r2c_vorticity;
-        typename fftw_interface<rnumber>::plan *c2r_velocity;
-        typename fftw_interface<rnumber>::plan *r2c_velocity;
-        typename fftw_interface<rnumber>::plan *uc2r, *ur2c;
-        typename fftw_interface<rnumber>::plan *vr2c[3], *vc2r[3];
-
-        /* physical parameters */
-        double nu;
-        int fmode;         // for Kolmogorov flow
-        double famplitude; // both for Kflow and band forcing
-        double fk0, fk1;   // for band forcing
-        char forcing_type[128];
-
-        /* methods */
-        fluid_solver(
-                const char *NAME,
-                int nx,
-                int ny,
-                int nz,
-                double DKX = 1.0,
-                double DKY = 1.0,
-                double DKZ = 1.0,
-                int DEALIAS_TYPE = 1,
-                unsigned FFTW_PLAN_RIGOR = FFTW_MEASURE);
-        ~fluid_solver(void);
-
-        void compute_gradient_statistics(
-                rnumber (*__restrict__ vec)[2],
-                double *__restrict__ gradu_moments,
-                double *__restrict__ trS2_Q_R_moments,
-                ptrdiff_t *__restrict__ gradu_histograms,
-                ptrdiff_t *__restrict__ trS2_Q_R_histograms,
-                ptrdiff_t *__restrict__ QR2D_histogram,
-                double trS2_Q_R_max_estimates[3],
-                double gradu_max_estimates[9],
-                const int nbins_1D = 256,
-                const int nbins_2D = 64);
-
-        void compute_vorticity(void);
-        void compute_velocity(rnumber (*__restrict__ vorticity)[2]);
-        void compute_pressure(rnumber (*__restrict__ pressure)[2]);
-        void compute_Eulerian_acceleration(rnumber (*__restrict__ dst)[2]);
-        void compute_Lagrangian_acceleration(rnumber (*__restrict__ dst)[2]);
-        void compute_Lagrangian_acceleration(rnumber *__restrict__ dst);
-        void ift_velocity();
-        void dft_velocity();
-        void ift_vorticity();
-        void dft_vorticity();
-        void omega_nonlin(int src);
-        void step(double dt);
-        void impose_zero_modes(void);
-        void add_forcing(rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor);
-
-        int read(char field, char representation);
-        int write(char field, char representation);
-        int write_rTrS2();
-        int write_renstrophy();
-        int write_rpressure();
-};
-
-#endif//FLUID_SOLVER
-
diff --git a/bfps/cpp/fluid_solver_base.cpp b/bfps/cpp/fluid_solver_base.cpp
deleted file mode 100644
index 6e4fd3335238218bad0b78462d3506ca9b48c721..0000000000000000000000000000000000000000
--- a/bfps/cpp/fluid_solver_base.cpp
+++ /dev/null
@@ -1,834 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <cassert>
-#include <cmath>
-#include <cstring>
-#include "base.hpp"
-#include "fluid_solver_base.hpp"
-#include "fftw_tools.hpp"
-#include "scope_timer.hpp"
-#include "shared_array.hpp"
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::fill_up_filename(const char *base_name, char *destination)
-{
-    sprintf(destination, "%s_%s_i%.5x", this->name, base_name, this->iteration);
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::clean_up_real_space(rnumber *a, int howmany)
-{
-    TIMEZONE("fluid_solver_base::clean_up_real_space");
-    for (ptrdiff_t rindex = 0; rindex < this->cd->local_size*2; rindex += howmany*(this->rd->subsizes[2]+2))
-        std::fill_n(a+rindex+this->rd->subsizes[2]*howmany, 2*howmany, 0.0);
-}
-
-template <class rnumber>
-double fluid_solver_base<rnumber>::autocorrel(cnumber *a)
-{
-    double *spec = fftw_alloc_real(this->nshells*9);
-    double sum_local;
-    this->cospectrum(a, a, spec);
-    sum_local = 0.0;
-    for (unsigned int n = 0; n < this->nshells; n++)
-    {
-        sum_local += spec[n*9] + spec[n*9 + 4] + spec[n*9 + 8];
-    }
-    fftw_free(spec);
-    return sum_local;
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec)
-{
-    TIMEZONE("fluid_solver_base::cospectrum");
-    shared_array<double> cospec_local_thread(this->nshells*9,[&](double* cospec_local){
-        std::fill_n(cospec_local, this->nshells*9, 0);
-    });
-
-    CLOOP_K2_NXMODES(
-                this,
-
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/,
-                ptrdiff_t /*zindex*/, double k2, int nxmodes){
-        if (k2 <= this->kMspec2)
-        {
-            int tmp_int = int(sqrt(k2)/this->dk)*9;
-            double* cospec_local = cospec_local_thread.getMine();
-            for (int i=0; i<3; i++)
-                for (int j=0; j<3; j++)
-                {
-                    cospec_local[tmp_int+i*3+j] += nxmodes * (
-                                (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] +
-                            (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]);
-                }
-        }}
-    );
-    cospec_local_thread.mergeParallel();
-    MPI_Allreduce(
-                cospec_local_thread.getMasterData(),
-                (void*)spec,
-                this->nshells*9,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec, const double k2exponent)
-{
-    TIMEZONE("fluid_solver_base::cospectrum2");
-    shared_array<double> cospec_local_thread(this->nshells*9,[&](double* cospec_local){
-        std::fill_n(cospec_local, this->nshells*9, 0);
-    });
-
-    CLOOP_K2_NXMODES(
-                this,
-
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/,
-                ptrdiff_t /*zindex*/, double k2, int nxmodes){
-        if (k2 <= this->kMspec2)
-        {
-            double factor = nxmodes*pow(k2, k2exponent);
-            int tmp_int = int(sqrt(k2)/this->dk)*9;
-            double* cospec_local = cospec_local_thread.getMine();
-            for (int i=0; i<3; i++)
-                for (int j=0; j<3; j++)
-                {
-                    cospec_local[tmp_int+i*3+j] += factor * (
-                                (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] +
-                            (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]);
-                }
-        }}
-    );
-    cospec_local_thread.mergeParallel();
-    MPI_Allreduce(
-                cospec_local_thread.getMasterData(),
-                (void*)spec,
-                this->nshells*9,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    //for (int n=0; n<this->nshells; n++)
-    //{
-    //    spec[n] *= 12.5663706144*pow(this->kshell[n], 2) / this->nshell[n];
-    //    /*is normalization needed?
-    //     * spec[n] /= this->normalization_factor*/
-    //}
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::compute_rspace_stats(
-        const rnumber *a,
-        const hid_t group,
-        const std::string dset_name,
-        const hsize_t toffset,
-        const std::vector<double> max_estimate)
-{
-    TIMEZONE("fluid_solver_base::compute_rspace_stats");
-    const int nmoments = 10;
-    int nvals, nbins;
-    if (this->rd->myrank == 0)
-    {
-        hid_t dset, wspace;
-        hsize_t dims[3];
-        int ndims;
-        dset = H5Dopen(group, ("moments/" + dset_name).c_str(), H5P_DEFAULT);
-        wspace = H5Dget_space(dset);
-        ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);
-        assert(ndims == 3);
-        variable_used_only_in_assert(ndims);
-        assert(dims[1] == nmoments);
-        nvals = dims[2];
-        H5Sclose(wspace);
-        H5Dclose(dset);
-        dset = H5Dopen(group, ("histograms/" + dset_name).c_str(), H5P_DEFAULT);
-        wspace = H5Dget_space(dset);
-        ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);
-        assert(ndims == 3);
-        nbins = dims[1];
-        assert(nvals == dims[2]);
-        H5Sclose(wspace);
-        H5Dclose(dset);
-    }
-    MPI_Bcast(&nvals, 1, MPI_INT, 0, this->rd->comm);
-    MPI_Bcast(&nbins, 1, MPI_INT, 0, this->rd->comm);
-    assert(nvals == max_estimate.size());
-    shared_array<double> threaded_local_moments(nmoments*nvals, [&](double* local_moments){
-        std::fill_n(local_moments, nmoments*nvals, 0);
-        if (nvals == 4) local_moments[3] = max_estimate[3];
-    });
-
-    shared_array<double> threaded_val_tmp(nvals);
-
-    shared_array<ptrdiff_t> threaded_local_hist(nbins*nvals, [&](ptrdiff_t* local_hist){
-        std::fill_n(local_hist, nbins*nvals, 0);
-    });
-
-    // Not written by threads
-    double *binsize = new double[nvals];
-    for (int i=0; i<nvals; i++)
-        binsize[i] = 2*max_estimate[i] / nbins;
-
-    RLOOP(
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        double *val_tmp = threaded_val_tmp.getMine();
-        ptrdiff_t* local_hist = threaded_local_hist.getMine();
-        double *local_moments = threaded_local_moments.getMine();
-
-        if (nvals == 4) val_tmp[3] = 0.0;
-        for (int i=0; i<3; i++)
-        {
-            val_tmp[i] = a[rindex*3+i];
-            if (nvals == 4) val_tmp[3] += val_tmp[i]*val_tmp[i];
-        }
-        if (nvals == 4)
-        {
-            val_tmp[3] = sqrt(val_tmp[3]);
-            if (val_tmp[3] < local_moments[0*nvals+3])
-                local_moments[0*nvals+3] = val_tmp[3];
-            if (val_tmp[3] > local_moments[9*nvals+3])
-                local_moments[9*nvals+3] = val_tmp[3];
-            int bin = int(floor(val_tmp[3]*2/binsize[3]));
-            if (bin >= 0 && bin < nbins)
-                local_hist[bin*nvals+3]++;
-        }
-        for (int i=0; i<3; i++)
-        {
-            if (val_tmp[i] < local_moments[0*nvals+i])
-                local_moments[0*nvals+i] = val_tmp[i];
-            if (val_tmp[i] > local_moments[(nmoments-1)*nvals+i])
-                local_moments[(nmoments-1)*nvals+i] = val_tmp[i];
-            int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i]));
-            if (bin >= 0 && bin < nbins)
-                local_hist[bin*nvals+i]++;
-        }
-        for (int n=1; n < nmoments-1; n++){
-            double pow_tmp = 1.;
-            for (int i=0; i<nvals; i++){
-                local_moments[n*nvals + i] += (pow_tmp = val_tmp[i]*pow_tmp);
-            }
-        }
-    }
-    );
-
-    threaded_local_hist.mergeParallel();
-    threaded_local_moments.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double {
-          if(nvals == int(4) && idx == 0*nvals+3){
-              return std::min(v1, v2);  
-          }
-          if(nvals == int(4) && idx == 9*nvals+3){
-              return std::max(v1, v2);  
-          }
-          if(idx < 3){
-              return std::min(v1, v2);        
-          }      
-          if((nmoments-1)*nvals <= idx && idx < (nmoments-1)*nvals+3){
-              return std::max(v1, v2);        
-          }
-          return v1 + v2;
-      });
-
-
-    double *moments = new double[nmoments*nvals];
-    MPI_Allreduce(
-                threaded_local_moments.getMasterData(),
-                (void*)moments,
-                nvals,
-                MPI_DOUBLE, MPI_MIN, this->cd->comm);
-    MPI_Allreduce(
-                (threaded_local_moments.getMasterData() + nvals),
-                (void*)(moments+nvals),
-                (nmoments-2)*nvals,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    MPI_Allreduce(
-                (threaded_local_moments.getMasterData() + (nmoments-1)*nvals),
-                (void*)(moments+(nmoments-1)*nvals),
-                nvals,
-                MPI_DOUBLE, MPI_MAX, this->cd->comm);
-    ptrdiff_t *hist = new ptrdiff_t[nbins*nvals];
-    MPI_Allreduce(
-                threaded_local_hist.getMasterData(),
-                (void*)hist,
-                nbins*nvals,
-                MPI_INT64_T, MPI_SUM, this->cd->comm);
-    for (int n=1; n < nmoments-1; n++)
-        for (int i=0; i<nvals; i++)
-            moments[n*nvals + i] /= this->normalization_factor;
-    delete[] binsize;
-    if (this->rd->myrank == 0)
-    {
-        hid_t dset, wspace, mspace;
-        hsize_t count[3], offset[3], dims[3];
-        dset = H5Dopen(group, ("moments/" + dset_name).c_str(), H5P_DEFAULT);
-        wspace = H5Dget_space(dset);
-        H5Sget_simple_extent_dims(wspace, dims, NULL);
-        offset[0] = toffset;
-        offset[1] = 0;
-        offset[2] = 0;
-        count[0] = 1;
-        count[1] = nmoments;
-        count[2] = nvals;
-        mspace = H5Screate_simple(3, count, NULL);
-        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-        H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, moments);
-        H5Sclose(wspace);
-        H5Sclose(mspace);
-        H5Dclose(dset);
-        dset = H5Dopen(group, ("histograms/" + dset_name).c_str(), H5P_DEFAULT);
-        wspace = H5Dget_space(dset);
-        count[1] = nbins;
-        mspace = H5Screate_simple(3, count, NULL);
-        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-        H5Dwrite(dset, H5T_NATIVE_INT64, mspace, wspace, H5P_DEFAULT, hist);
-        H5Sclose(wspace);
-        H5Sclose(mspace);
-        H5Dclose(dset);
-    }
-    delete[] moments;
-    delete[] hist;
-}
-
-
-
-template <class rnumber>
-template<int nvals>
-void fluid_solver_base<rnumber>::compute_rspace_stats(
-        rnumber *a,
-        double *moments,
-        ptrdiff_t *hist,
-        double max_estimate[],
-        const int nbins)
-{
-    TIMEZONE("fluid_solver_base::compute_rspace_stats");
-    shared_array<double> threaded_local_moments(10*nvals,[&](double* local_moments){
-        std::fill_n(local_moments, 10*nvals, 0);
-        if (nvals == 4) local_moments[3] = max_estimate[3];
-    });
-
-    shared_array<ptrdiff_t> threaded_local_hist(nbins*nvals, [&](ptrdiff_t* local_hist){
-        std::fill_n(local_hist, nbins*nvals, 0);
-    });
-
-    // Will not be modified by the threads
-    double binsize[nvals];
-    for (int i=0; i<nvals; i++)
-        binsize[i] = 2*max_estimate[i] / nbins;
-
-    RLOOP(
-                this,
-                [&](ptrdiff_t rindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/, ptrdiff_t /*zindex*/){
-        ptrdiff_t *local_hist = threaded_local_hist.getMine();
-        double *local_moments = threaded_local_moments.getMine();
-
-        double val_tmp[nvals];
-        if (nvals == 4) val_tmp[3] = 0.0;
-        for (int i=0; i<3; i++)
-        {
-            val_tmp[i] = a[rindex*3+i];
-            if (nvals == 4) val_tmp[3] += val_tmp[i]*val_tmp[i];
-        }
-        if (nvals == 4)
-        {
-            val_tmp[3] = sqrt(val_tmp[3]);
-            if (val_tmp[3] < local_moments[0*nvals+3])
-                local_moments[0*nvals+3] = val_tmp[3];
-            if (val_tmp[3] > local_moments[9*nvals+3])
-                local_moments[9*nvals+3] = val_tmp[3];
-            int bin = int(floor(val_tmp[3]*2/binsize[3]));
-            if (bin >= 0 && bin < nbins)
-                local_hist[bin*nvals+3]++;
-        }
-        for (int i=0; i<3; i++)
-        {
-            if (val_tmp[i] < local_moments[0*nvals+i])
-                local_moments[0*nvals+i] = val_tmp[i];
-            if (val_tmp[i] > local_moments[9*nvals+i])
-                local_moments[9*nvals+i] = val_tmp[i];
-            int bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i]));
-            if (bin >= 0 && bin < nbins)
-                local_hist[bin*nvals+i]++;
-        }
-        for (int n=1; n<9; n++){
-            double pow_tmp = 1;
-            for (int i=0; i<nvals; i++){
-                local_moments[n*nvals + i] += (pow_tmp = val_tmp[i]*pow_tmp);
-            }
-        }
-    }
-    );
-
-    threaded_local_moments.mergeParallel([&](const int idx, const double& v1, const double& v2) -> double {
-          if(nvals == int(4) && idx == 0*nvals+3){
-              return std::min(v1, v2);  
-          }
-          if(nvals == int(4) && idx == 9*nvals+3){
-              return std::max(v1, v2);  
-          }
-          if(idx < 3){
-              return std::min(v1, v2);        
-          }      
-          if(9*nvals <= idx && idx < 9*nvals+3){
-              return std::max(v1, v2);        
-          }
-          return v1 + v2;
-      });
-    threaded_local_hist.mergeParallel();
-
-    MPI_Allreduce(
-                threaded_local_moments.getMasterData(),
-                (void*)moments,
-                nvals,
-                MPI_DOUBLE, MPI_MIN, this->cd->comm);
-    MPI_Allreduce(
-                (threaded_local_moments.getMasterData() + nvals),
-                (void*)(moments+nvals),
-                8*nvals,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    MPI_Allreduce(
-                (threaded_local_moments.getMasterData() + 9*nvals),
-                (void*)(moments+9*nvals),
-                nvals,
-                MPI_DOUBLE, MPI_MAX, this->cd->comm);
-    MPI_Allreduce(
-                (void*)threaded_local_hist.getMasterData(),
-                (void*)hist,
-                nbins*nvals,
-                MPI_INT64_T, MPI_SUM, this->cd->comm);
-    for (int n=1; n<9; n++)
-        for (int i=0; i<nvals; i++)
-            moments[n*nvals + i] /= this->normalization_factor;
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, const double k2exponent)
-{
-    TIMEZONE("fluid_solver_base::write_spectrum");
-    double *spec = fftw_alloc_real(this->nshells);
-    this->cospectrum(a, a, spec, k2exponent);
-    if (this->cd->myrank == 0)
-    {
-        FILE *spec_file;
-        char full_name[512];
-        sprintf(full_name, "%s_%s_spec", this->name, fname);
-        spec_file = fopen(full_name, "ab");
-        fwrite((void*)&this->iteration, sizeof(int), 1, spec_file);
-        fwrite((void*)spec, sizeof(double), this->nshells, spec_file);
-        fclose(spec_file);
-    }
-    fftw_free(spec);
-}
-
-/*****************************************************************************/
-/* macro for specializations to numeric types compatible with FFTW           */
-
-template <class rnumber>
-fluid_solver_base<rnumber>::fluid_solver_base(
-        const char *NAME,
-        int nx,
-        int ny,
-        int nz,
-        double DKX,
-        double DKY,
-        double DKZ,
-        int DEALIAS_TYPE,
-        unsigned FFTW_PLAN_RIGOR)
-{
-    TIMEZONE("fluid_solver_base::fluid_solver_base");
-    strncpy(this->name, NAME, 256);
-    this->name[255] = '\0';
-    this->iteration = 0;
-    this->fftw_plan_rigor = FFTW_PLAN_RIGOR;
-
-    int ntmp[4];
-    ntmp[0] = nz;
-    ntmp[1] = ny;
-    ntmp[2] = nx;
-    ntmp[3] = 3;
-    this->rd = new field_descriptor<rnumber>(
-                4, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD);
-    this->normalization_factor = (this->rd->full_size/3);
-    ntmp[0] = ny;
-    ntmp[1] = nz;
-    ntmp[2] = nx/2 + 1;
-    ntmp[3] = 3;
-    this->cd = new field_descriptor<rnumber>(
-                4, ntmp, mpi_real_type<rnumber>::complex(), this->rd->comm);
-
-    this->dkx = DKX;
-    this->dky = DKY;
-    this->dkz = DKZ;
-    this->kx = new double[this->cd->sizes[2]];
-    this->ky = new double[this->cd->subsizes[0]];
-    this->kz = new double[this->cd->sizes[1]];
-    this->dealias_type = DEALIAS_TYPE;
-    switch(this->dealias_type)
-    {
-    /* HL07 smooth filter */
-    case 1:
-        this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)-1);
-        this->kMy = this->dky*(int(this->rd->sizes[1] / 2)-1);
-        this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)-1);
-        break;
-    default:
-        this->kMx = this->dkx*(int(this->rd->sizes[2] / 3)-1);
-        this->kMy = this->dky*(int(this->rd->sizes[1] / 3)-1);
-        this->kMz = this->dkz*(int(this->rd->sizes[0] / 3)-1);
-    }
-    int i, ii;
-    for (i = 0; i<this->cd->sizes[2]; i++)
-        this->kx[i] = i*this->dkx;
-    for (i = 0; i<this->cd->subsizes[0]; i++)
-    {
-        ii = i + this->cd->starts[0];
-        if (ii <= this->rd->sizes[1]/2)
-            this->ky[i] = this->dky*ii;
-        else
-            this->ky[i] = this->dky*(ii - this->rd->sizes[1]);
-    }
-    for (i = 0; i<this->cd->sizes[1]; i++)
-    {
-        if (i <= this->rd->sizes[0]/2)
-            this->kz[i] = this->dkz*i;
-        else
-            this->kz[i] = this->dkz*(i - this->rd->sizes[0]);
-    }
-    this->kM = this->kMx;
-    if (this->kM < this->kMy) this->kM = this->kMy;
-    if (this->kM < this->kMz) this->kM = this->kMz;
-    this->kM2 = this->kM * this->kM;
-    this->kMspec = this->kM;
-    this->kMspec2 = this->kM2;
-    this->dk = this->dkx;
-    if (this->dk > this->dky) this->dk = this->dky;
-    if (this->dk > this->dkz) this->dk = this->dkz;
-    this->dk2 = this->dk*this->dk;
-    DEBUG_MSG(
-                "kM = %g, kM2 = %g, dk = %g, dk2 = %g\n",
-                this->kM, this->kM2, this->dk, this->dk2);
-    /* spectra stuff */
-    this->nshells = int(this->kMspec / this->dk) + 2;
-    DEBUG_MSG(
-                "kMspec = %g, kMspec2 = %g, nshells = %ld\n",
-                this->kMspec, this->kMspec2, this->nshells);
-    this->kshell = new double[this->nshells];
-    std::fill_n(this->kshell, this->nshells, 0.0);
-    this->nshell = new int64_t[this->nshells];
-    std::fill_n(this->nshell, this->nshells, 0);
-    DEBUG_MSG("fluid_solver_base::fluid_solver_base before declaring shared_array\n");
-
-    shared_array<double> kshell_local_threaded(this->nshells,[&](double* kshell_local){
-        std::fill_n(kshell_local, this->nshells, 0.0);
-    });
-    DEBUG_MSG("fluid_solver_base::fluid_solver_base before declaring shared_array\n");
-    shared_array<int64_t> nshell_local_threaded(this->nshells,[&](int64_t* nshell_local){
-        std::fill_n(nshell_local, this->nshells, 0);
-    });
-
-    std::vector<std::unordered_map<int, double>> Fourier_filter_threaded(omp_get_max_threads());
-
-    DEBUG_MSG("fluid_solver_base::fluid_solver_base before cloop_k2_nxmodes\n");
-    CLOOP_K2_NXMODES(
-                this,
-
-                [&](ptrdiff_t /*cindex*/, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/,
-                ptrdiff_t /*zindex*/, double k2, int nxmodes){
-        if (k2 < this->kM2)
-        {
-            double knorm = sqrt(k2);
-            nshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes;
-            kshell_local_threaded.getMine()[int(knorm/this->dk)] += nxmodes*knorm;
-        }
-        Fourier_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));}
-    );
-
-    // Merge results
-    nshell_local_threaded.mergeParallel();
-    kshell_local_threaded.mergeParallel();
-    for(int idxMerge = 0 ; idxMerge < int(Fourier_filter_threaded.size()) ; ++idxMerge){
-        for(const auto kv : Fourier_filter_threaded[idxMerge]){
-            this->Fourier_filter[kv.first] = kv.second;
-        }
-    }
-
-    MPI_Allreduce(
-                (void*)(nshell_local_threaded.getMasterData()),
-                (void*)(this->nshell),
-                this->nshells,
-                MPI_INT64_T, MPI_SUM, this->cd->comm);
-    MPI_Allreduce(
-                (void*)(kshell_local_threaded.getMasterData()),
-                (void*)(this->kshell),
-                this->nshells,
-                MPI_DOUBLE, MPI_SUM, this->cd->comm);
-    for (unsigned int n=0; n<this->nshells; n++)
-    {
-        if (this->nshell[n] != 0)
-            this->kshell[n] /= this->nshell[n];
-        else
-            this->kshell[n] = -1;
-    }
-    DEBUG_MSG("exiting fluid_solver_base::fluid_solver_base\n");
-}
-
-template <class rnumber>
-fluid_solver_base<rnumber>::~fluid_solver_base()
-{
-    delete[] this->kshell;
-    delete[] this->nshell;
-
-    delete[] this->kx;
-    delete[] this->ky;
-    delete[] this->kz;
-
-    delete this->cd;
-    delete this->rd;
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::low_pass_Fourier(cnumber *a, const int howmany, const double kmax)
-{
-    TIMEZONE("fluid_solver_base::low_pass_Fourier");
-    const double km2 = kmax*kmax;
-    const int howmany2 = 2*howmany;
-    /*DEBUG_MSG("entered low_pass_Fourier, kmax=%lg km2=%lg howmany2=%d\n", kmax, km2, howmany2);*/
-    CLOOP_K2(
-                this,
-                /*DEBUG_MSG("kx=%lg ky=%lg kz=%lg k2=%lg\n",
-                                  this->kx[xindex],
-                                  this->ky[yindex],
-                                  this->kz[zindex],
-                                  k2);*/
-
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex,
-                ptrdiff_t zindex, double k2){
-        if (k2 >= km2)
-            std::fill_n((rnumber*)(a + howmany*cindex), howmany2, 0.0);}
-    );
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::dealias(cnumber *a, const int howmany)
-{
-    TIMEZONE("fluid_solver_base::dealias");
-    if (this->dealias_type == 0)
-    {
-        this->low_pass_Fourier(a, howmany, this->kM);
-        return;
-    }
-
-    CLOOP_K2(
-                this,
-                [&](ptrdiff_t cindex, ptrdiff_t /*xindex*/, ptrdiff_t /*yindex*/,
-                ptrdiff_t /*zindex*/, double k2){
-        double tval = this->Fourier_filter[int(round(k2/this->dk2))];
-        // It is thread safe on the index cindex
-        for (int tcounter = 0; tcounter < howmany; tcounter++)
-            for (int i=0; i<2; i++)
-                a[howmany*cindex+tcounter][i] *= tval;
-    }
-    );
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::force_divfree(cnumber *a)
-{
-    TIMEZONE("fluid_solver_base::force_divfree");
-    CLOOP_K2(
-                this,
-
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex,
-                ptrdiff_t zindex, double k2){
-        if (k2 > 0)
-        {
-            // It is thread safe on index cindex
-            cnumber tval;
-            tval[0] = (this->kx[xindex]*((*(a + cindex*3  ))[0]) +
-                    this->ky[yindex]*((*(a + cindex*3+1))[0]) +
-                    this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2;
-            tval[1] = (this->kx[xindex]*((*(a + cindex*3  ))[1]) +
-                    this->ky[yindex]*((*(a + cindex*3+1))[1]) +
-                    this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2;
-            for (int imag_part=0; imag_part<2; imag_part++)
-            {
-                a[cindex*3  ][imag_part] -= tval[imag_part]*this->kx[xindex];
-                a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex];
-                a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex];
-            }
-        }}
-    );
-    if (this->cd->myrank == this->cd->rank[0])
-        std::fill_n((rnumber*)(a), 6, 0.0);
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::compute_vector_gradient(cnumber *A, cnumber *cvec)
-{
-    TIMEZONE("fluid_solver_base::compute_vector_gradient");
-    std::fill_n((rnumber*)A, 3*2*this->cd->local_size, 0.0);
-    cnumber *dx_u, *dy_u, *dz_u;
-    dx_u = A;
-    dy_u = A + this->cd->local_size;
-    dz_u = A + 2*this->cd->local_size;
-    CLOOP_K2(
-                this,
-
-                [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex,
-                ptrdiff_t zindex, double k2){
-        if (k2 <= this->kM2)
-        {
-            // It is thread safe on cindex
-            ptrdiff_t tindex = 3*cindex;
-            for (int cc=0; cc<3; cc++)
-            {
-                dx_u[tindex + cc][0] = -this->kx[xindex]*cvec[tindex+cc][1];
-                dx_u[tindex + cc][1] =  this->kx[xindex]*cvec[tindex+cc][0];
-                dy_u[tindex + cc][0] = -this->ky[yindex]*cvec[tindex+cc][1];
-                dy_u[tindex + cc][1] =  this->ky[yindex]*cvec[tindex+cc][0];
-                dz_u[tindex + cc][0] = -this->kz[zindex]*cvec[tindex+cc][1];
-                dz_u[tindex + cc][1] =  this->kz[zindex]*cvec[tindex+cc][0];
-            }
-        }}
-    );
-}
-
-template <class rnumber>
-void fluid_solver_base<rnumber>::symmetrize(cnumber *data, const int howmany)
-{
-    TIMEZONE("fluid_solver_base::symmetrize");
-    ptrdiff_t ii, cc;
-    MPI_Status *mpistatus = new MPI_Status;
-    if (this->cd->myrank == this->cd->rank[0])
-    {
-        for (cc = 0; cc < howmany; cc++)
-            data[cc][1] = 0.0;
-        for (ii = 1; ii < this->cd->sizes[1]/2; ii++)
-            for (cc = 0; cc < howmany; cc++) {
-                ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[0] =
-                        (*(data + cc + howmany*(                     ii)*this->cd->sizes[2]))[0];
-                ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[1] =
-                        -(*(data + cc + howmany*(                     ii)*this->cd->sizes[2]))[1];
-            }
-    }
-    cnumber *buffer;
-    buffer = fftw_interface<rnumber>::alloc_complex(howmany*this->cd->sizes[1]);
-    ptrdiff_t yy;
-    /*ptrdiff_t tindex;*/
-    int ranksrc, rankdst;
-    for (yy = 1; yy < this->cd->sizes[0]/2; yy++) {
-        ranksrc = this->cd->rank[yy];
-        rankdst = this->cd->rank[this->cd->sizes[0] - yy];
-        if (this->cd->myrank == ranksrc)
-            for (ii = 0; ii < this->cd->sizes[1]; ii++)
-                for (cc = 0; cc < howmany; cc++)
-                    for (int imag_comp=0; imag_comp<2; imag_comp++)
-                        (*(buffer + howmany*ii+cc))[imag_comp] =
-                            (*(data + howmany*((yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[imag_comp];
-        if (ranksrc != rankdst)
-        {
-            if (this->cd->myrank == ranksrc)
-                MPI_Send((void*)buffer,
-                         howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy,
-                        this->cd->comm);
-            if (this->cd->myrank == rankdst)
-                MPI_Recv((void*)buffer,
-                         howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy,
-                        this->cd->comm, mpistatus);
-        }
-        if (this->cd->myrank == rankdst)
-        {
-            for (ii = 1; ii < this->cd->sizes[1]; ii++)
-                for (cc = 0; cc < howmany; cc++)
-                {
-                    (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[0] =
-                            (*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[0];
-                    (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[1] =
-                            -(*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[1];
-                }
-            for (cc = 0; cc < howmany; cc++)
-            {
-                (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[0] =  (*(buffer + cc))[0];
-                (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[1] = -(*(buffer + cc))[1];
-            }
-        }
-    }
-    fftw_interface<rnumber>::free(buffer);
-    delete mpistatus;
-    /* put asymmetric data to 0 */
-    /*if (this->cd->myrank == this->cd->rank[this->cd->sizes[0]/2])
-    {
-        tindex = howmany*(this->cd->sizes[0]/2 - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2];
-        for (ii = 0; ii < this->cd->sizes[1]; ii++)
-        {
-            std::fill_n((rnumber*)(data + tindex), howmany*2*this->cd->sizes[2], 0.0);
-            tindex += howmany*this->cd->sizes[2];
-        }
-    }
-    tindex = howmany*();
-    std::fill_n((rnumber*)(data + tindex), howmany*2, 0.0);*/
-}
-
-template <class rnumber>
-int fluid_solver_base<rnumber>::read_base(const char *fname, rnumber *data)
-{
-    char full_name[512];
-    sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration);
-    return this->rd->read(full_name, (void*)data);
-}
-
-template <class rnumber>
-int fluid_solver_base<rnumber>::read_base(const char *fname, cnumber *data)
-{
-    char full_name[512];
-    sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration);
-    return this->cd->read(full_name, (void*)data);
-}
-
-template <class rnumber>
-int fluid_solver_base<rnumber>::write_base(const char *fname, rnumber *data)
-{
-    char full_name[512];
-    sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration);
-    return this->rd->write(full_name, (void*)data);
-}
-
-template <class rnumber>
-int fluid_solver_base<rnumber>::write_base(const char *fname, cnumber *data)
-{
-    char full_name[512];
-    sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration);
-    return this->cd->write(full_name, (void*)data);
-}
-
-/* finally, force generation of code                                         */
-template class fluid_solver_base<float>;
-template class fluid_solver_base<double>;
-
-/*****************************************************************************/
-
-
-
-
diff --git a/bfps/cpp/fluid_solver_base.hpp b/bfps/cpp/fluid_solver_base.hpp
deleted file mode 100644
index e446956001a08fdbf0d3b11da8552e1cb6c61a45..0000000000000000000000000000000000000000
--- a/bfps/cpp/fluid_solver_base.hpp
+++ /dev/null
@@ -1,272 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <hdf5.h>
-#include <iostream>
-#include <unordered_map>
-#include <vector>
-#include "base.hpp"
-#include "field_descriptor.hpp"
-#include "scope_timer.hpp"
-#include "omputils.hpp"
-
-#ifndef FLUID_SOLVER_BASE
-
-#define FLUID_SOLVER_BASE
-
-extern int myrank, nprocs;
-
-
-/* container for field descriptor, fields themselves, parameters, etc
- * using the same big macro idea that they're using in fftw3.h
- * I feel like I should quote:  Ugh.
- * */
-
-template <class rnumber>
-class fluid_solver_base
-{
-    protected:
-        typedef rnumber cnumber[2];
-    public:
-        field_descriptor<rnumber> *cd, *rd;
-        ptrdiff_t normalization_factor;
-        unsigned fftw_plan_rigor;
-
-        /* simulation parameters */
-        char name[256];
-        int iteration;
-
-        /* physical parameters */
-        double dkx, dky, dkz, dk, dk2;
-
-        /* mode and dealiasing information */
-        int dealias_type;
-        double kMx, kMy, kMz, kM, kM2;
-        double kMspec, kMspec2;
-        double *kx, *ky, *kz;
-        std::unordered_map<int, double> Fourier_filter;
-        double *kshell;
-        int64_t *nshell;
-        unsigned int nshells;
-
-
-        /* methods */
-        fluid_solver_base(
-                const char *NAME,
-                int nx,
-                int ny,
-                int nz,
-                double DKX = 1.0,
-                double DKY = 1.0,
-                double DKZ = 1.0,
-                int DEALIAS_TYPE = 0,
-                unsigned FFTW_PLAN_RIGOR = DEFAULT_FFTW_FLAG);
-        ~fluid_solver_base();
-
-        void low_pass_Fourier(cnumber *__restrict__ a, int howmany, double kmax);
-        void dealias(cnumber *__restrict__ a, int howmany);
-        void force_divfree(cnumber *__restrict__ a);
-        void symmetrize(cnumber *__restrict__ a, int howmany);
-        void clean_up_real_space(rnumber *__restrict__ a, int howmany);
-        void cospectrum(cnumber *__restrict__ a, cnumber *__restrict__ b, double *__restrict__ spec);
-        void cospectrum(cnumber *__restrict__ a, cnumber *__restrict__ b, double *__restrict__ spec, const double k2exponent);
-        double autocorrel(cnumber *__restrict__ a);
-        void compute_rspace_stats(
-                const rnumber *__restrict__ a,
-                const hid_t group,
-                const std::string dset_name,
-                const hsize_t toffset,
-                const std::vector<double> max_estimate);
-        template <int nvals>
-        void compute_rspace_stats(rnumber *__restrict__ a,
-                                  double *__restrict__ moments,
-                                  ptrdiff_t *__restrict__ hist,
-                                  double max_estimate[nvals],
-                                  const int nbins = 256);
-        inline void compute_rspace_stats3(rnumber *__restrict__ a,
-                                  double *__restrict__ moments,
-                                  ptrdiff_t *__restrict__ hist,
-                                  double max_estimate[3],
-                                  const int nbins = 256)
-        {
-            this->compute_rspace_stats<3>(a, moments, hist, max_estimate, nbins);
-        }
-        inline void compute_rspace_stats4(rnumber *__restrict__ a,
-                                  double *__restrict__ moments,
-                                  ptrdiff_t *__restrict__ hist,
-                                  double max_estimate[4],
-                                  const int nbins = 256)
-        {
-            this->compute_rspace_stats<4>(a, moments, hist, max_estimate, nbins);
-        }
-        void compute_vector_gradient(rnumber (*__restrict__ A)[2], rnumber(*__restrict__ source)[2]);
-        void write_spectrum(const char *fname, cnumber *a, const double k2exponent = 0.0);
-        void fill_up_filename(const char *base_name, char *full_name);
-        int read_base(const char *fname, rnumber *data);
-        int read_base(const char *fname, cnumber *data);
-        int write_base(const char *fname, rnumber *data);
-        int write_base(const char *fname, cnumber *data);
-};
-
-
-
-/*****************************************************************************/
-/* macros for loops                                                          */
-
-/* Fourier space loop */
-template <class ObjectType, class FuncType>
-void CLOOP(ObjectType* obj, FuncType expression)
-{
-    TIMEZONE("CLOOP");
-    #pragma omp parallel
-    {
-        const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[0]);
-        const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[0]);
-        for (ptrdiff_t yindex = start; yindex < ptrdiff_t(end); yindex++){
-            ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2];
-            for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++)
-            for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++)
-                {
-                    expression(cindex, xindex, yindex, zindex);
-                    cindex++;
-                }
-        }
-    }
-}
-
-template <class ObjectType, class FuncType>
-void CLOOP_NXMODES(ObjectType* obj, FuncType expression)
-{
-    TIMEZONE("CLOOP_NXMODES");
-    #pragma omp parallel
-    {
-        const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]);
-        const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]);
-        for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){
-            for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++)
-            {
-                ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]
-                                   + zindex*obj->cd->subsizes[2];
-                int nxmodes = 1;
-                ptrdiff_t xindex = 0;
-                expression();
-                cindex++;
-                nxmodes = 2;
-                for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++)
-                {
-                    expression();
-                    cindex++;
-                }
-            }
-        }
-    }
-}
-
-
-template <class ObjectType, class FuncType>
-void CLOOP_K2(ObjectType* obj, FuncType expression)
-{
-    TIMEZONE("CLOOP_K2");
-    #pragma omp parallel
-    {
-        const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]);
-        const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]);
-        for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){
-            for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++){
-                ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]
-                                   + zindex*obj->cd->subsizes[2];
-                for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++)
-                {
-                    double k2 = (obj->kx[xindex]*obj->kx[xindex] +
-                          obj->ky[yindex]*obj->ky[yindex] +
-                          obj->kz[zindex]*obj->kz[zindex]);
-                    expression(cindex, xindex, yindex, zindex, k2);
-                    cindex++;
-                }
-            }
-        }
-    }
-}
-
-
-template <class ObjectType, class FuncType>
-void CLOOP_K2_NXMODES(ObjectType* obj, FuncType expression)
-{
-    #pragma omp parallel
-    {
-        const hsize_t start = OmpUtils::ForIntervalStart(obj->cd->subsizes[1]);
-        const hsize_t end = OmpUtils::ForIntervalEnd(obj->cd->subsizes[1]);
-        for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++){
-            for (ptrdiff_t zindex = start; zindex < ptrdiff_t(end); zindex++)
-            {
-                ptrdiff_t cindex = yindex*obj->cd->subsizes[1]*obj->cd->subsizes[2]
-                                   + zindex*obj->cd->subsizes[2];
-                int nxmodes = 1;
-                ptrdiff_t xindex = 0;
-                double k2 = (obj->kx[xindex]*obj->kx[xindex] +
-                      obj->ky[yindex]*obj->ky[yindex] +
-                      obj->kz[zindex]*obj->kz[zindex]);
-                expression(cindex, xindex, yindex, zindex, k2, nxmodes);
-                cindex++;
-                nxmodes = 2;
-                for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++)
-                {
-                    double k2 = (obj->kx[xindex]*obj->kx[xindex] +
-                          obj->ky[yindex]*obj->ky[yindex] +
-                          obj->kz[zindex]*obj->kz[zindex]);
-                    expression(cindex, xindex, yindex, zindex, k2, nxmodes);
-                    cindex++;
-                }
-            }
-        }
-    }
-}
-
-
-template <class ObjectType, class FuncType>
-void RLOOP(ObjectType* obj, FuncType expression)
-{
-    #pragma omp parallel
-    {
-        const hsize_t start = OmpUtils::ForIntervalStart(obj->rd->subsizes[1]);
-        const hsize_t end = OmpUtils::ForIntervalEnd(obj->rd->subsizes[1]);
-        for (int zindex = 0; zindex < obj->rd->subsizes[0] ; zindex++)
-        for (int yindex = start; yindex < ptrdiff_t(end); yindex++)
-        {
-            ptrdiff_t rindex = (zindex * obj->rd->subsizes[1] + yindex)*(obj->rd->subsizes[2]+2);
-            for (int xindex = 0; xindex < obj->rd->subsizes[2]; xindex++)
-            {
-                expression(rindex, xindex, yindex, zindex);
-                rindex++;
-            }
-        }
-    }
-}
-
-/*****************************************************************************/
-
-#endif//FLUID_SOLVER_BASE
-
diff --git a/bfps/cpp/full_code/NSVE.cpp b/bfps/cpp/full_code/NSVE.cpp
deleted file mode 100644
index 1e24c7af531e7184f75b1f14257d42b822db7a9c..0000000000000000000000000000000000000000
--- a/bfps/cpp/full_code/NSVE.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-#include <string>
-#include <cmath>
-#include "NSVE.hpp"
-#include "scope_timer.hpp"
-
-
-template <typename rnumber>
-int NSVE<rnumber>::initialize(void)
-{
-    this->read_iteration();
-    this->read_parameters();
-    if (this->myrank == 0)
-    {
-        // set caching parameters
-        hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
-        herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0);
-        DEBUG_MSG("when setting stat_file cache I got %d\n", cache_err);
-        this->stat_file = H5Fopen(
-                (this->simname + ".h5").c_str(),
-                H5F_ACC_RDWR,
-                fapl);
-    }
-    int data_file_problem;
-    if (this->myrank == 0)
-        data_file_problem = this->grow_file_datasets();
-    MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, this->comm);
-    if (data_file_problem > 0)
-    {
-        std::cerr <<
-            data_file_problem <<
-            " problems growing file datasets.\ntrying to exit now." <<
-            std::endl;
-        return EXIT_FAILURE;
-    }
-    this->fs = new vorticity_equation<rnumber, FFTW>(
-            simname.c_str(),
-            nx, ny, nz,
-            dkx, dky, dkz,
-            DEFAULT_FFTW_FLAG);
-    this->tmp_vec_field = new field<rnumber, FFTW, THREE>(
-            nx, ny, nz,
-            this->comm,
-            DEFAULT_FFTW_FLAG);
-
-
-    this->fs->checkpoints_per_file = checkpoints_per_file;
-    this->fs->nu = nu;
-    this->fs->fmode = fmode;
-    this->fs->famplitude = famplitude;
-    this->fs->fk0 = fk0;
-    this->fs->fk1 = fk1;
-    strncpy(this->fs->forcing_type, forcing_type, 128);
-    this->fs->iteration = this->iteration;
-    this->fs->checkpoint = this->checkpoint;
-
-    this->fs->cvorticity->real_space_representation = false;
-    this->fs->io_checkpoint();
-
-    if (this->myrank == 0 && this->iteration == 0)
-        this->fs->kk->store(stat_file);
-    return EXIT_SUCCESS;
-}
-
-template <typename rnumber>
-int NSVE<rnumber>::step(void)
-{
-    this->fs->step(this->dt);
-    this->iteration = this->fs->iteration;
-    return EXIT_SUCCESS;
-}
-
-template <typename rnumber>
-int NSVE<rnumber>::write_checkpoint(void)
-{
-    this->fs->io_checkpoint(false);
-    this->checkpoint = this->fs->checkpoint;
-    this->write_iteration();
-    return EXIT_SUCCESS;
-}
-
-template <typename rnumber>
-int NSVE<rnumber>::finalize(void)
-{
-    if (this->myrank == 0)
-        H5Fclose(this->stat_file);
-    delete this->fs;
-    delete this->tmp_vec_field;
-    return EXIT_SUCCESS;
-}
-
-/** \brief Compute standard statistics for velocity and vorticity fields.
- *
- *  IMPORTANT: at the end of this subroutine, `this->fs->cvelocity` contains
- *  the Fourier space representation of the velocity field, and
- *  `this->tmp_vec_field` contains the real space representation of the
- *  velocity field.
- *  This behavior is relied upon in the `NSVEparticles` class, so please
- *  don't break it.
- */
-
-template <typename rnumber>
-int NSVE<rnumber>::do_stats()
-{
-    if (!(this->iteration % this->niter_stat == 0))
-        return EXIT_SUCCESS;
-    hid_t stat_group;
-    if (this->myrank == 0)
-        stat_group = H5Gopen(
-                this->stat_file,
-                "statistics",
-                H5P_DEFAULT);
-    else
-        stat_group = 0;
-
-    *tmp_vec_field = fs->cvorticity->get_cdata();
-    tmp_vec_field->compute_stats(
-            fs->kk,
-            stat_group,
-            "vorticity",
-            fs->iteration / niter_stat,
-            max_vorticity_estimate/sqrt(3));
-
-    fs->compute_velocity(fs->cvorticity);
-    *tmp_vec_field = fs->cvelocity->get_cdata();
-    tmp_vec_field->compute_stats(
-            fs->kk,
-            stat_group,
-            "velocity",
-            fs->iteration / niter_stat,
-            max_velocity_estimate/sqrt(3));
-
-    if (this->myrank == 0)
-        H5Gclose(stat_group);
-    return EXIT_SUCCESS;
-}
-
-template class NSVE<float>;
-template class NSVE<double>;
-
diff --git a/bfps/cpp/full_code/NSVE_no_output.hpp b/bfps/cpp/full_code/NSVE_no_output.hpp
deleted file mode 100644
index 0047a45a02dd58ae8934f78fdd8d804424ae817c..0000000000000000000000000000000000000000
--- a/bfps/cpp/full_code/NSVE_no_output.hpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef NSVE_NO_OUTPUT_HPP
-#define NSVE_NO_OUTPUT_HPP
-
-#include "full_code/NSVE.hpp"
-
-template <typename rnumber>
-class NSVE_no_output: public NSVE<rnumber>
-{
-    public:
-    NSVE_no_output(
-            const MPI_Comm COMMUNICATOR,
-            const std::string &simulation_name):
-        NSVE<rnumber>(
-                COMMUNICATOR,
-                simulation_name){}
-    ~NSVE_no_output(){}
-    int write_checkpoint(void)
-    {
-        return 0;
-    }
-    int read_parameters(void);
-};
-
-#endif//NSVE_NO_OUTPUT_HPP
-
diff --git a/bfps/cpp/full_code/NSVEparticles.cpp b/bfps/cpp/full_code/NSVEparticles.cpp
deleted file mode 100644
index ba84b3943d579965836f05af2447722e273f2dc3..0000000000000000000000000000000000000000
--- a/bfps/cpp/full_code/NSVEparticles.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-#include <string>
-#include <cmath>
-#include "NSVEparticles.hpp"
-#include "scope_timer.hpp"
-#include "particles/particles_sampling.hpp"
-
-template <typename rnumber>
-int NSVEparticles<rnumber>::initialize(void)
-{
-    this->NSVE<rnumber>::initialize();
-
-    this->ps = particles_system_builder(
-                this->fs->cvelocity,              // (field object)
-                this->fs->kk,                     // (kspace object, contains dkx, dky, dkz)
-                tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs)
-                (long long int)nparticles,  // to check coherency between parameters and hdf input file
-                this->fs->get_current_fname(),    // particles input filename
-                std::string("/tracers0/state/") + std::to_string(this->fs->iteration), // dataset name for initial input
-                std::string("/tracers0/rhs/")  + std::to_string(this->fs->iteration),  // dataset name for initial input
-                tracers0_neighbours,        // parameter (interpolation no neighbours)
-                tracers0_smoothness,        // parameter
-                this->comm,
-                this->fs->iteration+1);
-    this->particles_output_writer_mpi = new particles_output_hdf5<
-        long long int, double, 3, 3>(
-                MPI_COMM_WORLD,
-                "tracers0",
-                nparticles,
-                tracers0_integration_steps);
-    return EXIT_SUCCESS;
-}
-
-template <typename rnumber>
-int NSVEparticles<rnumber>::step(void)
-{
-    this->fs->compute_velocity(this->fs->cvorticity);
-    this->fs->cvelocity->ift();
-    this->ps->completeLoop(this->dt);
-    this->NSVE<rnumber>::step();
-    return EXIT_SUCCESS;
-}
-
-template <typename rnumber>
-int NSVEparticles<rnumber>::write_checkpoint(void)
-{
-    this->NSVE<rnumber>::write_checkpoint();
-    this->particles_output_writer_mpi->open_file(this->fs->get_current_fname());
-    this->particles_output_writer_mpi->save(
-            this->ps->getParticlesPositions(),
-            this->ps->getParticlesRhs(),
-            this->ps->getParticlesIndexes(),
-            this->ps->getLocalNbParticles(),
-            this->fs->iteration);
-    this->particles_output_writer_mpi->close_file();
-    return EXIT_SUCCESS;
-}
-
-template <typename rnumber>
-int NSVEparticles<rnumber>::finalize(void)
-{
-    this->NSVE<rnumber>::finalize();
-    this->ps.release();
-    delete this->particles_output_writer_mpi;
-    return EXIT_SUCCESS;
-}
-
-/** \brief Compute fluid stats and sample fields at particle locations.
- */
-
-template <typename rnumber>
-int NSVEparticles<rnumber>::do_stats()
-{
-    /// fluid stats go here
-    this->NSVE<rnumber>::do_stats();
-
-
-    if (!(this->iteration % this->niter_part == 0))
-        return EXIT_SUCCESS;
-
-    /// sample velocity
-    sample_from_particles_system(*this->tmp_vec_field,              // field to save
-                                 this->ps,
-                                 (this->simname + "_particles.h5"), // filename
-                                 "tracers0",                        // hdf5 parent group
-                                 "velocity"                         // dataset basename TODO
-                                 );
-
-    /// compute acceleration and sample it
-    this->fs->compute_Lagrangian_acceleration(this->tmp_vec_field);
-    this->tmp_vec_field->ift();
-    sample_from_particles_system(*this->tmp_vec_field,
-                                 this->ps,
-                                 (this->simname + "_particles.h5"),
-                                 "tracers0",
-                                 "acceleration");
-
-    return EXIT_SUCCESS;
-}
-
-template class NSVEparticles<float>;
-template class NSVEparticles<double>;
-
diff --git a/bfps/cpp/full_code/NSVEparticles_no_output.hpp b/bfps/cpp/full_code/NSVEparticles_no_output.hpp
deleted file mode 100644
index 264fd75ac9b0628aff167d018d888030b7029a35..0000000000000000000000000000000000000000
--- a/bfps/cpp/full_code/NSVEparticles_no_output.hpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef NSVEPARTICLES_NO_OUTPUT_HPP
-#define NSVEPARTICLES_NO_OUTPUT_HPP
-
-#include "full_code/NSVEparticles.hpp"
-
-template <typename rnumber>
-class NSVEparticles_no_output: public NSVEparticles<rnumber>
-{
-    public:
-    NSVEparticles_no_output(
-            const MPI_Comm COMMUNICATOR,
-            const std::string &simulation_name):
-        NSVEparticles<rnumber>(
-                COMMUNICATOR,
-                simulation_name){}
-    ~NSVEparticles_no_output(){}
-    int write_checkpoint(void)
-    {
-        return 0;
-    }
-    int read_parameters(void);
-};
-
-#endif//NSVEPARTICLES_NO_OUTPUT_HPP
-
diff --git a/bfps/cpp/full_code/code_base.cpp b/bfps/cpp/full_code/code_base.cpp
deleted file mode 100644
index 1b06fe8e66a4180034b9f6a494a1a432ae5ea3f9..0000000000000000000000000000000000000000
--- a/bfps/cpp/full_code/code_base.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "code_base.hpp"
-#include "scope_timer.hpp"
-
-code_base::code_base(
-        const MPI_Comm COMMUNICATOR,
-        const std::string &simulation_name):
-    comm(COMMUNICATOR),
-    simname(simulation_name)
-{
-    MPI_Comm_rank(this->comm, &this->myrank);
-    MPI_Comm_size(this->comm, &this->nprocs);
-    this->stop_code_now = false;
-}
-
-int code_base::check_stopping_condition(void)
-{
-    if (myrank == 0)
-    {
-        std::string fname = (
-                std::string("stop_") +
-                std::string(this->simname));
-        {
-            struct stat file_buffer;
-            this->stop_code_now = (
-                    stat(fname.c_str(), &file_buffer) == 0);
-        }
-    }
-    MPI_Bcast(
-            &this->stop_code_now,
-            1,
-            MPI_C_BOOL,
-            0,
-            MPI_COMM_WORLD);
-    return EXIT_SUCCESS;
-}
-
diff --git a/bfps/cpp/full_code/codes_with_no_output.hpp b/bfps/cpp/full_code/codes_with_no_output.hpp
deleted file mode 100644
index f4cd3b5495ecb432653a7027bcaa330954865d21..0000000000000000000000000000000000000000
--- a/bfps/cpp/full_code/codes_with_no_output.hpp
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef CODES_WITH_NO_OUTPUT_HPP
-#define CODES_WITH_NO_OUTPUT_HPP
-
-#include "full_code/NSVE_no_output.hpp"
-#include "full_code/NSVEparticles_no_output.hpp"
-
-
-#endif//CODES_WITH_NO_OUTPUT_HPP
-
diff --git a/bfps/cpp/full_code/native_binary_to_hdf5.cpp b/bfps/cpp/full_code/native_binary_to_hdf5.cpp
deleted file mode 100644
index 7774e2dea9012394c389858038e8ca82674256d7..0000000000000000000000000000000000000000
--- a/bfps/cpp/full_code/native_binary_to_hdf5.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <string>
-#include <cmath>
-#include "native_binary_to_hdf5.hpp"
-#include "scope_timer.hpp"
-
-
-template <typename rnumber>
-int native_binary_to_hdf5<rnumber>::initialize(void)
-{
-    this->read_parameters();
-    this->vec_field = new field<rnumber, FFTW, THREE>(
-            nx, ny, nz,
-            this->comm,
-            DEFAULT_FFTW_FLAG);
-    this->vec_field->real_space_representation = false;
-    this->bin_IO = new field_binary_IO<rnumber, COMPLEX, THREE>(
-            this->vec_field->clayout->sizes,
-            this->vec_field->clayout->subsizes,
-            this->vec_field->clayout->starts,
-            this->vec_field->clayout->comm);
-    return EXIT_SUCCESS;
-}
-
-template <typename rnumber>
-int native_binary_to_hdf5<rnumber>::work_on_current_iteration(void)
-{
-    char itername[16];
-    sprintf(itername, "i%.5x", this->iteration);
-    std::string native_binary_fname = (
-            this->simname +
-            std::string("_cvorticity_") +
-            std::string(itername));
-    this->bin_IO->read(
-            native_binary_fname,
-            this->vec_field->get_cdata());
-    this->vec_field->io(
-            (native_binary_fname +
-             std::string(".h5")),
-            "vorticity",
-            this->iteration,
-            false);
-    return EXIT_SUCCESS;
-}
-
-template <typename rnumber>
-int native_binary_to_hdf5<rnumber>::finalize(void)
-{
-    delete this->bin_IO;
-    delete this->vec_field;
-    return EXIT_SUCCESS;
-}
-
-template <typename rnumber>
-int native_binary_to_hdf5<rnumber>::read_parameters(void)
-{
-    this->postprocess::read_parameters();
-    hid_t parameter_file = H5Fopen(
-            (this->simname + std::string(".h5")).c_str(),
-            H5F_ACC_RDONLY,
-            H5P_DEFAULT);
-    this->iteration_list = hdf5_tools::read_vector<int>(
-            parameter_file,
-            "/native_binary_to_hdf5/iteration_list");
-    H5Fclose(parameter_file);
-    return EXIT_SUCCESS;
-}
-
-template class native_binary_to_hdf5<float>;
-template class native_binary_to_hdf5<double>;
-
diff --git a/bfps/cpp/full_code/postprocess.cpp b/bfps/cpp/full_code/postprocess.cpp
deleted file mode 100644
index edb5929f72c5197c123f8f4e20d426ca1ad9eb6f..0000000000000000000000000000000000000000
--- a/bfps/cpp/full_code/postprocess.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-#include <cstdlib>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "scope_timer.hpp"
-#include "hdf5_tools.hpp"
-#include "full_code/postprocess.hpp"
-
-
-int postprocess::main_loop(void)
-{
-    this->start_simple_timer();
-    for (unsigned int iteration_counter = 0;
-         iteration_counter < iteration_list.size();
-         iteration_counter++)
-    {
-        this->iteration = iteration_list[iteration_counter];
-    #ifdef USE_TIMINGOUTPUT
-        const std::string loopLabel = ("postprocess::main_loop-" +
-                                       std::to_string(this->iteration));
-        TIMEZONE(loopLabel.c_str());
-    #endif
-        this->work_on_current_iteration();
-        this->print_simple_timer(
-                "iteration " + std::to_string(this->iteration));
-
-        this->check_stopping_condition();
-        if (this->stop_code_now)
-            break;
-    }
-    return EXIT_SUCCESS;
-}
-
-
-int postprocess::read_parameters()
-{
-    hid_t parameter_file;
-    hid_t dset, memtype, space;
-    char fname[256];
-    char *string_data;
-    sprintf(fname, "%s.h5", this->simname.c_str());
-    parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
-    dset = H5Dopen(parameter_file, "/parameters/dealias_type", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dealias_type);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/dkx", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dkx);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/dky", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dky);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/dkz", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dkz);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/dt", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dt);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/famplitude", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->famplitude);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/fk0", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->fk0);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/fk1", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->fk1);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/fmode", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->fmode);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/forcing_type", H5P_DEFAULT);
-    space = H5Dget_space(dset);
-    memtype = H5Dget_type(dset);
-    string_data = (char*)malloc(256);
-    H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);
-    sprintf(this->forcing_type, "%s", string_data);
-    free(string_data);
-    H5Sclose(space);
-    H5Tclose(memtype);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/nu", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->nu);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/nx", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->nx);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/ny", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->ny);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/nz", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->nz);
-    H5Dclose(dset);
-    H5Fclose(parameter_file);
-    return 0;
-}
-
diff --git a/bfps/cpp/full_code/test.cpp b/bfps/cpp/full_code/test.cpp
deleted file mode 100644
index 4f7a402c44c2a2999975881929c2582107897c5c..0000000000000000000000000000000000000000
--- a/bfps/cpp/full_code/test.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-#include <cstdlib>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "scope_timer.hpp"
-#include "hdf5_tools.hpp"
-#include "full_code/test.hpp"
-
-
-int test::main_loop(void)
-{
-    #ifdef USE_TIMINGOUTPUT
-        TIMEZONE("test::main_loop");
-    #endif
-    this->start_simple_timer();
-    this->do_work();
-    this->print_simple_timer(
-            "do_work required " + std::to_string(this->iteration));
-    return EXIT_SUCCESS;
-}
-
-
-int test::read_parameters()
-{
-    hid_t parameter_file;
-    hid_t dset, memtype, space;
-    char fname[256];
-    char *string_data;
-    sprintf(fname, "%s.h5", this->simname.c_str());
-    parameter_file = H5Fopen(fname, H5F_ACC_RDONLY, H5P_DEFAULT);
-    dset = H5Dopen(parameter_file, "/parameters/dealias_type", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dealias_type);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/dkx", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dkx);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/dky", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dky);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/dkz", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->dkz);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/nx", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->nx);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/ny", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->ny);
-    H5Dclose(dset);
-    dset = H5Dopen(parameter_file, "/parameters/nz", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->nz);
-    H5Dclose(dset);
-    H5Fclose(parameter_file);
-    return 0;
-}
-
diff --git a/bfps/cpp/interpolator.cpp b/bfps/cpp/interpolator.cpp
deleted file mode 100644
index a0b38c4059585cc7fd58ab830b792be4f8bc193d..0000000000000000000000000000000000000000
--- a/bfps/cpp/interpolator.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include "interpolator.hpp"
-
-template <class rnumber, int interp_neighbours>
-interpolator<rnumber, interp_neighbours>::interpolator(
-        fluid_solver_base<rnumber> *fs,
-        base_polynomial_values BETA_POLYS,
-        ...) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS)
-{
-    int tdims[4];
-    this->compute_beta = BETA_POLYS;
-    tdims[0] = (interp_neighbours+1)*2*this->descriptor->nprocs + this->descriptor->sizes[0];
-    tdims[1] = this->descriptor->sizes[1];
-    tdims[2] = this->descriptor->sizes[2]+2;
-    tdims[3] = this->descriptor->sizes[3];
-    this->buffered_descriptor = new field_descriptor<rnumber>(
-            4, tdims,
-            this->descriptor->mpi_dtype,
-            this->descriptor->comm);
-    this->buffer_size = (interp_neighbours+1)*this->buffered_descriptor->slice_size;
-    this->field = new rnumber[this->buffered_descriptor->local_size];
-}
-
-template <class rnumber, int interp_neighbours>
-interpolator<rnumber, interp_neighbours>::~interpolator()
-{
-    delete[] this->field;
-    delete this->buffered_descriptor;
-}
-
-template <class rnumber, int interp_neighbours>
-int interpolator<rnumber, interp_neighbours>::read_rFFTW(const void *void_src)
-{
-    rnumber *src = (rnumber*)void_src;
-    rnumber *dst = this->field;
-    /* do big copy of middle stuff */
-    std::copy(src,
-              src + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0],
-              dst + this->buffer_size);
-    MPI_Datatype MPI_RNUM = (sizeof(rnumber) == 4) ? MPI_FLOAT : MPI_DOUBLE;
-    int rsrc;
-    /* get upper slices */
-    for (int rdst = 0; rdst < this->descriptor->nprocs; rdst++)
-    {
-        rsrc = this->descriptor->rank[(this->descriptor->all_start0[rdst] +
-                                       this->descriptor->all_size0[rdst]) %
-                                       this->descriptor->sizes[0]];
-        if (this->descriptor->myrank == rsrc)
-            MPI_Send(
-                    src,
-                    this->buffer_size,
-                    MPI_RNUM,
-                    rdst,
-                    2*(rsrc*this->descriptor->nprocs + rdst),
-                    this->buffered_descriptor->comm);
-        if (this->descriptor->myrank == rdst)
-            MPI_Recv(
-                    dst + this->buffer_size + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0],
-                    this->buffer_size,
-                    MPI_RNUM,
-                    rsrc,
-                    2*(rsrc*this->descriptor->nprocs + rdst),
-                    this->buffered_descriptor->comm,
-                    MPI_STATUS_IGNORE);
-    }
-    /* get lower slices */
-    for (int rdst = 0; rdst < this->descriptor->nprocs; rdst++)
-    {
-        rsrc = this->descriptor->rank[MOD(this->descriptor->all_start0[rdst] - 1,
-                                          this->descriptor->sizes[0])];
-        if (this->descriptor->myrank == rsrc)
-            MPI_Send(
-                    src + this->buffered_descriptor->slice_size*this->descriptor->subsizes[0] - this->buffer_size,
-                    this->buffer_size,
-                    MPI_RNUM,
-                    rdst,
-                    2*(rsrc*this->descriptor->nprocs + rdst)+1,
-                    this->descriptor->comm);
-        if (this->descriptor->myrank == rdst)
-            MPI_Recv(
-                    dst,
-                    this->buffer_size,
-                    MPI_RNUM,
-                    rsrc,
-                    2*(rsrc*this->descriptor->nprocs + rdst)+1,
-                    this->descriptor->comm,
-                    MPI_STATUS_IGNORE);
-    }
-    return EXIT_SUCCESS;
-}
-
-template <class rnumber, int interp_neighbours>
-void interpolator<rnumber, interp_neighbours>::sample(
-        const int nparticles,
-        const int pdimension,
-        const double *__restrict__ x,
-        double *__restrict__ y,
-        const int *deriv)
-{
-    /* get grid coordinates */
-    int *xg = new int[3*nparticles];
-    double *xx = new double[3*nparticles];
-    double *yy = new double[3*nparticles];
-    std::fill_n(yy, 3*nparticles, 0.0);
-    this->get_grid_coordinates(nparticles, pdimension, x, xg, xx);
-    /* perform interpolation */
-    for (int p=0; p<nparticles; p++)
-        if (this->descriptor->rank[MOD(xg[p*3+2], this->descriptor->sizes[0])] == this->descriptor->myrank)
-            this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv);
-    MPI_Allreduce(
-            yy,
-            y,
-            3*nparticles,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->descriptor->comm);
-    delete[] yy;
-    delete[] xg;
-    delete[] xx;
-}
-
-template <class rnumber, int interp_neighbours>
-void interpolator<rnumber, interp_neighbours>::operator()(
-        const int *xg,
-        const double *xx,
-        double *__restrict__ dest,
-        const int *deriv)
-{
-    double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2];
-    if (deriv == NULL)
-    {
-        this->compute_beta(0, xx[0], bx);
-        this->compute_beta(0, xx[1], by);
-        this->compute_beta(0, xx[2], bz);
-    }
-    else
-    {
-        this->compute_beta(deriv[0], xx[0], bx);
-        this->compute_beta(deriv[1], xx[1], by);
-        this->compute_beta(deriv[2], xx[2], bz);
-    }
-    std::fill_n(dest, 3, 0);
-    ptrdiff_t bigiz, bigiy, bigix;
-    for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++)
-    {
-        bigiz = ptrdiff_t(xg[2]+iz)-this->descriptor->starts[0];
-        for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++)
-        {
-            bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1]));
-            for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++)
-            {
-                bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2]));
-                ptrdiff_t tindex = ((bigiz *this->buffered_descriptor->sizes[1] +
-                                     bigiy)*this->buffered_descriptor->sizes[2] +
-                                     bigix)*3 + this->buffer_size;
-                for (int c=0; c<3; c++)
-                {
-                    dest[c] += this->field[tindex+c]*(bz[iz+interp_neighbours]*
-                                                      by[iy+interp_neighbours]*
-                                                      bx[ix+interp_neighbours]);
-                }
-            }
-        }
-    }
-}
-
-template class interpolator<float, 1>;
-template class interpolator<float, 2>;
-template class interpolator<float, 3>;
-template class interpolator<float, 4>;
-template class interpolator<float, 5>;
-template class interpolator<float, 6>;
-template class interpolator<float, 7>;
-template class interpolator<float, 8>;
-template class interpolator<float, 9>;
-template class interpolator<float, 10>;
-template class interpolator<double, 1>;
-template class interpolator<double, 2>;
-template class interpolator<double, 3>;
-template class interpolator<double, 4>;
-template class interpolator<double, 5>;
-template class interpolator<double, 6>;
-template class interpolator<double, 7>;
-template class interpolator<double, 8>;
-template class interpolator<double, 9>;
-template class interpolator<double, 10>;
-
diff --git a/bfps/cpp/interpolator_base.cpp b/bfps/cpp/interpolator_base.cpp
deleted file mode 100644
index 668a965c65744ac5aae31afb6bee05711a433657..0000000000000000000000000000000000000000
--- a/bfps/cpp/interpolator_base.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <cmath>
-#include "interpolator_base.hpp"
-
-template <class rnumber, int interp_neighbours>
-interpolator_base<rnumber, interp_neighbours>::interpolator_base(
-        fluid_solver_base<rnumber> *fs,
-        base_polynomial_values BETA_POLYS)
-{
-    this->descriptor = fs->rd;
-    this->compute_beta = BETA_POLYS;
-
-    // compute dx, dy, dz;
-    this->dx = 4*acos(0) / (fs->dkx*this->descriptor->sizes[2]);
-    this->dy = 4*acos(0) / (fs->dky*this->descriptor->sizes[1]);
-    this->dz = 4*acos(0) / (fs->dkz*this->descriptor->sizes[0]);
-}
-
-template <class rnumber, int interp_neighbours>
-interpolator_base<rnumber, interp_neighbours>::interpolator_base(
-        vorticity_equation<rnumber, FFTW> *fs,
-        base_polynomial_values BETA_POLYS)
-{
-//    this->descriptor = fs->rd;
-//    this->compute_beta = BETA_POLYS;
-//
-//    // compute dx, dy, dz;
-//    this->dx = 4*acos(0) / (fs->kk->dkx*this->descriptor->sizes[2]);
-//    this->dy = 4*acos(0) / (fs->kk->dky*this->descriptor->sizes[1]);
-//    this->dz = 4*acos(0) / (fs->kk->dkz*this->descriptor->sizes[0]);
-}
-
-template <class rnumber, int interp_neighbours>
-void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates(
-        const int nparticles,
-        const int pdimension,
-        const double *x,
-        int *xg,
-        double *xx)
-{
-    for (int p=0; p<nparticles; p++)
-        this->get_grid_coordinates(
-                x + p*pdimension,
-                xg + p*3,
-                xx + p*3);
-}
-
-template <class rnumber, int interp_neighbours>
-void interpolator_base<rnumber, interp_neighbours>::get_grid_coordinates(
-        const double *x,
-        int *xg,
-        double *xx)
-{
-    static double grid_size[] = {this->dx, this->dy, this->dz};
-    double tval;
-    for (int c=0; c<3; c++)
-    {
-        tval = floor(x[c]/grid_size[c]);
-        xg[c] = MOD(int(tval), this->descriptor->sizes[2-c]);
-        xx[c] = (x[c] - tval*grid_size[c]) / grid_size[c];
-    }
-}
-
-
-
-template class interpolator_base<float, 1>;
-template class interpolator_base<float, 2>;
-template class interpolator_base<float, 3>;
-template class interpolator_base<float, 4>;
-template class interpolator_base<float, 5>;
-template class interpolator_base<float, 6>;
-template class interpolator_base<float, 7>;
-template class interpolator_base<float, 8>;
-template class interpolator_base<float, 9>;
-template class interpolator_base<float, 10>;
-template class interpolator_base<double, 1>;
-template class interpolator_base<double, 2>;
-template class interpolator_base<double, 3>;
-template class interpolator_base<double, 4>;
-template class interpolator_base<double, 5>;
-template class interpolator_base<double, 6>;
-template class interpolator_base<double, 7>;
-template class interpolator_base<double, 8>;
-template class interpolator_base<double, 9>;
-template class interpolator_base<double, 10>;
-
diff --git a/bfps/cpp/interpolator_base.hpp b/bfps/cpp/interpolator_base.hpp
deleted file mode 100644
index f4c28db7b9de632e8ec4977dd67f929f06080e19..0000000000000000000000000000000000000000
--- a/bfps/cpp/interpolator_base.hpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include "fluid_solver_base.hpp"
-#include "vorticity_equation.hpp"
-#include "spline_n1.hpp"
-#include "spline_n2.hpp"
-#include "spline_n3.hpp"
-#include "spline_n4.hpp"
-#include "spline_n5.hpp"
-#include "spline_n6.hpp"
-#include "spline_n7.hpp"
-#include "spline_n8.hpp"
-#include "spline_n9.hpp"
-#include "spline_n10.hpp"
-#include "Lagrange_polys.hpp"
-
-#ifndef INTERPOLATOR_BASE
-
-#define INTERPOLATOR_BASE
-
-typedef void (*base_polynomial_values)(
-        const int derivative,
-        const double fraction,
-        double *__restrict__ destination);
-
-template <class rnumber, int interp_neighbours>
-class interpolator_base
-{
-    public:
-        /* pointer to polynomial function */
-        base_polynomial_values compute_beta;
-
-        /* descriptor of field to interpolate */
-        field_descriptor<rnumber> *descriptor;
-
-        /* physical parameters of field */
-        double dx, dy, dz;
-
-        interpolator_base(
-                fluid_solver_base<rnumber> *FSOLVER,
-                base_polynomial_values BETA_POLYS);
-
-        interpolator_base(
-                vorticity_equation<rnumber, FFTW> *FSOLVER,
-                base_polynomial_values BETA_POLYS);
-        virtual ~interpolator_base(){}
-
-        /* may not destroy input */
-        virtual int read_rFFTW(const void *src) = 0;
-
-        /* map real locations to grid coordinates */
-        void get_grid_coordinates(
-                const int nparticles,
-                const int pdimension,
-                const double *__restrict__ x,
-                int *__restrict__ xg,
-                double *__restrict__ xx);
-        void get_grid_coordinates(
-                const double *__restrict__ x,
-                int *__restrict__ xg,
-                double *__restrict__ xx);
-        /* interpolate field at an array of locations */
-        virtual void sample(
-                const int nparticles,
-                const int pdimension,
-                const double *__restrict__ x,
-                double *__restrict__ y,
-                const int *deriv = NULL) = 0;
-        /* interpolate 1 point */
-        virtual void operator()(
-                const int *__restrict__ xg,
-                const double *__restrict__ xx,
-                double *__restrict__ dest,
-                const int *deriv = NULL) = 0;
-
-        /* interpolate 1 point */
-        inline void operator()(
-                const double *__restrict__ x,
-                double *__restrict__ dest,
-                const int *deriv = NULL)
-        {
-            int xg[3];
-            double xx[3];
-            this->get_grid_coordinates(x, xg, xx);
-            (*this)(xg, xx, dest, deriv);
-        }
-};
-
-#endif//INTERPOLATOR_BASE
-
diff --git a/bfps/cpp/particles.cpp b/bfps/cpp/particles.cpp
deleted file mode 100644
index cdaf157cb912c3074faf84bfecf1d9b3752c78a7..0000000000000000000000000000000000000000
--- a/bfps/cpp/particles.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <cmath>
-#include <cassert>
-#include <cstring>
-#include <string>
-#include <sstream>
-
-#include "base.hpp"
-#include "particles.hpp"
-#include "fftw_tools.hpp"
-
-
-extern int myrank, nprocs;
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-particles<particle_type, rnumber, interp_neighbours>::particles(
-        const char *NAME,
-        const hid_t data_file_id,
-        interpolator_base<rnumber, interp_neighbours> *VEL,
-        const int TRAJ_SKIP,
-        const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
-            NAME,
-            TRAJ_SKIP,
-            data_file_id,
-            VEL->descriptor->comm)
-{
-    assert((INTEGRATION_STEPS <= 6) &&
-           (INTEGRATION_STEPS >= 1));
-    this->vel = VEL;
-    this->integration_steps = INTEGRATION_STEPS;
-    this->array_size = this->nparticles * state_dimension(particle_type);
-    this->state = new double[this->array_size];
-    std::fill_n(this->state, this->array_size, 0.0);
-    for (int i=0; i < this->integration_steps; i++)
-    {
-        this->rhs[i] = new double[this->array_size];
-        std::fill_n(this->rhs[i], this->array_size, 0.0);
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-particles<particle_type, rnumber, interp_neighbours>::~particles()
-{
-    delete[] this->state;
-    for (int i=0; i < this->integration_steps; i++)
-    {
-        delete[] this->rhs[i];
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void particles<particle_type, rnumber, interp_neighbours>::get_rhs(double *x, double *y)
-{
-    switch(particle_type)
-    {
-        case VELOCITY_TRACER:
-            this->vel->sample(this->nparticles, state_dimension(particle_type), x, y);
-            break;
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
-{
-    for (int i=this->integration_steps-2; i>=0; i--)
-        std::copy(this->rhs[i],
-                  this->rhs[i] + this->array_size,
-                  this->rhs[i+1]);
-}
-
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
-        const int nsteps)
-{
-    ptrdiff_t ii;
-    this->get_rhs(this->state, this->rhs[0]);
-    switch(nsteps)
-    {
-        case 1:
-            for (unsigned int p=0; p<this->nparticles; p++)
-                for (unsigned int i=0; i<state_dimension(particle_type); i++)
-                {
-                    ii = p*state_dimension(particle_type)+i;
-                    this->state[ii] += this->dt*this->rhs[0][ii];
-                }
-            break;
-        case 2:
-            for (unsigned int p=0; p<this->nparticles; p++)
-                for (unsigned int i=0; i<state_dimension(particle_type); i++)
-                {
-                    ii = p*state_dimension(particle_type)+i;
-                    this->state[ii] += this->dt*(3*this->rhs[0][ii]
-                                               -   this->rhs[1][ii])/2;
-                }
-            break;
-        case 3:
-            for (unsigned int p=0; p<this->nparticles; p++)
-                for (unsigned int i=0; i<state_dimension(particle_type); i++)
-                {
-                    ii = p*state_dimension(particle_type)+i;
-                    this->state[ii] += this->dt*(23*this->rhs[0][ii]
-                                               - 16*this->rhs[1][ii]
-                                               +  5*this->rhs[2][ii])/12;
-                }
-            break;
-        case 4:
-            for (unsigned int p=0; p<this->nparticles; p++)
-                for (unsigned int i=0; i<state_dimension(particle_type); i++)
-                {
-                    ii = p*state_dimension(particle_type)+i;
-                    this->state[ii] += this->dt*(55*this->rhs[0][ii]
-                                               - 59*this->rhs[1][ii]
-                                               + 37*this->rhs[2][ii]
-                                               -  9*this->rhs[3][ii])/24;
-                }
-            break;
-        case 5:
-            for (unsigned int p=0; p<this->nparticles; p++)
-                for (unsigned int i=0; i<state_dimension(particle_type); i++)
-                {
-                    ii = p*state_dimension(particle_type)+i;
-                    this->state[ii] += this->dt*(1901*this->rhs[0][ii]
-                                               - 2774*this->rhs[1][ii]
-                                               + 2616*this->rhs[2][ii]
-                                               - 1274*this->rhs[3][ii]
-                                               +  251*this->rhs[4][ii])/720;
-                }
-            break;
-        case 6:
-            for (unsigned int p=0; p<this->nparticles; p++)
-                for (unsigned int i=0; i<state_dimension(particle_type); i++)
-                {
-                    ii = p*state_dimension(particle_type)+i;
-                    this->state[ii] += this->dt*(4277*this->rhs[0][ii]
-                                               - 7923*this->rhs[1][ii]
-                                               + 9982*this->rhs[2][ii]
-                                               - 7298*this->rhs[3][ii]
-                                               + 2877*this->rhs[4][ii]
-                                               -  475*this->rhs[5][ii])/1440;
-                }
-            break;
-    }
-    this->roll_rhs();
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void particles<particle_type, rnumber, interp_neighbours>::step()
-{
-    this->AdamsBashforth((this->iteration < this->integration_steps) ?
-                            this->iteration+1 :
-                            this->integration_steps);
-    this->iteration++;
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void particles<particle_type, rnumber, interp_neighbours>::read()
-{
-    if (this->myrank == 0)
-        for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-        {
-            this->read_state_chunk(cindex, this->state+cindex*this->chunk_size*state_dimension(particle_type));
-            if (this->iteration > 0)
-                for (int i=0; i<this->integration_steps; i++)
-                    this->read_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*state_dimension(particle_type));
-        }
-    MPI_Bcast(
-            this->state,
-            this->array_size,
-            MPI_DOUBLE,
-            0,
-            this->comm);
-    if (this->iteration > 0)
-        for (int i = 0; i<this->integration_steps; i++)
-            MPI_Bcast(
-                    this->rhs[i],
-                    this->array_size,
-                    MPI_DOUBLE,
-                    0,
-                    this->comm);
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void particles<particle_type, rnumber, interp_neighbours>::write(
-        const bool write_rhs)
-{
-    if (this->myrank == 0)
-        for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-        {
-            this->write_state_chunk(cindex, this->state+cindex*this->chunk_size*state_dimension(particle_type));
-            if (write_rhs)
-                for (int i=0; i<this->integration_steps; i++)
-                    this->write_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*state_dimension(particle_type));
-        }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void particles<particle_type, rnumber, interp_neighbours>::sample(
-        interpolator_base<rnumber, interp_neighbours> *field,
-        const char *dset_name)
-{
-    double *y = new double[this->nparticles*3];
-    field->sample(this->nparticles, state_dimension(particle_type), this->state, y);
-    if (this->myrank == 0)
-        for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-            this->write_point3D_chunk(dset_name, cindex, y+cindex*this->chunk_size*3);
-    delete[] y;
-}
-
-
-/*****************************************************************************/
-template class particles<VELOCITY_TRACER, float, 1>;
-template class particles<VELOCITY_TRACER, float, 2>;
-template class particles<VELOCITY_TRACER, float, 3>;
-template class particles<VELOCITY_TRACER, float, 4>;
-template class particles<VELOCITY_TRACER, float, 5>;
-template class particles<VELOCITY_TRACER, float, 6>;
-template class particles<VELOCITY_TRACER, double, 1>;
-template class particles<VELOCITY_TRACER, double, 2>;
-template class particles<VELOCITY_TRACER, double, 3>;
-template class particles<VELOCITY_TRACER, double, 4>;
-template class particles<VELOCITY_TRACER, double, 5>;
-template class particles<VELOCITY_TRACER, double, 6>;
-/*****************************************************************************/
diff --git a/bfps/cpp/particles.hpp b/bfps/cpp/particles.hpp
deleted file mode 100644
index 03daf3e3fc866ac485b3649a28dfb13cf1b50ff1..0000000000000000000000000000000000000000
--- a/bfps/cpp/particles.hpp
+++ /dev/null
@@ -1,99 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <iostream>
-#include <hdf5.h>
-#include "base.hpp"
-#include "particles_base.hpp"
-#include "fluid_solver_base.hpp"
-#include "interpolator_base.hpp"
-
-#ifndef PARTICLES
-
-#define PARTICLES
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-class particles: public particles_io_base<particle_type>
-{
-    private:
-        double *state;
-        double *rhs[6];
-
-    public:
-        int array_size;
-        int integration_steps;
-        interpolator_base<rnumber, interp_neighbours> *vel;
-
-        /* simulation parameters */
-        double dt;
-
-        /* methods */
-
-        /* constructor and destructor.
-         * allocate and deallocate:
-         *  this->state
-         *  this->rhs
-         * */
-        particles(
-                const char *NAME,
-                const hid_t data_file_id,
-                interpolator_base<rnumber, interp_neighbours> *FIELD,
-                const int TRAJ_SKIP,
-                const int INTEGRATION_STEPS = 2);
-        ~particles();
-
-        void sample(
-                interpolator_base<rnumber, interp_neighbours> *field,
-                const char *dset_name);
-
-        inline void sample(
-                interpolator_base<rnumber, interp_neighbours> *field,
-                double *y)
-        {
-            field->sample(this->nparticles, state_dimension(particle_type), this->state, y);
-        }
-
-        void get_rhs(
-                double *__restrict__ x,
-                double *__restrict__ rhs);
-
-        /* input/output */
-        void read();
-        void write(
-                const char *dset_name,
-                const double *data);
-        void write(const bool write_rhs = true);
-
-        /* solvers */
-        void step();
-        void roll_rhs();
-        void AdamsBashforth(const int nsteps);
-};
-
-#endif//PARTICLES
-
diff --git a/bfps/cpp/particles/abstract_particles_input.hpp b/bfps/cpp/particles/abstract_particles_input.hpp
deleted file mode 100644
index 77dcbc638903a668ce6e2a0084815832b0580495..0000000000000000000000000000000000000000
--- a/bfps/cpp/particles/abstract_particles_input.hpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef ABSTRACT_PARTICLES_INPUT_HPP
-#define ABSTRACT_PARTICLES_INPUT_HPP
-
-#include <tuple>
-
-template <class partsize_t, class real_number>
-class abstract_particles_input {
-public:
-    virtual ~abstract_particles_input(){}
-
-    virtual partsize_t getTotalNbParticles()  = 0;
-    virtual partsize_t getLocalNbParticles()  = 0;
-    virtual int getNbRhs()  = 0;
-
-    virtual std::unique_ptr<real_number[]> getMyParticles()  = 0;
-    virtual std::unique_ptr<partsize_t[]> getMyParticlesIndexes()  = 0;
-    virtual std::vector<std::unique_ptr<real_number[]>> getMyRhs()  = 0;
-};
-
-
-#endif
diff --git a/bfps/cpp/particles/abstract_particles_system.hpp b/bfps/cpp/particles/abstract_particles_system.hpp
deleted file mode 100644
index 1c8592f37536e5c6c6b4df8f45cc855b3f21eb3f..0000000000000000000000000000000000000000
--- a/bfps/cpp/particles/abstract_particles_system.hpp
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef ABSTRACT_PARTICLES_SYSTEM_HPP
-#define ABSTRACT_PARTICLES_SYSTEM_HPP
-
-#include <memory>
-
-//- Not generic to enable sampling begin
-#include "field.hpp"
-#include "kspace.hpp"
-//- Not generic to enable sampling end
-
-
-template <class partsize_t, class real_number>
-class abstract_particles_system {
-public:
-    virtual void compute() = 0;
-
-    virtual void move(const real_number dt) = 0;
-
-    virtual void redistribute() = 0;
-
-    virtual void inc_step_idx() = 0;
-
-    virtual void shift_rhs_vectors() = 0;
-
-    virtual void completeLoop(const real_number dt) = 0;
-
-    virtual const real_number* getParticlesPositions() const = 0;
-
-    virtual const std::unique_ptr<real_number[]>* getParticlesRhs() const = 0;
-
-    virtual const partsize_t* getParticlesIndexes() const = 0;
-
-    virtual partsize_t getLocalNbParticles() const = 0;
-
-    virtual partsize_t getGlobalNbParticles() const = 0;
-
-    virtual int getNbRhs() const = 0;
-
-    virtual int get_step_idx() const = 0;
-
-    //- Not generic to enable sampling begin
-    virtual void sample_compute_field(const field<float, FFTW, ONE>& sample_field,
-                                real_number sample_rhs[]) = 0;
-    virtual void sample_compute_field(const field<float, FFTW, THREE>& sample_field,
-                                real_number sample_rhs[]) = 0;
-    virtual void sample_compute_field(const field<float, FFTW, THREExTHREE>& sample_field,
-                                real_number sample_rhs[]) = 0;
-    virtual void sample_compute_field(const field<double, FFTW, ONE>& sample_field,
-                                real_number sample_rhs[]) = 0;
-    virtual void sample_compute_field(const field<double, FFTW, THREE>& sample_field,
-                                real_number sample_rhs[]) = 0;
-    virtual void sample_compute_field(const field<double, FFTW, THREExTHREE>& sample_field,
-                                real_number sample_rhs[]) = 0;
-    //- Not generic to enable sampling end
-};
-
-#endif
diff --git a/bfps/cpp/particles/particles_output_sampling_hdf5.hpp b/bfps/cpp/particles/particles_output_sampling_hdf5.hpp
deleted file mode 100644
index 238c9acf9a16db9c36b81d3c6eb6dc2388bbf117..0000000000000000000000000000000000000000
--- a/bfps/cpp/particles/particles_output_sampling_hdf5.hpp
+++ /dev/null
@@ -1,188 +0,0 @@
-#ifndef PARTICLES_OUTPUT_SAMPLING_HDF5_HPP
-#define PARTICLES_OUTPUT_SAMPLING_HDF5_HPP
-
-#include "abstract_particles_output.hpp"
-
-#include <hdf5.h>
-
-template <class partsize_t,
-          class real_number,
-          int size_particle_positions,
-          int size_particle_rhs>
-class particles_output_sampling_hdf5 : public abstract_particles_output<partsize_t,
-                                                               real_number,
-                                                               size_particle_positions,
-                                                               size_particle_rhs>{
-    using Parent = abstract_particles_output<partsize_t,
-                                             real_number,
-                                             size_particle_positions,
-                                             size_particle_rhs>;
-
-    hid_t file_id, pgroup_id;
-
-    const std::string dataset_name;
-    const bool use_collective_io;
-
-public:
-    static bool DatasetExistsCol(MPI_Comm in_mpi_com,
-                                  const std::string& in_filename,
-                                  const std::string& in_groupname,
-                                 const std::string& in_dataset_name){
-        int my_rank;
-        AssertMpi(MPI_Comm_rank(in_mpi_com, &my_rank));
-
-        int dataset_exists = -1;
-
-        if(my_rank == 0){
-            // Parallel HDF5 write
-            hid_t file_id = H5Fopen(
-                    in_filename.c_str(),
-                    H5F_ACC_RDWR | H5F_ACC_DEBUG,
-                    H5P_DEFAULT);
-            assert(file_id >= 0);
-
-            dataset_exists = H5Lexists(
-                    file_id,
-                    (in_groupname + "/" + in_dataset_name).c_str(),
-                    H5P_DEFAULT);
-
-            int retTest = H5Fclose(file_id);
-            assert(retTest >= 0);
-        }
-
-        AssertMpi(MPI_Bcast( &dataset_exists, 1, MPI_INT, 0, in_mpi_com ));
-        return dataset_exists;
-    }
-
-    particles_output_sampling_hdf5(MPI_Comm in_mpi_com,
-                          const partsize_t inTotalNbParticles,
-                                   const std::string& in_filename,
-                                   const std::string& in_groupname,
-                          const std::string& in_dataset_name,
-                          const bool in_use_collective_io = false)
-            : Parent(in_mpi_com, inTotalNbParticles, 1),
-              dataset_name(in_dataset_name),
-              use_collective_io(in_use_collective_io){
-        if(Parent::isInvolved()){
-            hid_t plist_id_par = H5Pcreate(H5P_FILE_ACCESS);
-            assert(plist_id_par >= 0);
-            int retTest = H5Pset_fapl_mpio(
-                    plist_id_par,
-                    Parent::getComWriter(),
-                    MPI_INFO_NULL);
-            assert(retTest >= 0);
-
-            // Parallel HDF5 write
-            file_id = H5Fopen(
-                    in_filename.c_str(),
-                    H5F_ACC_RDWR | H5F_ACC_DEBUG,
-                    plist_id_par);
-            assert(file_id >= 0);
-            retTest = H5Pclose(plist_id_par);
-            assert(retTest >= 0);
-
-            pgroup_id = H5Gopen(
-                    file_id,
-                    in_groupname.c_str(),
-                    H5P_DEFAULT);
-            assert(pgroup_id >= 0);
-        }
-    }
-
-    ~particles_output_sampling_hdf5(){
-        if(Parent::isInvolved()){
-            int retTest = H5Gclose(pgroup_id);
-            assert(retTest >= 0);
-            retTest = H5Fclose(file_id);
-            assert(retTest >= 0);
-        }
-    }
-
-    void write(
-            const int /*idx_time_step*/,
-            const real_number* /*particles_positions*/,
-            const std::unique_ptr<real_number[]>* particles_rhs,
-            const partsize_t nb_particles,
-            const partsize_t particles_idx_offset) final{
-        assert(Parent::isInvolved());
-
-        TIMEZONE("particles_output_hdf5::write");
-
-        assert(particles_idx_offset < Parent::getTotalNbParticles() || (particles_idx_offset == Parent::getTotalNbParticles() && nb_particles == 0));
-        assert(particles_idx_offset+nb_particles <= Parent::getTotalNbParticles());
-
-        static_assert(std::is_same<real_number, double>::value ||
-                      std::is_same<real_number, float>::value,
-                      "real_number must be double or float");
-        const hid_t type_id = (sizeof(real_number) == 8 ? H5T_NATIVE_DOUBLE : H5T_NATIVE_FLOAT);
-
-        hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
-        assert(plist_id >= 0);
-        {
-            int rethdf = H5Pset_dxpl_mpio(plist_id, use_collective_io ? H5FD_MPIO_COLLECTIVE : H5FD_MPIO_INDEPENDENT);
-            assert(rethdf >= 0);
-        }
-        {
-            assert(size_particle_rhs >= 0);
-            const hsize_t datacount[3] = {hsize_t(Parent::getNbRhs()),
-                                          hsize_t(Parent::getTotalNbParticles()),
-                                          hsize_t(size_particle_rhs)};
-            hid_t dataspace = H5Screate_simple(3, datacount, NULL);
-            assert(dataspace >= 0);
-
-            hid_t dataset_id = H5Dcreate( pgroup_id,
-                                          dataset_name.c_str(),
-                                          type_id,
-                                          dataspace,
-                                          H5P_DEFAULT,
-                                          H5P_DEFAULT,
-                                          H5P_DEFAULT);
-            assert(dataset_id >= 0);
-
-            assert(particles_idx_offset >= 0);
-            const hsize_t count[3] = {
-                1,
-                hsize_t(nb_particles),
-                hsize_t(size_particle_rhs)};
-            const hsize_t offset[3] = {
-                0,
-                hsize_t(particles_idx_offset),
-                0};
-            hid_t memspace = H5Screate_simple(3, count, NULL);
-            assert(memspace >= 0);
-
-            hid_t filespace = H5Dget_space(dataset_id);
-            assert(filespace >= 0);
-            int rethdf = H5Sselect_hyperslab(
-                    filespace,
-                    H5S_SELECT_SET,
-                    offset,
-                    NULL,
-                    count,
-                    NULL);
-            assert(rethdf >= 0);
-
-            herr_t	status = H5Dwrite(
-                    dataset_id,
-                    type_id,
-                    memspace,
-                    filespace,
-                    plist_id,
-                    particles_rhs[0].get());
-            assert(status >= 0);
-            rethdf = H5Sclose(filespace);
-            assert(rethdf >= 0);
-            rethdf = H5Sclose(memspace);
-            assert(rethdf >= 0);
-            rethdf = H5Dclose(dataset_id);
-            assert(rethdf >= 0);
-        }
-
-        {
-            int rethdf = H5Pclose(plist_id);
-            assert(rethdf >= 0);
-        }
-    }
-};
-
-#endif
diff --git a/bfps/cpp/particles/particles_sampling.hpp b/bfps/cpp/particles/particles_sampling.hpp
deleted file mode 100644
index 3adc255341f3ca879d5cae1445124091f31b4394..0000000000000000000000000000000000000000
--- a/bfps/cpp/particles/particles_sampling.hpp
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifndef PARTICLES_SAMPLING_HPP
-#define PARTICLES_SAMPLING_HPP
-
-#include <memory>
-#include <string>
-
-#include "abstract_particles_system.hpp"
-#include "particles_output_sampling_hdf5.hpp"
-
-#include "field.hpp"
-#include "kspace.hpp"
-
-
-template <class partsize_t, class particles_rnumber, class rnumber, field_backend be, field_components fc>
-void sample_from_particles_system(const field<rnumber, be, fc>& in_field, // a pointer to a field<rnumber, FFTW, fc>
-                                  std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>& ps, // a pointer to an particles_system<double>
-                                  const std::string& filename,
-                                  const std::string& parent_groupname,
-                                  const std::string& fname){
-    const std::string datasetname = fname + std::string("/") + std::to_string(ps->get_step_idx());
-    const int size_particle_rhs = ncomp(fc);
-
-    // Stop here if already exists
-    if(particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3, size_particle_rhs>::DatasetExistsCol(MPI_COMM_WORLD,
-                                                                                                             filename,
-                                                                                                             parent_groupname,
-                                                                                                             datasetname)){
-        return;
-    }
-
-    const partsize_t nb_particles = ps->getLocalNbParticles();
-    std::unique_ptr<particles_rnumber[]> sample_rhs(new particles_rnumber[size_particle_rhs*nb_particles]);
-    std::fill_n(sample_rhs.get(), size_particle_rhs*nb_particles, 0);
-
-    ps->sample_compute_field(in_field, sample_rhs.get());
-
-
-
-    particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3, size_particle_rhs> outputclass(MPI_COMM_WORLD,
-                                                                                                    ps->getGlobalNbParticles(),
-                                                                                                    filename,
-                                                                                                    parent_groupname,
-                                                                                                    datasetname);
-    outputclass.save(ps->getParticlesPositions(),
-                     &sample_rhs,
-                     ps->getParticlesIndexes(),
-                     ps->getLocalNbParticles(),
-                     ps->get_step_idx());
-}
-
-#endif
-
diff --git a/bfps/cpp/particles_base.cpp b/bfps/cpp/particles_base.cpp
deleted file mode 100644
index 1410488410a429ff463a1751e86f78cc2157679b..0000000000000000000000000000000000000000
--- a/bfps/cpp/particles_base.cpp
+++ /dev/null
@@ -1,424 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <algorithm>
-#include <cassert>
-#include "particles_base.hpp"
-#include "scope_timer.hpp"
-
-template <particle_types particle_type>
-single_particle_state<particle_type>::single_particle_state()
-{
-    std::fill_n(this->data, state_dimension(particle_type), 0);
-}
-
-template <particle_types particle_type>
-single_particle_state<particle_type>::single_particle_state(
-        const single_particle_state<particle_type> &src)
-{
-    std::copy(
-            src.data,
-            src.data + state_dimension(particle_type),
-            this->data);
-}
-
-template <particle_types particle_type>
-single_particle_state<particle_type>::single_particle_state(
-        const double *src)
-{
-    std::copy(
-            src,
-            src + state_dimension(particle_type),
-            this->data);
-}
-
-template <particle_types particle_type>
-single_particle_state<particle_type>::~single_particle_state()
-{
-}
-
-template <particle_types particle_type>
-single_particle_state<particle_type> &single_particle_state<particle_type>::operator=(
-        const single_particle_state &src)
-{
-    std::copy(
-            src.data,
-            src.data + state_dimension(particle_type),
-            this->data);
-    return *this;
-}
-
-template <particle_types particle_type>
-single_particle_state<particle_type> &single_particle_state<particle_type>::operator=(
-        const double *src)
-{
-    std::copy(
-            src,
-            src + state_dimension(particle_type),
-            this->data);
-    return *this;
-}
-
-int get_chunk_offsets(
-        std::vector<hsize_t> data_dims,
-        std::vector<hsize_t> chnk_dims,
-        std::vector<std::vector<hsize_t>> &co)
-{
-    TIMEZONE("get_chunk_offsets");
-    std::vector<hsize_t> nchunks(data_dims);
-    int total_number_of_chunks = 1;
-    for (unsigned i=0; i<nchunks.size(); i++)
-    {
-        DEBUG_MSG("get_chunk_offset nchunks[%d] = %ld, chnk_dims[%d] = %ld\n",
-                i, nchunks[i], i, chnk_dims[i]);
-        nchunks[i] = data_dims[i] / chnk_dims[i];
-        total_number_of_chunks *= nchunks[i];
-    }
-    co.resize(total_number_of_chunks);
-    DEBUG_MSG("total number of chunks is %d\n", total_number_of_chunks);
-    for (int cindex=0; cindex < total_number_of_chunks; cindex++)
-    {
-        int cc = cindex;
-        for (unsigned i=0; i<nchunks.size(); i++)
-        {
-            int ii = nchunks.size()-1-i;
-            co[cindex].resize(nchunks.size());
-            co[cindex][ii] = cc % nchunks[ii];
-            cc = (cc - co[cindex][ii]) / nchunks[ii];
-            co[cindex][ii] *= chnk_dims[ii];
-        }
-    }
-    return EXIT_SUCCESS;
-}
-
-template <particle_types particle_type>
-particles_io_base<particle_type>::particles_io_base(
-        const char *NAME,
-        const int TRAJ_SKIP,
-        const hid_t data_file_id,
-        MPI_Comm COMM)
-{
-    TIMEZONE("particles_io_base::particles_io_base");
-    this->name = std::string(NAME);
-    this->traj_skip = TRAJ_SKIP;
-    this->comm = COMM;
-    MPI_Comm_rank(COMM, &this->myrank);
-    MPI_Comm_size(COMM, &this->nprocs);
-
-    if (this->myrank == 0)
-    {
-        hid_t dset, prop_list, dspace;
-        this->hdf5_group_id = H5Gopen(data_file_id, this->name.c_str(), H5P_DEFAULT);
-        dset = H5Dopen(this->hdf5_group_id, "state", H5P_DEFAULT);
-        dspace = H5Dget_space(dset);
-        this->hdf5_state_dims.resize(H5Sget_simple_extent_ndims(dspace));
-        H5Sget_simple_extent_dims(dspace, &this->hdf5_state_dims.front(), NULL);
-        assert(this->hdf5_state_dims[this->hdf5_state_dims.size()-1] == state_dimension(particle_type));
-        this->nparticles = 1;
-        for (unsigned int i=1; i<this->hdf5_state_dims.size()-1; i++)
-            this->nparticles *= this->hdf5_state_dims[i];
-        prop_list = H5Dget_create_plist(dset);
-        this->hdf5_state_chunks.resize(this->hdf5_state_dims.size());
-        H5Pget_chunk(prop_list, this->hdf5_state_dims.size(), &this->hdf5_state_chunks.front());
-        H5Pclose(prop_list);
-        H5Sclose(dspace);
-        H5Dclose(dset);
-        this->chunk_size = 1;
-        for (unsigned int i=1; i<this->hdf5_state_dims.size()-1; i++)
-            this->chunk_size *= this->hdf5_state_chunks[i];
-        dset = H5Dopen(this->hdf5_group_id, "rhs", H5P_DEFAULT);
-        dspace = H5Dget_space(dset);
-        this->hdf5_rhs_dims.resize(H5Sget_simple_extent_ndims(dspace));
-        H5Sget_simple_extent_dims(dspace, &this->hdf5_rhs_dims.front(), NULL);
-        prop_list = H5Dget_create_plist(dset);
-        this->hdf5_rhs_chunks.resize(this->hdf5_rhs_dims.size());
-        H5Pget_chunk(prop_list, this->hdf5_rhs_dims.size(), &this->hdf5_rhs_chunks.front());
-        H5Pclose(prop_list);
-        H5Sclose(dspace);
-        H5Dclose(dset);
-    }
-    DEBUG_MSG("hello, rank 0 just read particle thingie\n");
-
-    int tmp;
-    tmp = this->hdf5_state_dims.size();
-    MPI_Bcast(
-            &tmp,
-            1,
-            MPI_INTEGER,
-            0,
-            this->comm);
-    if (this->myrank != 0)
-    {
-        this->hdf5_state_dims.resize(tmp);
-        this->hdf5_state_chunks.resize(tmp);
-    }
-    DEBUG_MSG("successfully resized state_dims and state_chunks\n");
-    MPI_Bcast(
-            &this->hdf5_state_dims.front(),
-            this->hdf5_state_dims.size(),
-            // hsize_t is in fact unsigned long long. Will this ever change...?
-            MPI_UNSIGNED_LONG_LONG,
-            0,
-            this->comm);
-    MPI_Bcast(
-            &this->hdf5_state_chunks.front(),
-            this->hdf5_state_chunks.size(),
-            MPI_UNSIGNED_LONG_LONG,
-            0,
-            this->comm);
-    DEBUG_MSG("successfully broadcasted state_dims and state_chunks\n");
-    for (unsigned i=0; i<this->hdf5_state_chunks.size(); i++)
-        DEBUG_MSG(
-                "hdf5_state_dims[%d] = %ld, hdf5_state_chunks[%d] = %ld\n",
-                i, this->hdf5_state_dims[i],
-                i, this->hdf5_state_chunks[i]
-                );
-    std::vector<hsize_t> tdims(this->hdf5_state_dims), tchnk(this->hdf5_state_chunks);
-    tdims.erase(tdims.begin()+0);
-    tchnk.erase(tchnk.begin()+0);
-    tdims.erase(tdims.end()-1);
-    tchnk.erase(tchnk.end()-1);
-    DEBUG_MSG("before get_chunk_offsets\n");
-    get_chunk_offsets(tdims, tchnk, this->chunk_offsets);
-    DEBUG_MSG("after get_chunk_offsets\n");
-    MPI_Bcast(
-            &this->chunk_size,
-            1,
-            MPI_UNSIGNED,
-            0,
-            this->comm);
-    MPI_Bcast(
-            &this->nparticles,
-            1,
-            MPI_UNSIGNED,
-            0,
-            this->comm);
-    DEBUG_MSG("nparticles = %d, chunk_size = %d\n",
-            this->nparticles,
-            this->chunk_size);
-    DEBUG_MSG("exiting particles_io_base constructor\n");
-}
-
-template <particle_types particle_type>
-particles_io_base<particle_type>::~particles_io_base()
-{
-    if(this->myrank == 0)
-        H5Gclose(this->hdf5_group_id);
-}
-
-template <particle_types particle_type>
-void particles_io_base<particle_type>::read_state_chunk(
-        const int cindex,
-        double *data)
-{
-    TIMEZONE("particles_io_base::read_state_chunk");
-    DEBUG_MSG("entered read_state_chunk\n");
-    hid_t dset = H5Dopen(this->hdf5_group_id, "state", H5P_DEFAULT);
-    hid_t rspace = H5Dget_space(dset);
-    std::vector<hsize_t> mem_dims(this->hdf5_state_chunks);
-    mem_dims[0] = 1;
-    hid_t mspace = H5Screate_simple(
-            this->hdf5_state_dims.size(),
-            &mem_dims.front(),
-            NULL);
-    hsize_t *offset = new hsize_t[this->hdf5_state_dims.size()];
-    offset[0] = this->iteration / this->traj_skip;
-    for (unsigned int i=1; i<this->hdf5_state_dims.size()-1; i++)
-        offset[i] = this->chunk_offsets[cindex][i-1];
-    offset[this->hdf5_state_dims.size()-1] = 0;
-    H5Sselect_hyperslab(
-            rspace,
-            H5S_SELECT_SET,
-            offset,
-            NULL,
-            &mem_dims.front(),
-            NULL);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, data);
-    H5Sclose(mspace);
-    H5Sclose(rspace);
-    H5Dclose(dset);
-    delete[] offset;
-    DEBUG_MSG("exiting read_state_chunk\n");
-}
-
-template <particle_types particle_type>
-void particles_io_base<particle_type>::write_state_chunk(
-        const int cindex,
-        const double *data)
-{
-    TIMEZONE("particles_io_base::write_state_chunk");
-    hid_t dset = H5Dopen(this->hdf5_group_id, "state", H5P_DEFAULT);
-    hid_t rspace = H5Dget_space(dset);
-    std::vector<hsize_t> mem_dims(this->hdf5_state_chunks);
-    mem_dims[0] = 1;
-    hid_t mspace = H5Screate_simple(
-            this->hdf5_state_dims.size(),
-            &mem_dims.front(),
-            NULL);
-    hsize_t *offset = new hsize_t[this->hdf5_state_dims.size()];
-    offset[0] = this->iteration / this->traj_skip;
-    for (unsigned int i=1; i<this->hdf5_state_dims.size()-1; i++)
-        offset[i] = this->chunk_offsets[cindex][i-1];
-    offset[this->hdf5_state_dims.size()-1] = 0;
-    H5Sselect_hyperslab(
-            rspace,
-            H5S_SELECT_SET,
-            offset,
-            NULL,
-            &mem_dims.front(),
-            NULL);
-    H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, data);
-    H5Sclose(mspace);
-    H5Sclose(rspace);
-    H5Dclose(dset);
-    delete[] offset;
-}
-
-template <particle_types particle_type>
-void particles_io_base<particle_type>::read_rhs_chunk(
-        const int cindex,
-        const int rhsindex,
-        double *data)
-{
-    TIMEZONE("particles_io_base::read_rhs_chunk");
-    //DEBUG_MSG("entered read_rhs_chunk\n");
-    hid_t dset = H5Dopen(this->hdf5_group_id, "rhs", H5P_DEFAULT);
-    hid_t rspace = H5Dget_space(dset);
-    std::vector<hsize_t> mem_dims(this->hdf5_rhs_chunks);
-    mem_dims[0] = 1;
-    mem_dims[1] = 1;
-    hid_t mspace = H5Screate_simple(
-            this->hdf5_rhs_dims.size(),
-            &mem_dims.front(),
-            NULL);
-    hsize_t *offset = new hsize_t[this->hdf5_rhs_dims.size()];
-    offset[0] = this->hdf5_rhs_dims[0]-2;
-    offset[1] = rhsindex;
-    for (unsigned int i=2; i<this->hdf5_rhs_dims.size()-1; i++)
-        offset[i] = this->chunk_offsets[cindex][i-2];
-    offset[this->hdf5_rhs_dims.size()-1] = 0;
-    //for (int i=0; i<this->hdf5_rhs_dims.size(); i++)
-    //    DEBUG_MSG("rhs dim %d: size=%d chunk=%d offset=%d\n",
-    //        i, this->hdf5_rhs_dims[i], this->hdf5_rhs_chunks[i], offset[i]);
-    H5Sselect_hyperslab(
-            rspace,
-            H5S_SELECT_SET,
-            offset,
-            NULL,
-            &mem_dims.front(),
-            NULL);
-    //DEBUG_MSG("selected hyperslab\n");
-    H5Dread(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, data);
-    //DEBUG_MSG("data has been read\n");
-    H5Sclose(mspace);
-    H5Sclose(rspace);
-    H5Dclose(dset);
-    delete[] offset;
-    //DEBUG_MSG("exiting read_rhs_chunk\n");
-}
-
-template <particle_types particle_type>
-void particles_io_base<particle_type>::write_rhs_chunk(
-        const int cindex,
-        const int rhsindex,
-        const double *data)
-{
-    TIMEZONE("particles_io_base::write_rhs_chunk");
-    hid_t dset = H5Dopen(this->hdf5_group_id, "rhs", H5P_DEFAULT);
-    hid_t rspace = H5Dget_space(dset);
-    std::vector<hsize_t> mem_dims(this->hdf5_rhs_chunks);
-    mem_dims[0] = 1;
-    mem_dims[1] = 1;
-    hid_t mspace = H5Screate_simple(
-            this->hdf5_rhs_dims.size(),
-            &mem_dims.front(),
-            NULL);
-    hsize_t *offset = new hsize_t[this->hdf5_rhs_dims.size()];
-    offset[0] = this->hdf5_rhs_dims[0]-1;
-    offset[1] = rhsindex;
-    for (unsigned int i=2; i<this->hdf5_rhs_dims.size()-1; i++)
-        offset[i] = this->chunk_offsets[cindex][i-2];
-    offset[this->hdf5_rhs_dims.size()-1] = 0;
-    DEBUG_MSG("rhs write offsets are %d %d %d %d\n",
-            offset[0], offset[1], offset[2], offset[3]);
-    H5Sselect_hyperslab(
-            rspace,
-            H5S_SELECT_SET,
-            offset,
-            NULL,
-            &mem_dims.front(),
-            NULL);
-    H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, data);
-    H5Sclose(mspace);
-    H5Sclose(rspace);
-    H5Dclose(dset);
-    delete[] offset;
-}
-
-template <particle_types particle_type>
-void particles_io_base<particle_type>::write_point3D_chunk(
-        const std::string dset_name,
-        const int cindex,
-        const double *data)
-{
-    TIMEZONE("particles_io_base::write_point3D_chunk");
-    hid_t dset = H5Dopen(this->hdf5_group_id, dset_name.c_str(), H5P_DEFAULT);
-    hid_t rspace = H5Dget_space(dset);
-    std::vector<hsize_t> mem_dims(this->hdf5_state_chunks);
-    mem_dims[0] = 1;
-    mem_dims[mem_dims.size()-1] = 3;
-    hid_t mspace = H5Screate_simple(
-            this->hdf5_state_dims.size(),
-            &mem_dims.front(),
-            NULL);
-    hsize_t *offset = new hsize_t[this->hdf5_state_dims.size()];
-    offset[0] = this->iteration / this->traj_skip;
-    for (unsigned int i=1; i<this->hdf5_state_dims.size()-1; i++)
-        offset[i] = this->chunk_offsets[cindex][i-1];
-    offset[this->hdf5_state_dims.size()-1] = 0;
-    H5Sselect_hyperslab(
-            rspace,
-            H5S_SELECT_SET,
-            offset,
-            NULL,
-            &mem_dims.front(),
-            NULL);
-    H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, data);
-    H5Sclose(mspace);
-    H5Sclose(rspace);
-    H5Dclose(dset);
-    delete[] offset;
-}
-
-/*****************************************************************************/
-template class single_particle_state<POINT3D>;
-template class single_particle_state<VELOCITY_TRACER>;
-
-template class particles_io_base<VELOCITY_TRACER>;
-/*****************************************************************************/
-
diff --git a/bfps/cpp/particles_base.hpp b/bfps/cpp/particles_base.hpp
deleted file mode 100644
index 8afd5d439cdc121982868b5eadc991cdc1c5abdb..0000000000000000000000000000000000000000
--- a/bfps/cpp/particles_base.hpp
+++ /dev/null
@@ -1,136 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include <vector>
-#include <hdf5.h>
-#include <unordered_map>
-#include "interpolator_base.hpp"
-
-#ifndef PARTICLES_BASE
-
-#define PARTICLES_BASE
-
-/* particle types */
-enum particle_types {POINT3D, VELOCITY_TRACER};
-
-/* space dimension */
-constexpr unsigned int state_dimension(particle_types particle_type)
-{
-    return ((particle_type == POINT3D) ? 3 : (
-            (particle_type == VELOCITY_TRACER) ? 3 :
-            3));
-}
-
-/* 1 particle state type */
-
-template <particle_types particle_type>
-class single_particle_state
-{
-    public:
-        double data[state_dimension(particle_type)];
-
-        single_particle_state();
-        single_particle_state(const single_particle_state &src);
-        single_particle_state(const double *src);
-        ~single_particle_state();
-
-        single_particle_state<particle_type> &operator=(const single_particle_state &src);
-        single_particle_state<particle_type> &operator=(const double *src);
-
-        inline double &operator[](const int i)
-        {
-            return this->data[i];
-        }
-};
-
-std::vector<std::vector<hsize_t>> get_chunk_offsets(
-        std::vector<hsize_t> data_dims,
-        std::vector<hsize_t> chnk_dims);
-
-template <particle_types particle_type>
-class particles_io_base
-{
-    protected:
-        int myrank, nprocs;
-        MPI_Comm comm;
-
-        unsigned int nparticles;
-
-        std::string name;
-        unsigned int chunk_size;
-        int traj_skip;
-
-        hid_t hdf5_group_id;
-        std::vector<hsize_t> hdf5_state_dims, hdf5_state_chunks;
-        std::vector<hsize_t> hdf5_rhs_dims, hdf5_rhs_chunks;
-
-        std::vector<std::vector<hsize_t>> chunk_offsets;
-
-        particles_io_base(
-                const char *NAME,
-                const int TRAJ_SKIP,
-                const hid_t data_file_id,
-                MPI_Comm COMM);
-        virtual ~particles_io_base();
-
-        void read_state_chunk(
-                const int cindex,
-                double *__restrict__ data);
-        void write_state_chunk(
-                const int cindex,
-                const double *data);
-        void read_rhs_chunk(
-                const int cindex,
-                const int rhsindex,
-                double *__restrict__ data);
-        void write_rhs_chunk(
-                const int cindex,
-                const int rhsindex,
-                const double *data);
-
-        void write_point3D_chunk(
-                const std::string dset_name,
-                const int cindex,
-                const double *data);
-
-    public:
-        int iteration;
-
-        inline const char *get_name()
-        {
-            return this->name.c_str();
-        }
-        inline const unsigned int get_number_of_chunks()
-        {
-            return this->chunk_offsets.size();
-        }
-        inline const unsigned int get_number_of_rhs_chunks();
-        virtual void read() = 0;
-        virtual void write(const bool write_rhs = true) = 0;
-};
-
-#endif//PARTICLES_BASE
-
diff --git a/bfps/cpp/rFFTW_distributed_particles.cpp b/bfps/cpp/rFFTW_distributed_particles.cpp
deleted file mode 100644
index 265975f8c817a1b40942e076bd016c2921618bbc..0000000000000000000000000000000000000000
--- a/bfps/cpp/rFFTW_distributed_particles.cpp
+++ /dev/null
@@ -1,804 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <cmath>
-#include <cassert>
-#include <cstring>
-#include <string>
-#include <sstream>
-#include <set>
-#include <algorithm>
-#include <ctime>
-
-#include "base.hpp"
-#include "rFFTW_distributed_particles.hpp"
-#include "fftw_tools.hpp"
-#include "scope_timer.hpp"
-
-
-extern int myrank, nprocs;
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_distributed_particles(
-        const char *NAME,
-        const hid_t data_file_id,
-        rFFTW_interpolator<rnumber, interp_neighbours> *VEL,
-        const int TRAJ_SKIP,
-        const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
-            NAME,
-            TRAJ_SKIP,
-            data_file_id,
-            VEL->descriptor->comm)
-{
-    TIMEZONE("rFFTW_distributed_particles::rFFTW_distributed_particles");
-    /* check that integration_steps has a valid value.
-     * If NDEBUG is defined, "assert" doesn't do anything.
-     * With NDEBUG defined, and an invalid INTEGRATION_STEPS,
-     * the particles will simply sit still.
-     * */
-    assert((INTEGRATION_STEPS <= 6) &&
-           (INTEGRATION_STEPS >= 1));
-    /* check that the field layout is compatible with this class.
-     * if it's not, the code will fail in bad ways, most likely ending up
-     * with various CPUs locked in some MPI send/receive.
-     * therefore I prefer to just kill the code at this point,
-     * no matter whether or not NDEBUG is present.
-     * */
-    if (interp_neighbours*2+2 > VEL->descriptor->subsizes[0])
-    {
-        DEBUG_MSG("parameters incompatible with rFFTW_distributed_particles.\n"
-                  "interp kernel size is %d, local_z_size is %d\n",
-                  interp_neighbours*2+2, VEL->descriptor->subsizes[0]);
-        if (VEL->descriptor->myrank == 0)
-            std::cerr << "parameters incompatible with rFFTW_distributed_particles." << std::endl;
-        exit(0);
-    }
-    this->vel = VEL;
-    this->rhs.resize(INTEGRATION_STEPS);
-    this->integration_steps = INTEGRATION_STEPS;
-    /* the particles are expected to be evenly distributed among processes.
-     * therefore allocating twice that amount of memory seems enough.
-     * */
-    this->state.reserve(2*this->nparticles / this->nprocs);
-    for (unsigned int i=0; i<this->rhs.size(); i++)
-        this->rhs[i].reserve(2*this->nparticles / this->nprocs);
-
-    /* build communicators and stuff for interpolation */
-
-    /* number of processors per domain */
-    this->domain_nprocs[-1] = 2; // domain in common with lower z CPU
-    this->domain_nprocs[ 0] = 1; // local domain
-    this->domain_nprocs[ 1] = 2; // domain in common with higher z CPU
-
-    /* initialize domain bins */
-    this->domain_particles[-1] = std::unordered_set<int>();
-    this->domain_particles[ 0] = std::unordered_set<int>();
-    this->domain_particles[ 1] = std::unordered_set<int>();
-    this->domain_particles[-1].reserve(unsigned(
-                1.5*(interp_neighbours*2+2)*
-                float(this->nparticles) /
-                this->nprocs));
-    this->domain_particles[ 1].reserve(unsigned(
-                1.5*(interp_neighbours*2+2)*
-                float(this->nparticles) /
-                this->nprocs));
-    this->domain_particles[ 0].reserve(unsigned(
-                1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)*
-                float(this->nparticles) /
-                this->nprocs));
-
-    int color, key;
-    MPI_Comm tmpcomm;
-    for (int rank=0; rank<this->nprocs; rank++)
-    {
-        color = MPI_UNDEFINED;
-        key = MPI_UNDEFINED;
-        if (this->myrank == rank)
-        {
-            color = rank;
-            key = 0;
-        }
-        if (this->myrank == MOD(rank + 1, this->nprocs))
-        {
-            color = rank;
-            key = 1;
-        }
-        MPI_Comm_split(this->comm, color, key, &tmpcomm);
-        if (this->myrank == rank)
-            this->domain_comm[ 1] = tmpcomm;
-        if (this->myrank == MOD(rank+1, this->nprocs))
-            this->domain_comm[-1] = tmpcomm;
-
-    }
-
-    /* following code may be useful in the future for the general case */
-    //this->interp_comm.resize(this->vel->descriptor->sizes[0]);
-    //this->interp_nprocs.resize(this->vel->descriptor->sizes[0]);
-    //for (int zg=0; zg<this->vel->descriptor->sizes[0]; zg++)
-    //{
-    //    color = (this->vel->get_rank_info(
-    //                (zg+.5)*this->vel->dz, rminz, rmaxz) ? zg : MPI_UNDEFINED);
-    //    key = zg - this->vel->descriptor->starts[0] + interp_neighbours;
-    //    MPI_Comm_split(this->comm, color, key, &this->interp_comm[zg]);
-    //    if (this->interp_comm[zg] != MPI_COMM_NULL)
-    //        MPI_Comm_size(this->interp_comm[zg], &this->interp_nprocs[zg]);
-    //    else
-    //        this->interp_nprocs[zg] = 0;
-    //}
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::~rFFTW_distributed_particles()
-{
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
-        rFFTW_interpolator<rnumber, interp_neighbours> *field,
-        const std::unordered_map<int, single_particle_state<particle_type>> &x,
-        const std::unordered_map<int, std::unordered_set<int>> &dp,
-        std::unordered_map<int, single_particle_state<POINT3D>> &y)
-{
-    TIMEZONE("rFFTW_distributed_particles::sample");
-    double *yyy;
-    double *yy;
-    y.clear();
-    /* local z domain */
-    yy = new double[3];
-    for (auto p: dp.at(0))
-    {
-        (*field)(x.find(p)->second.data, yy);
-        y[p] = yy;
-    }
-    delete[] yy;
-    /* boundary z domains */
-    int domain_index;
-    for (int rankpair = 0; rankpair < this->nprocs; rankpair++)
-    {
-        if (this->myrank == rankpair)
-            domain_index = 1;
-        if (this->myrank == MOD(rankpair+1, this->nprocs))
-            domain_index = -1;
-        if (this->myrank == rankpair ||
-            this->myrank == MOD(rankpair+1, this->nprocs))
-        {
-            yy = new double[3*dp.at(domain_index).size()];
-            yyy = new double[3*dp.at(domain_index).size()];
-            int tindex;
-            tindex = 0;
-            // can this sorting be done more efficiently?
-            std::vector<int> ordered_dp;
-            {
-                TIMEZONE("rFFTW_distributed_particles::sample::ordered_dp");
-            ordered_dp.reserve(dp.at(domain_index).size());
-            for (auto p: dp.at(domain_index))
-                ordered_dp.push_back(p);
-            //std::set<int> ordered_dp(dp.at(domain_index));
-            std::sort(ordered_dp.begin(), ordered_dp.end());
-            }
-
-            for (auto p: ordered_dp)
-            //for (auto p: dp.at(domain_index))
-            {
-                (*field)(x.at(p).data, yy + tindex*3);
-                tindex++;
-            }
-            {
-                TIMEZONE("rFFTW_distributed_particles::sample::MPI_Allreduce");
-                MPI_Allreduce(
-                    yy,
-                    yyy,
-                    3*dp.at(domain_index).size(),
-                    MPI_DOUBLE,
-                    MPI_SUM,
-                    this->domain_comm[domain_index]);
-            }
-            tindex = 0;
-            for (auto p: ordered_dp)
-            //for (auto p: dp.at(domain_index))
-            {
-                y[p] = yyy + tindex*3;
-                tindex++;
-            }
-            delete[] yy;
-            delete[] yyy;
-        }
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs(
-        const std::unordered_map<int, single_particle_state<particle_type>> &x,
-        const std::unordered_map<int, std::unordered_set<int>> &dp,
-        std::unordered_map<int, single_particle_state<particle_type>> &y)
-{
-    std::unordered_map<int, single_particle_state<POINT3D>> yy;
-    switch(particle_type)
-    {
-        case VELOCITY_TRACER:
-            this->sample(this->vel, x, dp, yy);
-            y.clear();
-            y.reserve(yy.size());
-            y.rehash(this->nparticles);
-            for (auto &pp: yy)
-                y[pp.first] = pp.second.data;
-            break;
-    }
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
-        rFFTW_interpolator<rnumber, interp_neighbours> *field,
-        const char *dset_name)
-{
-    std::unordered_map<int, single_particle_state<POINT3D>> y;
-    this->sample(field, this->state, this->domain_particles, y);
-    this->write(dset_name, y);
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
-{
-    for (int i=this->integration_steps-2; i>=0; i--)
-        rhs[i+1] = rhs[i];
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute(
-        std::unordered_map<int, single_particle_state<particle_type>> &x,
-        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals,
-        std::unordered_map<int, std::unordered_set<int>> &dp)
-{
-    TIMEZONE("rFFTW_distributed_particles::redistribute");
-    //DEBUG_MSG("entered redistribute\n");
-    /* get new distribution of particles */
-    std::unordered_map<int, std::unordered_set<int>> newdp;
-    {
-        TIMEZONE("sort_into_domains");
-        this->sort_into_domains(x, newdp);
-    }
-    /* take care of particles that are leaving the shared domains */
-    int dindex[2] = {-1, 1};
-    // for each D of the 2 shared domains
-    {
-        TIMEZONE("Loop1");
-        for (int di=0; di<2; di++)
-            // for all particles previously in D
-            for (auto p: dp[dindex[di]])
-            {
-                // if the particle is no longer in D
-                if (newdp[dindex[di]].find(p) == newdp[dindex[di]].end())
-                {
-                    // and the particle is not in the local domain
-                    if (newdp[0].find(p) == newdp[0].end())
-                    {
-                        // remove the particle from the local list
-                        x.erase(p);
-                        for (unsigned int i=0; i<vals.size(); i++)
-                            vals[i].erase(p);
-                    }
-                    // if the particle is in the local domain, do nothing
-                }
-            }
-    }
-    /* take care of particles that are entering the shared domains */
-    /* neighbouring rank offsets */
-    int ro[2];
-    ro[0] = -1;
-    ro[1] = 1;
-    /* particles to send, particles to receive */
-    std::vector<int> ps[2], pr[2];
-    for (int tcounter = 0; tcounter < 2; tcounter++)
-    {
-        ps[tcounter].reserve(newdp[dindex[tcounter]].size());
-    }
-    /* number of particles to send, number of particles to receive */
-    int nps[2], npr[2];
-    int rsrc, rdst;
-    /* get list of id-s to send */
-    {
-        TIMEZONE("Loop2");
-        for (auto &p: dp[0])
-        {
-            for (int di=0; di<2; di++)
-            {
-                if (newdp[dindex[di]].find(p) != newdp[dindex[di]].end())
-                    ps[di].push_back(p);
-            }
-        }
-    }
-    /* prepare data for send recv */
-    for (int i=0; i<2; i++)
-        nps[i] = ps[i].size();
-    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
-        for (int i=0; i<2; i++)
-        {
-            rdst = MOD(rsrc+ro[i], this->nprocs);
-            if (this->myrank == rsrc){
-                TIMEZONE("MPI_Send");
-                MPI_Send(
-                        nps+i,
-                        1,
-                        MPI_INTEGER,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst)+i,
-                        this->comm);
-            }
-            if (this->myrank == rdst){
-                TIMEZONE("MPI_Recv");
-                MPI_Recv(
-                        npr+1-i,
-                        1,
-                        MPI_INTEGER,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst)+i,
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-            }
-        }
-    //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]);
-    //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]);
-    for (int i=0; i<2; i++)
-        pr[i].resize(npr[i]);
-
-    int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1];
-    buffer_size = (buffer_size > npr[0])? buffer_size : npr[0];
-    buffer_size = (buffer_size > npr[1])? buffer_size : npr[1];
-    //DEBUG_MSG("buffer size is %d\n", buffer_size);
-    double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())];
-    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
-        for (int i=0; i<2; i++)
-        {
-            rdst = MOD(rsrc+ro[i], this->nprocs);
-            if (this->myrank == rsrc && nps[i] > 0)
-            {
-                TIMEZONE("this->myrank == rsrc && nps[i] > 0");
-                MPI_Send(
-                        &ps[i].front(),
-                        nps[i],
-                        MPI_INTEGER,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst),
-                        this->comm);
-                int pcounter = 0;
-                for (int p: ps[i])
-                {
-                    std::copy(x[p].data,
-                              x[p].data + state_dimension(particle_type),
-                              buffer + pcounter*(1+vals.size())*state_dimension(particle_type));
-                    for (unsigned int tindex=0; tindex<vals.size(); tindex++)
-                    {
-                        std::copy(vals[tindex][p].data,
-                                  vals[tindex][p].data + state_dimension(particle_type),
-                                  buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type));
-                    }
-                    pcounter++;
-                }
-                MPI_Send(
-                        buffer,
-                        nps[i]*(1+vals.size())*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        rdst,
-                        2*(rsrc*this->nprocs + rdst)+1,
-                        this->comm);
-            }
-            if (this->myrank == rdst && npr[1-i] > 0)
-            {
-                TIMEZONE("this->myrank == rdst && npr[1-i] > 0");
-                MPI_Recv(
-                        &pr[1-i].front(),
-                        npr[1-i],
-                        MPI_INTEGER,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst),
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-                MPI_Recv(
-                        buffer,
-                        npr[1-i]*(1+vals.size())*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        rsrc,
-                        2*(rsrc*this->nprocs + rdst)+1,
-                        this->comm,
-                        MPI_STATUS_IGNORE);
-                int pcounter = 0;
-                for (int p: pr[1-i])
-                {
-                    x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type);
-                    newdp[1-i].insert(p);
-                    for (unsigned int tindex=0; tindex<vals.size(); tindex++)
-                    {
-                        vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type);
-                    }
-                    pcounter++;
-                }
-            }
-        }
-    delete[] buffer;
-    // x has been changed, so newdp is obsolete
-    // we need to sort into domains again
-    {
-        TIMEZONE("sort_into_domains2");
-        this->sort_into_domains(x, dp);
-    }
-
-#ifndef NDEBUG
-    /* check that all particles at x are local */
-    //for (auto &pp: x)
-    //    if (this->vel->get_rank(pp.second.data[2]) != this->myrank)
-    //    {
-    //        DEBUG_MSG("found particle %d with rank %d\n",
-    //                pp.first,
-    //                this->vel->get_rank(pp.second.data[2]));
-    //        assert(false);
-    //    }
-#endif
-    //DEBUG_MSG("exiting redistribute\n");
-}
-
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
-        const int nsteps)
-{
-    this->get_rhs(this->state, this->domain_particles, this->rhs[0]);
-#define AdamsBashforth_LOOP_PREAMBLE \
-    for (auto &pp: this->state) \
-        for (unsigned int i=0; i<state_dimension(particle_type); i++)
-    switch(nsteps)
-    {
-        case 1:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*this->rhs[0][pp.first][i];
-            break;
-        case 2:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i]
-                                    -   this->rhs[1][pp.first][i])/2;
-            break;
-        case 3:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i]
-                                    - 16*this->rhs[1][pp.first][i]
-                                    +  5*this->rhs[2][pp.first][i])/12;
-            break;
-        case 4:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i]
-                                    - 59*this->rhs[1][pp.first][i]
-                                    + 37*this->rhs[2][pp.first][i]
-                                    -  9*this->rhs[3][pp.first][i])/24;
-            break;
-        case 5:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i]
-                                    - 2774*this->rhs[1][pp.first][i]
-                                    + 2616*this->rhs[2][pp.first][i]
-                                    - 1274*this->rhs[3][pp.first][i]
-                                    +  251*this->rhs[4][pp.first][i])/720;
-            break;
-        case 6:
-            AdamsBashforth_LOOP_PREAMBLE
-            pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i]
-                                    - 7923*this->rhs[1][pp.first][i]
-                                    + 9982*this->rhs[2][pp.first][i]
-                                    - 7298*this->rhs[3][pp.first][i]
-                                    + 2877*this->rhs[4][pp.first][i]
-                                    -  475*this->rhs[5][pp.first][i])/1440;
-            break;
-    }
-    this->redistribute(this->state, this->rhs, this->domain_particles);
-    this->roll_rhs();
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::step()
-{
-    TIMEZONE("rFFTW_distributed_particles::step");
-    this->AdamsBashforth((this->iteration < this->integration_steps) ?
-                          this->iteration+1 :
-                          this->integration_steps);
-    this->iteration++;
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sort_into_domains(
-        const std::unordered_map<int, single_particle_state<particle_type>> &x,
-        std::unordered_map<int, std::unordered_set<int>> &dp)
-{
-    TIMEZONE("rFFTW_distributed_particles::sort_into_domains");
-    int tmpint1, tmpint2;
-    dp.clear();
-    dp[-1] = std::unordered_set<int>();
-    dp[ 0] = std::unordered_set<int>();
-    dp[ 1] = std::unordered_set<int>();
-    dp[-1].reserve(unsigned(
-                1.5*(interp_neighbours*2+2)*
-                float(this->nparticles) /
-                this->nprocs));
-    dp[ 1].reserve(unsigned(
-                1.5*(interp_neighbours*2+2)*
-                float(this->nparticles) /
-                this->nprocs));
-    dp[ 0].reserve(unsigned(
-                1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)*
-                float(this->nparticles) /
-                this->nprocs));
-    for (auto &xx: x)
-    {
-        if (this->vel->get_rank_info(xx.second.data[2], tmpint1, tmpint2))
-        {
-            if (tmpint1 == tmpint2)
-                dp[0].insert(xx.first);
-            else
-            {
-                if (this->myrank == tmpint1)
-                    dp[-1].insert(xx.first);
-                else
-                    dp[ 1].insert(xx.first);
-            }
-        }
-    }
-}
-
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::read()
-{
-    TIMEZONE("rFFTW_distributed_particles::read");
-    double *temp = new double[this->chunk_size*state_dimension(particle_type)];
-    int tmpint1, tmpint2;
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        //read state
-        if (this->myrank == 0){
-            TIMEZONE("read_state_chunk");
-            this->read_state_chunk(cindex, temp);
-        }
-        {
-            TIMEZONE("MPI_Bcast");
-            MPI_Bcast(
-                temp,
-                this->chunk_size*state_dimension(particle_type),
-                MPI_DOUBLE,
-                0,
-                this->comm);
-        }
-        for (unsigned int p=0; p<this->chunk_size; p++)
-        {
-            if (this->vel->get_rank_info(temp[state_dimension(particle_type)*p+2], tmpint1, tmpint2))
-            {
-                this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
-            }
-        }
-        //read rhs
-        if (this->iteration > 0){
-            TIMEZONE("this->iteration > 0");
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                if (this->myrank == 0){
-                    TIMEZONE("read_rhs_chunk");
-                    this->read_rhs_chunk(cindex, i, temp);
-                }
-                {
-                    TIMEZONE("MPI_Bcast");
-                    MPI_Bcast(
-                        temp,
-                        this->chunk_size*state_dimension(particle_type),
-                        MPI_DOUBLE,
-                        0,
-                        this->comm);
-                }
-                for (unsigned int p=0; p<this->chunk_size; p++)
-                {
-                    auto pp = this->state.find(p+cindex*this->chunk_size);
-                    if (pp != this->state.end())
-                        this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
-                }
-            }
-        }
-    }
-    this->sort_into_domains(this->state, this->domain_particles);
-    DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size());
-    for (int domain=-1; domain<=1; domain++)
-    {
-        DEBUG_MSG("domain %d nparticles = %ld\n", domain, this->domain_particles[domain].size());
-    }
-    delete[] temp;
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write(
-        const char *dset_name,
-        std::unordered_map<int, single_particle_state<POINT3D>> &y)
-{
-    TIMEZONE("rFFTW_distributed_particles::write");
-    double *data = new double[this->chunk_size*3];
-    double *yy = new double[this->chunk_size*3];
-    //int pindex = 0;
-   for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        std::fill_n(yy, this->chunk_size*3, 0);
-        //for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
-        //{
-        //    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
-        //        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
-        //    {
-        //        std::copy(y[pindex].data,
-        //                  y[pindex].data + 3,
-        //                  yy + p*3);
-        //    }
-        //}
-        for (int s = -1; s <= 0; s++)
-             for (auto &pp: this->domain_particles[s])
-             {
-                 if (pp >= int(cindex*this->chunk_size) &&
-                     pp < int((cindex+1)*this->chunk_size))
-                {
-                    std::copy(y[pp].data,
-                              y[pp].data + 3,
-                              yy + (pp-cindex*this->chunk_size)*3);
-                }
-             }
-        {
-            TIMEZONE("MPI_Allreduce");
-            MPI_Allreduce(
-                yy,
-                data,
-                3*this->chunk_size,
-                MPI_DOUBLE,
-                MPI_SUM,
-                this->comm);
-        }
-        if (this->myrank == 0){
-            TIMEZONE("write_point3D_chunk");
-            this->write_point3D_chunk(dset_name, cindex, data);
-        }
-    }
-    delete[] yy;
-    delete[] data;
-}
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write(
-        const bool write_rhs)
-{
-    TIMEZONE("rFFTW_distributed_particles::write2");
-    double *temp0 = new double[this->chunk_size*state_dimension(particle_type)];
-    double *temp1 = new double[this->chunk_size*state_dimension(particle_type)];
-    //int pindex = 0;
-    for (unsigned int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
-    {
-        //write state
-        std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
-        //pindex = cindex*this->chunk_size;
-        //for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
-        //{
-        //    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
-        //        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
-        //    {
-        //        TIMEZONE("std::copy");
-        //        std::copy(this->state[pindex].data,
-        //                  this->state[pindex].data + state_dimension(particle_type),
-        //                  temp0 + p*state_dimension(particle_type));
-        //    }
-        //}
-        for (int s = -1; s <= 0; s++)
-             for (auto &pp: this->domain_particles[s])
-             {
-                 if (pp >= int(cindex*this->chunk_size) &&
-                     pp < int((cindex+1)*this->chunk_size))
-                {
-                    std::copy(this->state[pp].data,
-                              this->state[pp].data + state_dimension(particle_type),
-                              temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type));
-                }
-             }
-        {
-            TIMEZONE("MPI_Allreduce");
-            MPI_Allreduce(
-                    temp0,
-                    temp1,
-                    state_dimension(particle_type)*this->chunk_size,
-                    MPI_DOUBLE,
-                    MPI_SUM,
-                    this->comm);
-        }
-        if (this->myrank == 0){
-            TIMEZONE("write_state_chunk");
-            this->write_state_chunk(cindex, temp1);
-        }
-        //write rhs
-        if (write_rhs){
-            TIMEZONE("write_rhs");
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
-                //pindex = cindex*this->chunk_size;
-                //for (unsigned int p=0; p<this->chunk_size; p++, pindex++)
-                //{
-                //    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
-                //        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
-                //    {
-                //        TIMEZONE("std::copy");
-                //        std::copy(this->rhs[i][pindex].data,
-                //                  this->rhs[i][pindex].data + state_dimension(particle_type),
-                //                  temp0 + p*state_dimension(particle_type));
-                //    }
-                //}
-                for (int s = -1; s <= 0; s++)
-                     for (auto &pp: this->domain_particles[s])
-                     {
-                         if (pp >= int(cindex*this->chunk_size) &&
-                             pp < int((cindex+1)*this->chunk_size))
-                        {
-                            std::copy(this->rhs[i][pp].data,
-                                      this->rhs[i][pp].data + state_dimension(particle_type),
-                                      temp0 + (pp-cindex*this->chunk_size)*state_dimension(particle_type));
-                        }
-                     }
-                {
-                    TIMEZONE("MPI_Allreduce");
-                    MPI_Allreduce(
-                        temp0,
-                        temp1,
-                        state_dimension(particle_type)*this->chunk_size,
-                        MPI_DOUBLE,
-                        MPI_SUM,
-                        this->comm);
-                }
-                if (this->myrank == 0){
-                    TIMEZONE("write_rhs_chunk");
-                    this->write_rhs_chunk(cindex, i, temp1);
-                }
-            }
-        }
-    }
-    delete[] temp0;
-    delete[] temp1;
-}
-
-
-/*****************************************************************************/
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 1>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 2>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 3>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 4>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 5>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 6>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 1>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 2>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 3>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 4>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 5>;
-template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 6>;
-/*****************************************************************************/
-
diff --git a/bfps/cpp/rFFTW_distributed_particles.hpp b/bfps/cpp/rFFTW_distributed_particles.hpp
deleted file mode 100644
index 400411d5f1fd6e597714be494a72272a76e01206..0000000000000000000000000000000000000000
--- a/bfps/cpp/rFFTW_distributed_particles.hpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <iostream>
-#include <unordered_map>
-#include <unordered_set>
-#include <vector>
-#include <hdf5.h>
-#include "base.hpp"
-#include "particles_base.hpp"
-#include "fluid_solver_base.hpp"
-#include "rFFTW_interpolator.hpp"
-
-#ifndef RFFTW_DISTRIBUTED_PARTICLES
-
-#define RFFTW_DISTRIBUTED_PARTICLES
-
-template <particle_types particle_type, class rnumber, int interp_neighbours>
-class rFFTW_distributed_particles: public particles_io_base<particle_type>
-{
-    private:
-        // a "domain" corresponds to a region in 3D real space where a fixed set
-        // of MPI processes are required to participate in the interpolation
-        // formula (i.e. they all contain required information).
-        // we need to know how many processes there are for each of the domains
-        // to which the local process belongs.
-        std::unordered_map<int, int> domain_nprocs;
-        // each domain has an associated communicator, and we keep a list of the
-        // communicators to which the local process belongs
-        std::unordered_map<int, MPI_Comm> domain_comm;
-        // for each domain, we need a list of the IDs of the particles located
-        // in that domain
-        std::unordered_map<int, std::unordered_set<int>> domain_particles;
-
-        // for each domain, we need the state of each particle
-        std::unordered_map<int, single_particle_state<particle_type>> state;
-        // for each domain, we also need the last few values of the right hand
-        // side of the ODE, since we use Adams-Bashforth integration
-        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs;
-
-    public:
-        int integration_steps;
-        // this class only works with rFFTW interpolator
-        rFFTW_interpolator<rnumber, interp_neighbours> *vel;
-
-        /* simulation parameters */
-        double dt;
-
-        /* methods */
-
-        /* constructor and destructor.
-         * allocate and deallocate:
-         *  this->state
-         *  this->rhs
-         * */
-        rFFTW_distributed_particles(
-                const char *NAME,
-                const hid_t data_file_id,
-                rFFTW_interpolator<rnumber, interp_neighbours> *FIELD,
-                const int TRAJ_SKIP,
-                const int INTEGRATION_STEPS = 2);
-        ~rFFTW_distributed_particles();
-
-        void sample(
-                rFFTW_interpolator<rnumber, interp_neighbours> *field,
-                const char *dset_name);
-        void sample(
-                rFFTW_interpolator<rnumber, interp_neighbours> *field,
-                const std::unordered_map<int, single_particle_state<particle_type>> &x,
-                const std::unordered_map<int, std::unordered_set<int>> &dp,
-                std::unordered_map<int, single_particle_state<POINT3D>> &y);
-        void get_rhs(
-                const std::unordered_map<int, single_particle_state<particle_type>> &x,
-                const std::unordered_map<int, std::unordered_set<int>> &dp,
-                std::unordered_map<int, single_particle_state<particle_type>> &y);
-
-
-        /* given a list of particle positions,
-         * figure out which go into what local domain, and construct the relevant
-         * map of ID lists "dp" (for domain particles).
-         * */
-        void sort_into_domains(
-                const std::unordered_map<int, single_particle_state<particle_type>> &x,
-                std::unordered_map<int, std::unordered_set<int>> &dp);
-        /* suppose the particles are currently badly distributed, and some
-         * arbitrary quantities (stored in "vals") are associated to the particles,
-         * and we need to properly distribute them among processes.
-         * that's what this function does.
-         * In practice it's only used to redistribute the rhs values (and it
-         * automatically redistributes the state x being passed).
-         * Some more comments are present in the .cpp file, but, in brief: the
-         * particles are simply moved from one domain to another.
-         * If it turns out that the new domain contains a process which does not
-         * know about a particle, that information is sent from the closest process.
-         * */
-        void redistribute(
-                std::unordered_map<int, single_particle_state<particle_type>> &x,
-                std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals,
-                std::unordered_map<int, std::unordered_set<int>> &dp);
-
-
-        /* input/output */
-        void read();
-        void write(
-                const char *dset_name,
-                std::unordered_map<int, single_particle_state<POINT3D>> &y);
-        void write(
-                const char *dset_name,
-                std::unordered_map<int, single_particle_state<particle_type>> &y);
-        void write(const bool write_rhs = true);
-
-        /* solvers */
-        void step();
-        void roll_rhs();
-        void AdamsBashforth(const int nsteps);
-};
-
-#endif//RFFTW_DISTRIBUTED_PARTICLES
-
diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp
deleted file mode 100644
index b8b21e8811d7f5286dc4edd00833c205539ea89c..0000000000000000000000000000000000000000
--- a/bfps/cpp/rFFTW_interpolator.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-#include <cmath>
-#include "rFFTW_interpolator.hpp"
-#include "scope_timer.hpp"
-
-template <class rnumber, int interp_neighbours>
-rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator(
-        fluid_solver_base<rnumber> *fs,
-        base_polynomial_values BETA_POLYS,
-        rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS)
-{
-    this->field = FIELD_POINTER;
-
-
-    // generate compute array
-    this->compute = new bool[this->descriptor->sizes[0]];
-    std::fill_n(this->compute, this->descriptor->sizes[0], false);
-    for (int iz = this->descriptor->starts[0]-interp_neighbours-1;
-            iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours;
-            iz++)
-        this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true;
-}
-
-template <class rnumber, int interp_neighbours>
-rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator(
-        vorticity_equation<rnumber, FFTW> *fs,
-        base_polynomial_values BETA_POLYS,
-        rnumber *FIELD_POINTER) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS)
-{
-//    this->field = FIELD_POINTER;
-//
-//
-//    // generate compute array
-//    this->compute = new bool[this->descriptor->sizes[0]];
-//    std::fill_n(this->compute, this->descriptor->sizes[0], false);
-//    for (int iz = this->descriptor->starts[0]-interp_neighbours-1;
-//            iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours;
-//            iz++)
-//        this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true;
-}
-
-template <class rnumber, int interp_neighbours>
-rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator()
-{
-    delete[] this->compute;
-}
-
-template <class rnumber, int interp_neighbours>
-bool rFFTW_interpolator<rnumber, interp_neighbours>::get_rank_info(double z, int &maxz_rank, int &minz_rank)
-{
-    int zg = int(floor(z/this->dz));
-    minz_rank = this->descriptor->rank[MOD(
-             zg - interp_neighbours,
-            this->descriptor->sizes[0])];
-    maxz_rank = this->descriptor->rank[MOD(
-            zg + 1 + interp_neighbours,
-            this->descriptor->sizes[0])];
-    bool is_here = false;
-    for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++)
-        is_here = (is_here ||
-                   (this->descriptor->myrank ==
-                    this->descriptor->rank[MOD(zg+iz, this->descriptor->sizes[0])]));
-    return is_here;
-}
-
-template <class rnumber, int interp_neighbours>
-void rFFTW_interpolator<rnumber, interp_neighbours>::sample(
-        const int nparticles,
-        const int pdimension,
-        const double *__restrict__ x,
-        double *__restrict__ y,
-        const int *deriv)
-{
-    TIMEZONE("rFFTW_interpolator::sample");
-    /* get grid coordinates */
-    int *xg = new int[3*nparticles];
-    double *xx = new double[3*nparticles];
-    double *yy =  new double[3*nparticles];
-    std::fill_n(yy, 3*nparticles, 0.0);
-    this->get_grid_coordinates(nparticles, pdimension, x, xg, xx);
-    /* perform interpolation */
-    for (int p=0; p<nparticles; p++)
-        if (this->compute[xg[p*3+2]])
-            this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv);
-    MPI_Allreduce(
-            yy,
-            y,
-            3*nparticles,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->descriptor->comm);
-    delete[] yy;
-    delete[] xg;
-    delete[] xx;
-}
-
-template <class rnumber, int interp_neighbours>
-void rFFTW_interpolator<rnumber, interp_neighbours>::operator()(
-        const int *xg,
-        const double *xx,
-        double *dest,
-        const int *deriv)
-{
-    TIMEZONE("rFFTW_interpolator::operator()");
-    double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2];
-    /* please note that the polynomials in z are computed for all the different
-     * iz values, independently of whether or not "myrank" will perform the
-     * computation for all the different iz slices.
-     * I don't know how big a deal this really is, but it is something that we can
-     * optimize.
-     * */
-    if (deriv == NULL)
-    {
-        this->compute_beta(0, xx[0], bx);
-        this->compute_beta(0, xx[1], by);
-        this->compute_beta(0, xx[2], bz);
-    }
-    else
-    {
-        this->compute_beta(deriv[0], xx[0], bx);
-        this->compute_beta(deriv[1], xx[1], by);
-        this->compute_beta(deriv[2], xx[2], bz);
-    }
-    std::fill_n(dest, 3, 0);
-    ptrdiff_t bigiz, bigiy, bigix;
-    // loop over the 2*interp_neighbours + 2 z slices
-    for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++)
-    {
-        // bigiz is the z index of the cell containing the particles
-        // this->descriptor->sizes[0] is added before taking the modulo
-        // because we want to be sure that "bigiz" is a positive number.
-        // I'm no longer sure why I don't use the MOD function here.
-        bigiz = ptrdiff_t(((xg[2]+iz) + this->descriptor->sizes[0]) % this->descriptor->sizes[0]);
-        // once we know bigiz, we know whether "myrank" has the relevant slice.
-        // if not, go to next value of bigiz
-        if (this->descriptor->myrank == this->descriptor->rank[bigiz])
-        {
-            for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++)
-            {
-                // bigiy is the y index of the cell
-                // since we have all the y indices in myrank, we can safely use the
-                // modulo value
-                bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1]));
-                for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++)
-                {
-                    // bigix is the x index of the cell
-                    bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2]));
-                    // here we create the index to the current grid node
-                    // note the removal of local_z_start from bigiz.
-                    ptrdiff_t tindex = (((bigiz-this->descriptor->starts[0])*this->descriptor->sizes[1] +
-                                         bigiy)*(this->descriptor->sizes[2]+2) +
-                                         bigix)*3;
-                    for (int c=0; c<3; c++)
-                        dest[c] += this->field[tindex+c]*(bz[iz+interp_neighbours]*
-                                                          by[iy+interp_neighbours]*
-                                                          bx[ix+interp_neighbours]);
-                }
-            }
-        }
-    }
-}
-
-template class rFFTW_interpolator<float, 1>;
-template class rFFTW_interpolator<float, 2>;
-template class rFFTW_interpolator<float, 3>;
-template class rFFTW_interpolator<float, 4>;
-template class rFFTW_interpolator<float, 5>;
-template class rFFTW_interpolator<float, 6>;
-template class rFFTW_interpolator<float, 7>;
-template class rFFTW_interpolator<float, 8>;
-template class rFFTW_interpolator<float, 9>;
-template class rFFTW_interpolator<float, 10>;
-template class rFFTW_interpolator<double, 1>;
-template class rFFTW_interpolator<double, 2>;
-template class rFFTW_interpolator<double, 3>;
-template class rFFTW_interpolator<double, 4>;
-template class rFFTW_interpolator<double, 5>;
-template class rFFTW_interpolator<double, 6>;
-template class rFFTW_interpolator<double, 7>;
-template class rFFTW_interpolator<double, 8>;
-template class rFFTW_interpolator<double, 9>;
-template class rFFTW_interpolator<double, 10>;
-
diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp
deleted file mode 100644
index 5088be8b2f3094fd96332af0c923d7cc905e4f3f..0000000000000000000000000000000000000000
--- a/bfps/cpp/rFFTW_interpolator.hpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include "field_descriptor.hpp"
-#include "fftw_tools.hpp"
-#include "fluid_solver_base.hpp"
-#include "vorticity_equation.hpp"
-#include "interpolator_base.hpp"
-
-#ifndef RFFTW_INTERPOLATOR
-
-#define RFFTW_INTERPOLATOR
-
-template <class rnumber, int interp_neighbours>
-class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours>
-{
-    public:
-        using interpolator_base<rnumber, interp_neighbours>::operator();
-
-        /* pointer to field that has to be interpolated
-         * The reason this is a member variable is because I want this class
-         * to be consistent with the "interpolator" class, where a member
-         * variable is absolutely required (since that class uses padding).
-         * */
-        rnumber *field;
-
-        /* compute[iz] is an array that says whether or not the current MPI
-         * process is involved in the interpolation formula for a particle
-         * located in cell "iz".
-         * It is mostly used in the formula itself.
-         * This translates as the following condition:
-         * local_zstart - neighbours <= iz <= local_zend + 1 + neighbours
-         * I think it's cleaner to keep things in an array, especially since
-         * "local_zend" is shorthand for another arithmetic operation anyway.
-         * */
-        bool *compute;
-
-
-        /* Constructors */
-        rFFTW_interpolator(
-                fluid_solver_base<rnumber> *FSOLVER,
-                base_polynomial_values BETA_POLYS,
-                rnumber *FIELD_DATA);
-
-        /* this constructor is empty, I just needed for a quick hack of the
-         * "vorticity_equation" class.
-         * It should be removed soon.
-         * */
-        rFFTW_interpolator(
-                vorticity_equation<rnumber, FFTW> *FSOLVER,
-                base_polynomial_values BETA_POLYS,
-                rnumber *FIELD_DATA);
-        ~rFFTW_interpolator();
-
-        /* This method is provided for consistency with "interpolator", and it
-         * does not destroy input */
-        inline int read_rFFTW(const void *src)
-        {
-            this->field = (rnumber*)src;
-            return EXIT_SUCCESS;
-        }
-
-        /* This is used when "compute" is not enough.
-         * For a given z location, it gives the outermost ranks that are relevant
-         * for the interpolation formula.
-         * */
-        bool get_rank_info(double z, int &maxz_rank, int &minz_rank);
-
-        /* interpolate field at an array of locations.
-         * After interpolation is performed, call Allreduce for "y", over
-         * this->descriptor->comm --- generally MPI_COMM_WORLD.
-         * This is useful for the simple "particles" class, where particle
-         * information is synchronized across all processes.
-         * */
-        void sample(
-                const int nparticles,
-                const int pdimension,
-                const double *__restrict__ x,
-                double *__restrict__ y,
-                const int *deriv = NULL);
-        /* interpolate 1 point.
-         * Result is kept local.
-         * This is used in the "rFFTW_distributed_particles" class, with the
-         * result being synchronized across the relevant "local particle
-         * communicator".
-         * */
-        void operator()(
-                const int *__restrict__ xg,
-                const double *__restrict__ xx,
-                double *__restrict__ dest,
-                const int *deriv = NULL);
-};
-
-#endif//RFFTW_INTERPOLATOR
-
diff --git a/bfps/cpp/slab_field_particles.cpp b/bfps/cpp/slab_field_particles.cpp
deleted file mode 100644
index 15fa363f6d277d34c6081fd545c4578e1f735929..0000000000000000000000000000000000000000
--- a/bfps/cpp/slab_field_particles.cpp
+++ /dev/null
@@ -1,799 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-
-#include <cmath>
-#include <cassert>
-#include <cstring>
-#include <string>
-#include <sstream>
-
-#include "base.hpp"
-#include "slab_field_particles.hpp"
-#include "fftw_tools.hpp"
-
-
-extern int myrank, nprocs;
-
-template <class rnumber>
-slab_field_particles<rnumber>::slab_field_particles(
-        const char *NAME,
-        fluid_solver_base<rnumber> *FSOLVER,
-        const int NPARTICLES,
-        const int NCOMPONENTS,
-        base_polynomial_values BETA_POLYS,
-        const int INTERP_NEIGHBOURS,
-        const int TRAJ_SKIP,
-        const int INTEGRATION_STEPS)
-{
-    assert((NCOMPONENTS % 3) == 0);
-    assert((INTERP_NEIGHBOURS >= 1) ||
-           (INTERP_NEIGHBOURS <= 8));
-    assert((INTEGRATION_STEPS <= 6) &&
-           (INTEGRATION_STEPS >= 1));
-    strncpy(this->name, NAME, 256);
-    this->fs = FSOLVER;
-    this->nparticles = NPARTICLES;
-    this->ncomponents = NCOMPONENTS;
-    this->integration_steps = INTEGRATION_STEPS;
-    this->interp_neighbours = INTERP_NEIGHBOURS;
-    this->traj_skip = TRAJ_SKIP;
-    this->compute_beta = BETA_POLYS;
-    // in principle only the buffer width at the top needs the +1,
-    // but things are simpler if buffer_width is the same
-    this->buffer_width = this->interp_neighbours+1;
-    this->buffer_size = this->buffer_width*this->fs->rd->slice_size;
-    this->array_size = this->nparticles * this->ncomponents;
-    this->state = fftw_alloc_real(this->array_size);
-    std::fill_n(this->state, this->array_size, 0.0);
-    for (int i=0; i < this->integration_steps; i++)
-    {
-        this->rhs[i] = fftw_alloc_real(this->array_size);
-        std::fill_n(this->rhs[i], this->array_size, 0.0);
-    }
-    this->watching = new bool[this->fs->rd->nprocs*nparticles];
-    std::fill_n(this->watching, this->fs->rd->nprocs*this->nparticles, false);
-    this->computing = new int[nparticles];
-
-    int tdims[4];
-    tdims[0] = this->buffer_width*2*this->fs->rd->nprocs + this->fs->rd->sizes[0];
-    tdims[1] = this->fs->rd->sizes[1];
-    tdims[2] = this->fs->rd->sizes[2];
-    tdims[3] = this->fs->rd->sizes[3];
-    this->buffered_field_descriptor = new field_descriptor<rnumber>(
-            4, tdims,
-            this->fs->rd->mpi_dtype,
-            this->fs->rd->comm);
-
-    // compute dx, dy, dz;
-    this->dx = 4*acos(0) / (this->fs->dkx*this->fs->rd->sizes[2]);
-    this->dy = 4*acos(0) / (this->fs->dky*this->fs->rd->sizes[1]);
-    this->dz = 4*acos(0) / (this->fs->dkz*this->fs->rd->sizes[0]);
-
-    // compute lower and upper bounds
-    this->lbound = new double[nprocs];
-    this->ubound = new double[nprocs];
-    double *tbound = new double[nprocs];
-    std::fill_n(tbound, nprocs, 0.0);
-    tbound[this->fs->rd->myrank] = this->fs->rd->starts[0]*this->dz;
-    MPI_Allreduce(
-            tbound,
-            this->lbound,
-            nprocs,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->fs->rd->comm);
-    std::fill_n(tbound, nprocs, 0.0);
-    tbound[this->fs->rd->myrank] = (this->fs->rd->starts[0] + this->fs->rd->subsizes[0])*this->dz;
-    MPI_Allreduce(
-            tbound,
-            this->ubound,
-            nprocs,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->fs->rd->comm);
-    delete[] tbound;
-    //for (int r = 0; r<nprocs; r++)
-    //    DEBUG_MSG(
-    //            "lbound[%d] = %lg, ubound[%d] = %lg\n",
-    //            r, this->lbound[r],
-    //            r, this->ubound[r]
-    //            );
-}
-
-template <class rnumber>
-slab_field_particles<rnumber>::~slab_field_particles()
-{
-    delete[] this->computing;
-    delete[] this->watching;
-    fftw_free(this->state);
-    for (int i=0; i < this->integration_steps; i++)
-    {
-        fftw_free(this->rhs[i]);
-    }
-    delete[] this->lbound;
-    delete[] this->ubound;
-    delete this->buffered_field_descriptor;
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::get_rhs(double *x, double *y)
-{
-    std::fill_n(y, this->array_size, 0.0);
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::jump_estimate(double *dest)
-{
-    std::fill_n(dest, this->nparticles, 0.0);
-}
-
-template <class rnumber>
-int slab_field_particles<rnumber>::get_rank(double z)
-{
-    int tmp = this->fs->rd->rank[MOD(int(floor(z/this->dz)), this->fs->rd->sizes[0])];
-    assert(tmp >= 0 && tmp < this->fs->rd->nprocs);
-    return tmp;
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::synchronize_single_particle_state(int p, double *x, int source)
-{
-    if (source == -1) source = this->computing[p];
-    if (this->watching[this->fs->rd->myrank*this->nparticles+p]) for (int r=0; r<this->fs->rd->nprocs; r++)
-        if (r != source &&
-            this->watching[r*this->nparticles+p])
-        {
-            //DEBUG_MSG("synchronizing state %d from %d to %d\n", p, this->computing[p], r);
-            if (this->fs->rd->myrank == source)
-                MPI_Send(
-                        x+p*this->ncomponents,
-                        this->ncomponents,
-                        MPI_DOUBLE,
-                        r,
-                        p+this->computing[p]*this->nparticles,
-                        this->fs->rd->comm);
-            if (this->fs->rd->myrank == r)
-                MPI_Recv(
-                        x+p*this->ncomponents,
-                        this->ncomponents,
-                        MPI_DOUBLE,
-                        source,
-                        p+this->computing[p]*this->nparticles,
-                        this->fs->rd->comm,
-                        MPI_STATUS_IGNORE);
-        }
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::synchronize()
-{
-    double *tstate = fftw_alloc_real(this->array_size);
-    // first, synchronize state and jump across CPUs
-    std::fill_n(tstate, this->array_size, 0.0);
-    for (int p=0; p<this->nparticles; p++)
-    {
-        //if (this->watching[this->fs->rd->myrank*this->nparticles + p])
-        //DEBUG_MSG(
-        //        "in synchronize, position for particle %d is %g %g %g\n",
-        //        p,
-        //        this->state[p*this->ncomponents],
-        //        this->state[p*this->ncomponents+1],
-        //        this->state[p*this->ncomponents+2]);
-        if (this->fs->rd->myrank == this->computing[p])
-            std::copy(this->state + p*this->ncomponents,
-                      this->state + (p+1)*this->ncomponents,
-                      tstate + p*this->ncomponents);
-    }
-    MPI_Allreduce(
-            tstate,
-            this->state,
-            this->array_size,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->fs->rd->comm);
-    if (this->integration_steps >= 1)
-    {
-        for (int i=0; i<this->integration_steps; i++)
-        {
-            std::fill_n(tstate, this->array_size, 0.0);
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                std::copy(this->rhs[i] + p*this->ncomponents,
-                          this->rhs[i] + (p+1)*this->ncomponents,
-                          tstate + p*this->ncomponents);
-            std::fill_n(this->rhs[i], this->array_size, 0.0);
-            MPI_Allreduce(
-                    tstate,
-                    this->rhs[i],
-                    this->array_size,
-                    MPI_DOUBLE,
-                    MPI_SUM,
-                    this->fs->rd->comm);
-        }
-    }
-    fftw_free(tstate);
-    // assignment of particles
-    for (int p=0; p<this->nparticles; p++)
-    {
-        this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]);
-        //DEBUG_MSG("synchronizing particles, particle %d computing is %d\n", p, this->computing[p]);
-    }
-    double *jump = fftw_alloc_real(this->nparticles);
-    this->jump_estimate(jump);
-    // now, see who needs to watch
-    bool *local_watching = new bool[this->fs->rd->nprocs*this->nparticles];
-    std::fill_n(local_watching, this->fs->rd->nprocs*this->nparticles, false);
-    for (int p=0; p<this->nparticles; p++)
-        if (this->fs->rd->myrank == this->computing[p])
-        {
-            local_watching[this->get_rank(this->state[this->ncomponents*p+2]        )*this->nparticles+p] = true;
-            local_watching[this->get_rank(this->state[this->ncomponents*p+2]-jump[p])*this->nparticles+p] = true;
-            local_watching[this->get_rank(this->state[this->ncomponents*p+2]+jump[p])*this->nparticles+p] = true;
-        }
-    fftw_free(jump);
-    MPI_Allreduce(
-            local_watching,
-            this->watching,
-            this->nparticles*this->fs->rd->nprocs,
-            MPI_C_BOOL,
-            MPI_LOR,
-            this->fs->rd->comm);
-    delete[] local_watching;
-    for (int p=0; p<this->nparticles; p++)
-        DEBUG_MSG("watching = %d for particle %d\n", this->watching[this->fs->rd->myrank*nparticles+p], p);
-}
-
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::roll_rhs()
-{
-    for (int i=this->integration_steps-2; i>=0; i--)
-        std::copy(this->rhs[i],
-                  this->rhs[i] + this->array_size,
-                  this->rhs[i+1]);
-}
-
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::AdamsBashforth(int nsteps)
-{
-    ptrdiff_t ii;
-    this->get_rhs(this->state, this->rhs[0]);
-    //if (myrank == 0)
-    //{
-    //    DEBUG_MSG(
-    //            "in AdamsBashforth for particles %s, integration_steps = %d, nsteps = %d, iteration = %d\n",
-    //            this->name,
-    //            this->integration_steps,
-    //            nsteps,
-    //            this->iteration);
-    //    std::stringstream tstring;
-    //    for (int p=0; p<this->nparticles; p++)
-    //        tstring << " " << this->computing[p];
-    //    DEBUG_MSG("%s\n", tstring.str().c_str());
-    //    for (int i=0; i<this->integration_steps; i++)
-    //    {
-    //        std::stringstream tstring;
-    //        for (int p=0; p<this->nparticles; p++)
-    //            tstring << " " << this->rhs[i][p*3];
-    //        DEBUG_MSG("%s\n", tstring.str().c_str());
-    //    }
-    //}
-    switch(nsteps)
-    {
-        case 1:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*this->rhs[0][ii];
-                }
-            break;
-        case 2:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*(3*this->rhs[0][ii]
-                                               -   this->rhs[1][ii])/2;
-                }
-            break;
-        case 3:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*(23*this->rhs[0][ii]
-                                               - 16*this->rhs[1][ii]
-                                               +  5*this->rhs[2][ii])/12;
-                }
-            break;
-        case 4:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*(55*this->rhs[0][ii]
-                                               - 59*this->rhs[1][ii]
-                                               + 37*this->rhs[2][ii]
-                                               -  9*this->rhs[3][ii])/24;
-                }
-            break;
-        case 5:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*(1901*this->rhs[0][ii]
-                                               - 2774*this->rhs[1][ii]
-                                               + 2616*this->rhs[2][ii]
-                                               - 1274*this->rhs[3][ii]
-                                               +  251*this->rhs[4][ii])/720;
-                }
-            break;
-        case 6:
-            for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ii = p*this->ncomponents+i;
-                    this->state[ii] += this->dt*(4277*this->rhs[0][ii]
-                                               - 7923*this->rhs[1][ii]
-                                               + 9982*this->rhs[2][ii]
-                                               - 7298*this->rhs[3][ii]
-                                               + 2877*this->rhs[4][ii]
-                                               -  475*this->rhs[5][ii])/1440;
-                }
-            break;
-    }
-    this->roll_rhs();
-}
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::step()
-{
-    this->AdamsBashforth((this->iteration < this->integration_steps) ? this->iteration+1 : this->integration_steps);
-    //this->cRK4();
-    this->iteration++;
-    this->synchronize();
-}
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::Euler()
-{
-    double *y = fftw_alloc_real(this->array_size);
-    this->get_rhs(this->state, y);
-    for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-    {
-        for (int i=0; i<this->ncomponents; i++)
-            this->state[p*this->ncomponents+i] += this->dt*y[p*this->ncomponents+i];
-        //DEBUG_MSG(
-        //        "particle %d state is %lg %lg %lg\n",
-        //        p, this->state[p*this->ncomponents], this->state[p*this->ncomponents+1], this->state[p*this->ncomponents+2]);
-    }
-    fftw_free(y);
-}
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::Heun()
-{
-    double *y = new double[this->array_size];
-    double dtfactor[] = {0.0, this->dt};
-    this->get_rhs(this->state, this->rhs[0]);
-    for (int p=0; p<this->nparticles; p++)
-    {
-        this->synchronize_single_particle_state(p, this->rhs[0]);
-        //int crank = this->get_rank(this->state[p*3 + 2]);
-        //DEBUG_MSG(
-        //        "k 0 iteration %d particle is %d, crank is %d, computing rank is %d, position is %g %g %g, rhs is %g %g %g\n",
-        //        this->iteration, p,
-        //        crank, this->computing[p],
-        //        this->state[p*3], this->state[p*3+1], this->state[p*3+2],
-        //        this->rhs[0][p*3], this->rhs[0][p*3+1], this->rhs[0][p*3+2]);
-    }
-    for (int kindex = 1; kindex < 2; kindex++)
-    {
-        for (int p=0; p<this->nparticles; p++)
-        {
-            if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i;
-                    y[tindex] = this->state[tindex] + dtfactor[kindex]*this->rhs[kindex-1][tindex];
-                }
-        }
-        for (int p=0; p<this->nparticles; p++)
-            this->synchronize_single_particle_state(p, y);
-        this->get_rhs(y, this->rhs[kindex]);
-        for (int p=0; p<this->nparticles; p++)
-        {
-            this->synchronize_single_particle_state(p, this->rhs[kindex]);
-        DEBUG_MSG(
-                "k %d iteration %d particle is %d, position is %g %g %g, rhs is %g %g %g\n",
-                kindex, this->iteration, p,
-                y[p*3], y[p*3+1], y[p*3+2],
-                this->rhs[kindex][p*3], this->rhs[kindex][p*3+1], this->rhs[kindex][p*3+2]);
-        }
-    }
-    for (int p=0; p<this->nparticles; p++)
-    {
-        if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-        {
-            for (int i=0; i<this->ncomponents; i++)
-            {
-                ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i;
-                this->state[tindex] += this->dt*(this->rhs[0][tindex] + this->rhs[1][tindex])/2;
-            }
-            //int crank = this->get_rank(this->state[p*3 + 2]);
-            //if (crank != this->computing[p])
-            //    DEBUG_MSG(
-            //            "k _ iteration %d particle is %d, crank is %d, computing rank is %d, position is %g %g %g\n",
-            //            this->iteration, p,
-            //            crank, this->computing[p],
-            //            this->state[p*3], this->state[p*3+1], this->state[p*3+2]);
-        }
-    }
-    delete[] y;
-    DEBUG_MSG("exiting Heun\n");
-}
-
-
-template <class rnumber>
-void slab_field_particles<rnumber>::cRK4()
-{
-    double *y = new double[this->array_size];
-    double dtfactor[] = {0.0, this->dt/2, this->dt/2, this->dt};
-    this->get_rhs(this->state, this->rhs[0]);
-    for (int p=0; p<this->nparticles; p++)
-        this->synchronize_single_particle_state(p, this->rhs[0]);
-    for (int kindex = 1; kindex < 4; kindex++)
-    {
-        for (int p=0; p<this->nparticles; p++)
-        {
-            if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-                for (int i=0; i<this->ncomponents; i++)
-                {
-                    ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i;
-                    y[tindex] = this->state[tindex] + dtfactor[kindex]*this->rhs[kindex-1][tindex];
-                }
-        }
-        for (int p=0; p<this->nparticles; p++)
-            this->synchronize_single_particle_state(p, y);
-        this->get_rhs(y, this->rhs[kindex]);
-        for (int p=0; p<this->nparticles; p++)
-            this->synchronize_single_particle_state(p, this->rhs[kindex]);
-    }
-    for (int p=0; p<this->nparticles; p++)
-    {
-        if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-            for (int i=0; i<this->ncomponents; i++)
-            {
-                ptrdiff_t tindex = ptrdiff_t(p)*this->ncomponents + i;
-                this->state[tindex] += this->dt*(this->rhs[0][tindex] +
-                                              2*(this->rhs[1][tindex] + this->rhs[2][tindex]) +
-                                                 this->rhs[3][tindex])/6;
-            }
-    }
-    delete[] y;
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::get_grid_coordinates(double *x, int *xg, double *xx)
-{
-    static double grid_size[] = {this->dx, this->dy, this->dz};
-    double tval;
-    std::fill_n(xg, this->nparticles*3, 0);
-    std::fill_n(xx, this->nparticles*3, 0.0);
-    for (int p=0; p<this->nparticles; p++) if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-    {
-        for (int c=0; c<3; c++)
-        {
-            tval = floor(x[p*this->ncomponents+c]/grid_size[c]);
-            xg[p*3+c] = MOD(int(tval), this->fs->rd->sizes[2-c]);
-            xx[p*3+c] = (x[p*this->ncomponents+c] - tval*grid_size[c]) / grid_size[c];
-        }
-        xg[p*3+2] -= this->fs->rd->starts[0];
-        if (this->fs->rd->myrank == this->fs->rd->rank[0] &&
-            xg[p*3+2] > this->fs->rd->subsizes[0])
-            xg[p*3+2] -= this->fs->rd->sizes[0];
-        //DEBUG_MSG(
-        //        "particle %d x is %lg %lg %lg xx is %lg %lg %lg xg is %d %d %d\n",
-        //        p,
-        //         x[p*3],  x[p*3+1],  x[p*3+2],
-        //        xx[p*3], xx[p*3+1], xx[p*3+2],
-        //        xg[p*3], xg[p*3+1], xg[p*3+2]);
-    }
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::interpolation_formula(rnumber *field, int *xg, double *xx, double *dest, int *deriv)
-{
-    double bx[this->interp_neighbours*2+2], by[this->interp_neighbours*2+2], bz[this->interp_neighbours*2+2];
-    this->compute_beta(deriv[0], xx[0], bx);
-    this->compute_beta(deriv[1], xx[1], by);
-    this->compute_beta(deriv[2], xx[2], bz);
-    //DEBUG_MSG("computed beta polynomials\n");
-    std::fill_n(dest, 3, 0);
-    for (int iz = -this->interp_neighbours; iz <= this->interp_neighbours+1; iz++)
-    for (int iy = -this->interp_neighbours; iy <= this->interp_neighbours+1; iy++)
-    for (int ix = -this->interp_neighbours; ix <= this->interp_neighbours+1; ix++)
-        for (int c=0; c<3; c++)
-        {
-            //DEBUG_MSG(
-            //        "%d %d %d %d %d %d %d %ld %ld\n",
-            //        xg[2], xg[1], xg[0], iz, iy, ix, c,
-            //        ((ptrdiff_t(xg[2]+iz) *this->fs->rd->subsizes[1] +
-            //          ptrdiff_t(xg[1]+iy))*this->fs->rd->subsizes[2] +
-            //          ptrdiff_t(xg[0]+ix))*3+c,
-            //        this->buffered_field_descriptor->local_size
-            //        );
-            dest[c] += field[((ptrdiff_t(    xg[2]+iz                         ) *this->fs->rd->subsizes[1] +
-                               ptrdiff_t(MOD(xg[1]+iy, this->fs->rd->sizes[1])))*this->fs->rd->subsizes[2] +
-                               ptrdiff_t(MOD(xg[0]+ix, this->fs->rd->sizes[2])))*3+c]*(bz[iz+this->interp_neighbours]*
-                                                                                       by[iy+this->interp_neighbours]*
-                                                                                       bx[ix+this->interp_neighbours]);
-        }
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::linear_interpolation(rnumber *field, int *xg, double *xx, double *dest, int *deriv)
-{
-    //ptrdiff_t tindex, tmp;
-    //tindex = ((ptrdiff_t(xg[2]  )*this->fs->rd->subsizes[1]+xg[1]  )*this->fs->rd->subsizes[2]+xg[0]  )*3;
-    //tmp = ptrdiff_t(xg[2]);
-    //DEBUG_MSG(
-    //        "linear interpolation xx is %lg %lg %lg xg is %d %d %d,"
-    //        " corner index is ((%ld*%d+%d)*%d+%d)*3 = %ld\n",
-    //        xx[0], xx[1], xx[2],
-    //        xg[0], xg[1], xg[2],
-    //        tmp, this->fs->rd->subsizes[1], xg[1], this->fs->rd->subsizes[2], xg[0],
-    //        tindex);
-    for (int c=0; c<3; c++)
-        dest[c] = (field[((ptrdiff_t(xg[2]  )*this->fs->rd->subsizes[1]+xg[1]  )*this->fs->rd->subsizes[2]+xg[0]  )*3+c]*((1-xx[0])*(1-xx[1])*(1-xx[2])) +
-                   field[((ptrdiff_t(xg[2]  )*this->fs->rd->subsizes[1]+xg[1]  )*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*((  xx[0])*(1-xx[1])*(1-xx[2])) +
-                   field[((ptrdiff_t(xg[2]  )*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]  )*3+c]*((1-xx[0])*(  xx[1])*(1-xx[2])) +
-                   field[((ptrdiff_t(xg[2]  )*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*((  xx[0])*(  xx[1])*(1-xx[2])) +
-                   field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]  )*this->fs->rd->subsizes[2]+xg[0]  )*3+c]*((1-xx[0])*(1-xx[1])*(  xx[2])) +
-                   field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]  )*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*((  xx[0])*(1-xx[1])*(  xx[2])) +
-                   field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]  )*3+c]*((1-xx[0])*(  xx[1])*(  xx[2])) +
-                   field[((ptrdiff_t(xg[2]+1)*this->fs->rd->subsizes[1]+xg[1]+1)*this->fs->rd->subsizes[2]+xg[0]+1)*3+c]*((  xx[0])*(  xx[1])*(  xx[2])));
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::read(hid_t data_file_id)
-{
-    //DEBUG_MSG("aloha\n");
-    if (this->fs->rd->myrank == 0)
-    {
-        std::string temp_string = (std::string("/particles/") +
-                                   std::string(this->name) +
-                                   std::string("/state"));
-        hid_t Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
-        hid_t mspace, rspace;
-        hsize_t count[4], offset[4];
-        rspace = H5Dget_space(Cdset);
-        H5Sget_simple_extent_dims(rspace, count, NULL);
-        count[0] = 1;
-        offset[0] = this->iteration / this->traj_skip;
-        offset[1] = 0;
-        offset[2] = 0;
-        mspace = H5Screate_simple(3, count, NULL);
-        H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-        H5Dread(Cdset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->state);
-        H5Sclose(mspace);
-        H5Sclose(rspace);
-        H5Dclose(Cdset);
-        if (this->iteration > 0)
-        {
-            temp_string = (std::string("/particles/") +
-                           std::string(this->name) +
-                           std::string("/rhs"));
-            Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
-            rspace = H5Dget_space(Cdset);
-            H5Sget_simple_extent_dims(rspace, count, NULL);
-            //reading from last available position
-            offset[0] = count[0] - 1;
-            offset[3] = 0;
-            count[0] = 1;
-            count[1] = 1;
-            mspace = H5Screate_simple(4, count, NULL);
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                offset[1] = i;
-                H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-                H5Dread(Cdset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->rhs[i]);
-            }
-            H5Sclose(mspace);
-            H5Sclose(rspace);
-            H5Dclose(Cdset);
-        }
-    }
-    MPI_Bcast(
-            this->state,
-            this->array_size,
-            MPI_DOUBLE,
-            0,
-            this->fs->rd->comm);
-    for (int i = 0; i<this->integration_steps; i++)
-    {
-        MPI_Bcast(
-                this->rhs[i],
-                this->array_size,
-                MPI_DOUBLE,
-                0,
-                this->fs->rd->comm);
-    }
-    // initial assignment of particles
-    for (int p=0; p<this->nparticles; p++)
-    {
-        this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]);
-        //DEBUG_MSG("reading particles, particle %d computing is %d\n", p, this->computing[p]);
-    }
-    // now actual synchronization
-    this->synchronize();
-}
-
-template <class rnumber>
-void slab_field_particles<rnumber>::write(hid_t data_file_id, bool write_rhs)
-{
-    if (this->fs->rd->myrank == 0)
-    {
-        std::string temp_string = (std::string("/particles/") +
-                                   std::string(this->name) +
-                                   std::string("/state"));
-        hid_t Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
-        hid_t mspace, wspace;
-        hsize_t count[4], offset[4];
-        wspace = H5Dget_space(Cdset);
-        H5Sget_simple_extent_dims(wspace, count, NULL);
-        count[0] = 1;
-        offset[0] = this->iteration / this->traj_skip;
-        offset[1] = 0;
-        offset[2] = 0;
-        mspace = H5Screate_simple(3, count, NULL);
-        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-        H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->state);
-        H5Sclose(mspace);
-        H5Sclose(wspace);
-        H5Dclose(Cdset);
-        if (write_rhs)
-        {
-            temp_string = (std::string("/particles/") +
-                           std::string(this->name) +
-                           std::string("/rhs"));
-            Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT);
-            wspace = H5Dget_space(Cdset);
-            H5Sget_simple_extent_dims(wspace, count, NULL);
-            //writing to last available position
-            offset[0] = count[0] - 1;
-            count[0] = 1;
-            count[1] = 1;
-            offset[3] = 0;
-            mspace = H5Screate_simple(4, count, NULL);
-            for (int i=0; i<this->integration_steps; i++)
-            {
-                offset[1] = i;
-                H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
-                H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->rhs[i]);
-            }
-            H5Sclose(mspace);
-            H5Sclose(wspace);
-            H5Dclose(Cdset);
-        }
-    }
-}
-
-
-
-/*****************************************************************************/
-/* macro for specializations to numeric types compatible with FFTW           */
-#define SLAB_FIELD_PARTICLES_DEFINITIONS(FFTW, R, MPI_RNUM) \
- \
-template <> \
-void slab_field_particles<R>::rFFTW_to_buffered(R *src, R *dst) \
-{ \
-    /* do big copy of middle stuff */ \
-    std::copy(src, \
-              src + this->fs->rd->local_size, \
-              dst + this->buffer_size); \
-    int rsrc; \
-    /* get upper slices */ \
-    for (int rdst = 0; rdst < this->fs->rd->nprocs; rdst++) \
-    { \
-        rsrc = this->fs->rd->rank[(this->fs->rd->all_start0[rdst] + \
-                                   this->fs->rd->all_size0[rdst]) % \
-                                   this->fs->rd->sizes[0]]; \
-        if (this->fs->rd->myrank == rsrc) \
-            MPI_Send( \
-                    (void*)(src), \
-                    this->buffer_size, \
-                    MPI_RNUM, \
-                    rdst, \
-                    2*(rsrc*this->fs->rd->nprocs + rdst), \
-                    this->fs->rd->comm); \
-        if (this->fs->rd->myrank == rdst) \
-            MPI_Recv( \
-                    (void*)(dst + this->buffer_size + this->fs->rd->local_size), \
-                    this->buffer_size, \
-                    MPI_RNUM, \
-                    rsrc, \
-                    2*(rsrc*this->fs->rd->nprocs + rdst), \
-                    this->fs->rd->comm, \
-                    MPI_STATUS_IGNORE); \
-    } \
-    /* get lower slices */ \
-    for (int rdst = 0; rdst < this->fs->rd->nprocs; rdst++) \
-    { \
-        rsrc = this->fs->rd->rank[MOD(this->fs->rd->all_start0[rdst] - 1, \
-                                      this->fs->rd->sizes[0])]; \
-        if (this->fs->rd->myrank == rsrc) \
-            MPI_Send( \
-                    (void*)(src + this->fs->rd->local_size - this->buffer_size), \
-                    this->buffer_size, \
-                    MPI_RNUM, \
-                    rdst, \
-                    2*(rsrc*this->fs->rd->nprocs + rdst)+1, \
-                    this->fs->rd->comm); \
-        if (this->fs->rd->myrank == rdst) \
-            MPI_Recv( \
-                    (void*)(dst), \
-                    this->buffer_size, \
-                    MPI_RNUM, \
-                    rsrc, \
-                    2*(rsrc*this->fs->rd->nprocs + rdst)+1, \
-                    this->fs->rd->comm, \
-                    MPI_STATUS_IGNORE); \
-    } \
-} \
-/*****************************************************************************/
-
-
-
-/*****************************************************************************/
-/* now actually use the macro defined above                                  */
-SLAB_FIELD_PARTICLES_DEFINITIONS(
-        FFTW_MANGLE_FLOAT,
-        float,
-        MPI_FLOAT)
-SLAB_FIELD_PARTICLES_DEFINITIONS(
-        FFTW_MANGLE_DOUBLE,
-        double,
-        MPI_DOUBLE)
-/*****************************************************************************/
-
-
-
-/*****************************************************************************/
-/* finally, force generation of code for single precision                    */
-template class slab_field_particles<float>;
-template class slab_field_particles<double>;
-/*****************************************************************************/
diff --git a/bfps/cpp/slab_field_particles.hpp b/bfps/cpp/slab_field_particles.hpp
deleted file mode 100644
index 15f9477bbfb680be17390447ce88bc40cd7471e2..0000000000000000000000000000000000000000
--- a/bfps/cpp/slab_field_particles.hpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <iostream>
-#include <hdf5.h>
-#include "base.hpp"
-#include "fluid_solver_base.hpp"
-#include "interpolator.hpp"
-
-#ifndef SLAB_FIELD_PARTICLES
-
-#define SLAB_FIELD_PARTICLES
-
-extern int myrank, nprocs;
-
-template <class rnumber>
-class slab_field_particles
-{
-    protected:
-        //typedef void (slab_field_particles<rnumber>::*tensor_product_interpolation_formula)(
-        //        rnumber *field,
-        //        int *xg,
-        //        double *xx,
-        //        double *dest,
-        //        int *deriv);
-    public:
-        fluid_solver_base<rnumber> *fs;
-        field_descriptor<rnumber> *buffered_field_descriptor;
-
-        /* watching is an array of shape [nparticles], with
-         * watching[p] being true if particle p is in the domain of myrank
-         * or in the buffer regions.
-         * watching is not really being used right now, since I don't do partial
-         * synchronizations of particles.
-         * we may do this at some point in the future, if it seems needed...
-         * */
-        bool *watching;
-        /* computing is an array of shape [nparticles], with
-         * computing[p] being the rank that is currently working on particle p
-         * */
-        int *computing;
-
-        /* state will generally hold all the information about the particles.
-         * in the beginning, we will only need to solve 3D ODEs, but I figured
-         * a general ncomponents is better, since we may change our minds.
-         * */
-        double *state;
-        double *rhs[6];
-        int nparticles;
-        int ncomponents;
-        int array_size;
-        int interp_neighbours;
-        int buffer_width;
-        int integration_steps;
-        int traj_skip;
-        ptrdiff_t buffer_size;
-        double *lbound;
-        double *ubound;
-        //tensor_product_interpolation_formula spline_formula;
-        base_polynomial_values compute_beta;
-
-        /* simulation parameters */
-        char name[256];
-        int iteration;
-        double dt;
-
-        /* physical parameters of field */
-        rnumber dx, dy, dz;
-
-        /* methods */
-
-        /* constructor and destructor.
-         * allocate and deallocate:
-         *  this->state
-         *  this->lbound
-         *  this->ubound
-         *  this->watching
-         * */
-        slab_field_particles(
-                const char *NAME,
-                fluid_solver_base<rnumber> *FSOLVER,
-                const int NPARTICLES,
-                const int NCOMPONENTS,
-                base_polynomial_values BETA_POLYS,
-                const int INTERP_NEIGHBOURS,
-                const int TRAJ_SKIP,
-                const int INTEGRATION_STEPS = 2);
-        ~slab_field_particles();
-
-        /* an Euler step is needed to compute an estimate of future positions,
-         * which is needed for synchronization.
-         * */
-        virtual void jump_estimate(double *jump_length);
-        /* function get_rhs is virtual since we want children to do different things,
-         * depending on the type of particle.
-         * */
-        virtual void get_rhs(double *x, double *rhs);
-
-        /* generic methods, should work for all children of this class */
-        int get_rank(double z); // get rank for given value of z
-        void synchronize();
-        void synchronize_single_particle_state(int p, double *x, int source_id = -1);
-        void get_grid_coordinates(double *x, int *xg, double *xx);
-        void linear_interpolation(rnumber *field, int *xg, double *xx, double *dest, int *deriv);
-        void interpolation_formula(rnumber *field, int *xg, double *xx, double *dest, int *deriv);
-
-        void rFFTW_to_buffered(rnumber *src, rnumber *dst);
-
-        /* generic methods, should work for all children of this class */
-        void read(hid_t data_file_id);
-        void write(hid_t data_file_id, bool write_rhs = true);
-
-        /* solver stuff */
-        void step();
-        void roll_rhs();
-        void AdamsBashforth(int nsteps);
-        void Euler();
-        void Heun();
-        void cRK4();
-};
-
-
-#endif//SLAB_FIELD_PARTICLES
-
diff --git a/bfps/cpp/spline.hpp b/bfps/cpp/spline.hpp
deleted file mode 100644
index d66d2b1eb42278b987072ffff24d0123c86a1e2f..0000000000000000000000000000000000000000
--- a/bfps/cpp/spline.hpp
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef SPLINE_HPP
-#define SPLINE_HPP
-
-#include "spline_n1.hpp"
-#include "spline_n2.hpp"
-#include "spline_n3.hpp"
-#include "spline_n4.hpp"
-#include "spline_n5.hpp"
-#include "spline_n6.hpp"
-#include "spline_n7.hpp"
-#include "spline_n8.hpp"
-#include "spline_n9.hpp"
-#include "spline_n10.hpp"
-
-#endif
diff --git a/bfps/cpp/tracers.cpp b/bfps/cpp/tracers.cpp
deleted file mode 100644
index 3d9fbfb6a1e357d70452466b6cc901659444539d..0000000000000000000000000000000000000000
--- a/bfps/cpp/tracers.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-/**********************************************************************
-*                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
-*                 for Dynamics and Self-Organization                  *
-*                                                                     *
-*  This file is part of bfps.                                         *
-*                                                                     *
-*  bfps is free software: you can redistribute it and/or modify       *
-*  it under the terms of the GNU General Public License as published  *
-*  by the Free Software Foundation, either version 3 of the License,  *
-*  or (at your option) any later version.                             *
-*                                                                     *
-*  bfps is distributed in the hope that it will be useful,            *
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
-*  GNU General Public License for more details.                       *
-*                                                                     *
-*  You should have received a copy of the GNU General Public License  *
-*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
-*                                                                     *
-* Contact: Cristian.Lalescu@ds.mpg.de                                 *
-*                                                                     *
-**********************************************************************/
-
-
-
-#define NDEBUG
-
-
-#include <cmath>
-#include "base.hpp"
-#include "fftw_tools.hpp"
-#include "tracers.hpp"
-
-template <class rnumber>
-void tracers<rnumber>::jump_estimate(double *jump)
-{
-    int deriv[] = {0, 0, 0};
-    int *xg = new int[this->array_size];
-    double *xx = new double[this->array_size];
-    rnumber *vel = this->data + this->buffer_size;
-    double tmp[3];
-    /* get grid coordinates */
-    this->get_grid_coordinates(this->state, xg, xx);
-
-    /* perform interpolation */
-    for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p])
-    {
-        this->interpolation_formula(vel, xg + p*3, xx + p*3, tmp, deriv);
-        jump[p] = fabs(3*this->dt * tmp[2]);
-        if (jump[p] < this->dz*1.01)
-            jump[p] = this->dz*1.01;
-    }
-    delete[] xg;
-    delete[] xx;
-}
-
-template <class rnumber>
-void tracers<rnumber>::get_rhs(double *x, double *y)
-{
-    std::fill_n(y, this->array_size, 0.0);
-    int deriv[] = {0, 0, 0};
-    /* get grid coordinates */
-    int *xg = new int[this->array_size];
-    double *xx = new double[this->array_size];
-    rnumber *vel = this->data + this->buffer_size;
-    this->get_grid_coordinates(x, xg, xx);
-    //DEBUG_MSG(
-    //        "position is %g %g %g, grid_coords are %d %d %d %g %g %g\n",
-    //        x[0], x[1], x[2],
-    //        xg[0], xg[1], xg[2],
-    //        xx[0], xx[1], xx[2]);
-    /* perform interpolation */
-    for (int p=0; p<this->nparticles; p++)
-    {
-        if (this->watching[this->fs->rd->myrank*this->nparticles+p])
-        {
-            int crank = this->get_rank(x[p*3 + 2]);
-            if (this->fs->rd->myrank == crank)
-            {
-                this->interpolation_formula(vel, xg + p*3, xx + p*3, y + p*3, deriv);
-            DEBUG_MSG(
-                    "position is %g %g %g %d %d %d %g %g %g, result is %g %g %g\n",
-                    x[p*3], x[p*3+1], x[p*3+2],
-                    xg[p*3], xg[p*3+1], xg[p*3+2],
-                    xx[p*3], xx[p*3+1], xx[p*3+2],
-                    y[p*3], y[p*3+1], y[p*3+2]);
-            }
-            if (crank != this->computing[p])
-            {
-                this->synchronize_single_particle_state(p, y, crank);
-            }
-            //DEBUG_MSG(
-            //        "after synch crank is %d, computing rank is %d, position is %g %g %g, result is %g %g %g\n",
-            //        this->iteration, p,
-            //        crank, this->computing[p],
-            //        x[p*3], x[p*3+1], x[p*3+2],
-            //        y[p*3], y[p*3+1], y[p*3+2]);
-        }
-    }
-    delete[] xg;
-    delete[] xx;
-}
-
-template<class rnumber>
-void tracers<rnumber>::update_field(bool clip_on)
-{
-    if (clip_on)
-        clip_zero_padding<rnumber>(this->fs->rd, this->source_data, 3);
-    this->rFFTW_to_buffered(this->source_data, this->data);
-}
-
-/*****************************************************************************/
-/* macro for specializations to numeric types compatible with FFTW           */
-
-#define TRACERS_DEFINITIONS(FFTW, R, MPI_RNUM, MPI_CNUM) \
- \
-template <> \
-tracers<R>::tracers( \
-                const char *NAME, \
-                fluid_solver_base<R> *FSOLVER, \
-                const int NPARTICLES, \
-                base_polynomial_values BETA_POLYS, \
-                const int NEIGHBOURS, \
-                const int TRAJ_SKIP, \
-                const int INTEGRATION_STEPS, \
-                R *SOURCE_DATA) : slab_field_particles<R>( \
-                    NAME, \
-                    FSOLVER, \
-                    NPARTICLES, \
-                    3, \
-                    BETA_POLYS, \
-                    NEIGHBOURS, \
-                    TRAJ_SKIP, \
-                    INTEGRATION_STEPS) \
-{ \
-    this->source_data = SOURCE_DATA; \
-    this->data = FFTW(alloc_real)(this->buffered_field_descriptor->local_size); \
-} \
- \
-template<> \
-tracers<R>::~tracers() \
-{ \
-    FFTW(free)(this->data); \
-} \
- \
-template <> \
-void tracers<R>::sample_vec_field(R *vec_field, double *vec_values) \
-{ \
-    vec_field += this->buffer_size; \
-    double *vec_local =  new double[this->array_size]; \
-    std::fill_n(vec_local, this->array_size, 0.0); \
-    int deriv[] = {0, 0, 0}; \
-    /* get grid coordinates */ \
-    int *xg = new int[this->array_size]; \
-    double *xx = new double[this->array_size]; \
-    this->get_grid_coordinates(this->state, xg, xx); \
-    /* perform interpolation */ \
-    for (int p=0; p<this->nparticles; p++) \
-        if (this->fs->rd->myrank == this->computing[p]) \
-            this->interpolation_formula( \
-                    vec_field, \
-                    xg + p*3, \
-                    xx + p*3, \
-                    vec_local + p*3, \
-                    deriv); \
-    MPI_Allreduce( \
-            vec_local, \
-            vec_values, \
-            this->array_size, \
-            MPI_DOUBLE, \
-            MPI_SUM, \
-            this->fs->rd->comm); \
-    delete[] xg; \
-    delete[] xx; \
-    delete[] vec_local; \
-} \
-
-/*****************************************************************************/
-
-
-
-/*****************************************************************************/
-/* now actually use the macro defined above                                  */
-TRACERS_DEFINITIONS(
-        FFTW_MANGLE_FLOAT,
-        float,
-        MPI_FLOAT,
-        MPI_COMPLEX)
-TRACERS_DEFINITIONS(
-        FFTW_MANGLE_DOUBLE,
-        double,
-        MPI_DOUBLE,
-        BFPS_MPICXX_DOUBLE_COMPLEX)
-/*****************************************************************************/
-
-
-
-/*****************************************************************************/
-/* finally, force generation of code                                         */
-template class tracers<float>;
-template class tracers<double>;
-/*****************************************************************************/
-
diff --git a/bfps/test/test_Parseval.py b/bfps/test/test_Parseval.py
new file mode 100644
index 0000000000000000000000000000000000000000..00a88d24ca615375dcfc24b82db15b8f3496fcc1
--- /dev/null
+++ b/bfps/test/test_Parseval.py
@@ -0,0 +1,38 @@
+#! /usr/bin/env python
+
+import numpy as np
+import sys
+
+import bfps
+from bfps import DNS
+
+def main():
+    niterations = 10
+    nlist = [16, 32, 48, 24, 64, 12]
+    for ii in range(len(nlist)):
+        c = DNS()
+        c.launch(
+                ['NSVE',
+                 '--nx', str(nlist[ii]),
+                 '--ny', str(nlist[(ii+1)%(len(nlist))]),
+                 '--nz', str(nlist[(ii+2)%(len(nlist))]),
+                 '--Lx', str(2+np.random.random()),
+                 '--Ly', str(2+np.random.random()),
+                 '--Lz', str(2+np.random.random()),
+                 '--simname', 'test_Parseval_{0}'.format(ii),
+                 '--np', '4',
+                 '--ntpp', '1',
+                 '--niter_todo', '{0}'.format(niterations),
+                 '--niter_out', '{0}'.format(niterations),
+                 '--niter_stat', '1',
+                 '--wd', './'] +
+                 sys.argv[1:])
+        c.compute_statistics()
+        Parseval_error = np.abs((c.statistics['energy(t)'] - c.statistics['renergy(t)']) / c.statistics['renergy(t)'])
+        assert(np.max(Parseval_error) < 1e-6)
+    print('SUCCESS!!! Parseval test passed for unequal nx, ny, nz and random Lx, Ly, Lz')
+    return None
+
+if __name__ == '__main__':
+    main()
+
diff --git a/bfps/test/test_bfps_NSVEparticles.py b/bfps/test/test_bfps_NSVEparticles.py
index ab77e2103ccda7685cebe759f8e11cfe2a5b5ec9..fe1e7875a651b17dd9180f3cbe6d6bfe1f1b5c27 100644
--- a/bfps/test/test_bfps_NSVEparticles.py
+++ b/bfps/test/test_bfps_NSVEparticles.py
@@ -1,4 +1,29 @@
 #! /usr/bin/env python
+#######################################################################
+#                                                                     #
+#  Copyright 2019 Max Planck Institute                                #
+#                 for Dynamics and Self-Organization                  #
+#                                                                     #
+#  This file is part of bfps.                                         #
+#                                                                     #
+#  bfps is free software: you can redistribute it and/or modify       #
+#  it under the terms of the GNU General Public License as published  #
+#  by the Free Software Foundation, either version 3 of the License,  #
+#  or (at your option) any later version.                             #
+#                                                                     #
+#  bfps is distributed in the hope that it will be useful,            #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
+#  GNU General Public License for more details.                       #
+#                                                                     #
+#  You should have received a copy of the GNU General Public License  #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
+#                                                                     #
+# Contact: Cristian.Lalescu@ds.mpg.de                                 #
+#                                                                     #
+#######################################################################
+
+
 
 import os
 import numpy as np
@@ -18,11 +43,13 @@ def main():
             ['NSVEparticles',
              '-n', '32',
              '--src-simname', 'B32p1e4',
+             '--forcing_type', 'linear',
              '--src-wd', bfps.lib_dir + '/test',
              '--src-iteration', '0',
              '--simname', 'dns_nsveparticles',
              '--np', '4',
              '--ntpp', '1',
+             '--fftw_plan_rigor', 'FFTW_PATIENT',
              '--niter_todo', '{0}'.format(niterations),
              '--niter_out', '{0}'.format(niterations),
              '--niter_stat', '1',
@@ -41,13 +68,16 @@ def main():
     for iteration in [0, 32, 64]:
         field0 = f0['vorticity/complex/{0}'.format(iteration)].value
         field1 = f1['vorticity/complex/{0}'.format(iteration)].value
-        assert(np.max(np.abs(field0 - field1)) < 1e-5)
+        field_error = np.max(np.abs(field0 - field1))
         x0 = f0['tracers0/state/{0}'.format(iteration)].value
         x1 = f1['tracers0/state/{0}'.format(iteration)].value
-        assert(np.max(np.abs(x0 - x1)) < 1e-5)
+        traj_error = np.max(np.abs(x0 - x1))
         y0 = f0['tracers0/rhs/{0}'.format(iteration)].value
         y1 = f1['tracers0/rhs/{0}'.format(iteration)].value
-        assert(np.max(np.abs(y0 - y1)) < 1e-5)
+        rhs_error = np.max(np.abs(y0 - y1))
+        assert(field_error < 1e-5)
+        assert(traj_error < 1e-5)
+        assert(rhs_error < 1e-5)
     print('SUCCESS! Basic test passed.')
     return None
 
diff --git a/bfps/test/test_bfps_resize.py b/bfps/test/test_bfps_resize.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce0a051da909a34b3be9d7ebd15e0d923c0a09f6
--- /dev/null
+++ b/bfps/test/test_bfps_resize.py
@@ -0,0 +1,113 @@
+#! /usr/bin/env python
+
+import os
+import numpy as np
+import h5py
+import sys
+
+import bfps
+from bfps import DNS
+from bfps import PP
+
+import matplotlib.pyplot as plt
+import pyfftw
+
+
+def main():
+    niterations = 2
+    c = DNS()
+    c.launch(
+            ['NSVE',
+             '-n', '32',
+             '--src-simname', 'B32p1e4',
+             '--src-wd', bfps.lib_dir + '/test',
+             '--src-iteration', '0',
+             '--simname', 'dns_test',
+             '--np', '4',
+             '--ntpp', '1',
+             '--niter_todo', '{0}'.format(niterations),
+             '--niter_out', '{0}'.format(niterations),
+             '--niter_stat', '1',
+             '--wd', './'] +
+             sys.argv[1:])
+    rr = PP()
+    rr.launch(
+            ['resize',
+             '--simname', 'dns_test',
+             '--new_nx', '64',
+             '--new_ny', '64',
+             '--new_nz', '64',
+             '--new_simname', 'pp_resize_test',
+             '--np', '4',
+             '--ntpp', '1',
+             '--iter0', '0',
+             '--iter1', '{0}'.format(niterations),
+             '--wd', './'] +
+             sys.argv[1:])
+    f0 = h5py.File(c.get_checkpoint_0_fname(), 'r')
+    f1 = h5py.File('pp_resize_test_fields.h5', 'r')
+    d0 = f0['vorticity/complex/0'].value
+    d1 = f1['vorticity/complex/0'].value
+    small_kdata = pyfftw.n_byte_align_empty(
+            (32, 32, 17, 3),
+            pyfftw.simd_alignment,
+            dtype = c.ctype)
+    small_rdata = pyfftw.n_byte_align_empty(
+            (32, 32, 32, 3),
+            pyfftw.simd_alignment,
+            dtype = c.rtype)
+    small_plan = pyfftw.FFTW(
+            small_kdata.transpose((1, 0, 2, 3)),
+            small_rdata,
+            axes = (0, 1, 2),
+            direction = 'FFTW_BACKWARD',
+            threads = 4)
+    big_kdata = pyfftw.n_byte_align_empty(
+            (64, 64, 33, 3),
+            pyfftw.simd_alignment,
+            dtype = c.ctype)
+    big_rdata = pyfftw.n_byte_align_empty(
+            (64, 64, 64, 3),
+            pyfftw.simd_alignment,
+            dtype = c.rtype)
+    big_plan = pyfftw.FFTW(
+            big_kdata.transpose((1, 0, 2, 3)),
+            big_rdata,
+            axes = (0, 1, 2),
+            direction = 'FFTW_BACKWARD',
+            threads = 4)
+    small_kdata[:] = d0
+    big_kdata[:] = d1
+    small_plan.execute()
+    big_plan.execute()
+
+    se = np.mean(small_rdata**2, axis = 3)**.5
+    be = np.mean(big_rdata**2, axis = 3)**.5
+
+    f = plt.figure(figsize = (6, 4))
+    a = f.add_subplot(231)
+    a.set_axis_off()
+    a.imshow(se[0])
+    a = f.add_subplot(234)
+    a.set_axis_off()
+    a.imshow(be[0])
+    a = f.add_subplot(232)
+    a.set_axis_off()
+    a.imshow(se[:, 0])
+    a = f.add_subplot(235)
+    a.set_axis_off()
+    a.imshow(be[:, 0])
+    a = f.add_subplot(233)
+    a.set_axis_off()
+    a.imshow(se[:, :, 0])
+    a = f.add_subplot(236)
+    a.set_axis_off()
+    a.imshow(be[:, :, 0])
+    f.tight_layout()
+    f.savefig('resize_test.pdf')
+    plt.close(f)
+    return None
+
+if __name__ == '__main__':
+    main()
+
diff --git a/bfps/test/test_fftw.py b/bfps/test/test_fftw.py
new file mode 100644
index 0000000000000000000000000000000000000000..3de2d97df167567899fbf8b19c1123e5bf35cbe7
--- /dev/null
+++ b/bfps/test/test_fftw.py
@@ -0,0 +1,66 @@
+#! /usr/bin/env python
+
+import numpy as np
+import h5py
+import sys
+
+import bfps
+from bfps import TEST
+
+try:
+    import matplotlib.pyplot as plt
+except:
+    plt = None
+
+def main():
+    niterations = 10
+    nlist = [16, 32, 48, 24, 64, 12]
+    for ii in range(len(nlist)):
+        c = TEST()
+        c.launch(
+                ['symmetrize_test',
+                 '--nx', str(nlist[ii]),
+                 '--ny', str(nlist[(ii+1)%(len(nlist))]),
+                 '--nz', str(nlist[(ii+2)%(len(nlist))]),
+                 '--Lx', str(2+np.random.random()),
+                 '--Ly', str(2+np.random.random()),
+                 '--Lz', str(2+np.random.random()),
+                 '--simname', 'fftw_vs_numpy_{0}'.format(ii),
+                 '--np', '4',
+                 '--ntpp', '1',
+                 '--wd', './'] +
+                 sys.argv[1:])
+        df = h5py.File(c.simname + '.h5', 'r')
+        df = h5py.File(c.simname + '_fields.h5', 'r')
+        field1_complex = df['field1/complex/0'].value
+        field1_real = df['field1/real/0'].value
+        npoints = field1_real.size//3
+
+        np_field1_real = np.fft.irfftn(field1_complex, axes = (0, 1, 2)).transpose(1, 0, 2, 3)
+        L2normr = np.sqrt(np.mean(np.sum(field1_real**2, axis = 3)))
+        np_L2normr = np.sqrt(np.mean(np.sum(np_field1_real**2, axis = 3)))
+        err = np.max(np.abs(field1_real - np_field1_real*npoints)) / L2normr
+        assert(err < 1e-5)
+
+        np_field1_complex = np.fft.rfftn(field1_real.transpose(1, 0, 2, 3), axes = (0, 1, 2)) / npoints
+
+        L2norm0 = np.sqrt(np.sum(np.abs(field1_complex[:, :, 0])**2) + 2*np.sum(np.abs(field1_complex[:, :, 1:])**2))
+        L2norm1 = np.sqrt(np.sum(np.abs(np_field1_complex[:, :, 0])**2) + 2*np.sum(np.abs(np_field1_complex[:, :, 1:])**2))
+        err = np.max(np.abs(np_field1_complex - field1_complex)) / L2norm0
+        assert(err < 1e-5)
+
+        err = abs(L2normr - L2norm0) / L2norm0
+        assert(err < 1e-5)
+
+        if not type(plt) == type(None):
+            f = plt.figure()
+            a = f.add_subplot(121)
+            a.imshow(np.log(np.abs(np_field1_complex[:, :, 0, 0])), interpolation = 'nearest')
+            a = f.add_subplot(122)
+            a.imshow(np.log(np.abs(field1_complex[:, :, 0, 0])), interpolation = 'nearest')
+            f.savefig(c.simname + '_complex_slice_kx0.pdf')
+    return None
+
+if __name__ == '__main__':
+    main()
+
diff --git a/bfps/test/test_interpolation.py b/bfps/test/test_interpolation.py
new file mode 100644
index 0000000000000000000000000000000000000000..eeb40248388d8a67d341f000b264bc9e7ac1dec0
--- /dev/null
+++ b/bfps/test/test_interpolation.py
@@ -0,0 +1,54 @@
+#! /usr/bin/env python
+
+import os
+import numpy as np
+import h5py
+import sys
+
+import bfps
+from bfps import TEST
+
+try:
+    import matplotlib.pyplot as plt
+    matplotlib_on = True
+except ImportError:
+    matplotlib_on = False
+
+
+def main():
+    nparticles = 100
+    c = TEST()
+    c.launch(
+            ['test_interpolation',
+             '-n', '32',
+             '--np', '4',
+             '--ntpp', '1',
+             #'--nparticles', '{0}'.format(nparticles),
+             '--wd', './'] +
+             sys.argv[3:])
+    ifile = h5py.File(
+            'test_input.h5',
+            'r')
+    ofile = h5py.File(
+            'test_output.h5',
+            'r')
+    pos0 = ifile['tracers0/state/0'].value
+    pos1 = ofile['tracers0/position/0'].value
+    assert(np.max(np.abs(pos0-pos1) / np.abs(pos0)) <= 1e-5)
+    vort0 = ofile['tracers0/vorticity/0'].value
+    vel_gradient = ofile['tracers0/velocity_gradient/0'].value
+    vort1 = vort0.copy()
+    vort1[:, 0] = vel_gradient[:, 5] - vel_gradient[:, 7]
+    vort1[:, 1] = vel_gradient[:, 6] - vel_gradient[:, 2]
+    vort1[:, 2] = vel_gradient[:, 1] - vel_gradient[:, 3]
+    assert(np.max(np.abs(vort0-vort1) / np.abs(vort0)) <= 1e-5)
+    divergence = vel_gradient[:, 0] + vel_gradient[:, 4] + vel_gradient[:, 8]
+    divergence_error = np.abs(divergence) / (vel_gradient[:, 0]**2 + vel_gradient[:, 1]**2 + vel_gradient[:, 2]**2)**.5
+    print('mean divergence error is ', np.mean(divergence_error))
+    print('maximum divergence error is ', np.max(divergence_error))
+    print('SUCCESS! Interpolated vorticity agrees with vorticity from interpolated velocity gradient.')
+    return None
+
+if __name__ == '__main__':
+    main()
+
diff --git a/bfps/test/test_particle_clouds.py b/bfps/test/test_particle_clouds.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d2045390f51e7f529f78a3eb7037acb3fcae3b9
--- /dev/null
+++ b/bfps/test/test_particle_clouds.py
@@ -0,0 +1,93 @@
+#! /usr/bin/env python
+#######################################################################
+#                                                                     #
+#  Copyright 2019 Max Planck Institute                                #
+#                 for Dynamics and Self-Organization                  #
+#                                                                     #
+#  This file is part of bfps.                                         #
+#                                                                     #
+#  bfps is free software: you can redistribute it and/or modify       #
+#  it under the terms of the GNU General Public License as published  #
+#  by the Free Software Foundation, either version 3 of the License,  #
+#  or (at your option) any later version.                             #
+#                                                                     #
+#  bfps is distributed in the hope that it will be useful,            #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
+#  GNU General Public License for more details.                       #
+#                                                                     #
+#  You should have received a copy of the GNU General Public License  #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
+#                                                                     #
+# Contact: Cristian.Lalescu@ds.mpg.de                                 #
+#                                                                     #
+#######################################################################
+
+
+
+import os
+import numpy as np
+import h5py
+import sys
+
+import bfps
+from bfps import DNS
+
+
+def main():
+    nclouds = 10
+    nparticles_per_cloud = 1000
+    nparticles = nclouds*nparticles_per_cloud
+    niterations = 32
+    c = DNS()
+    c.dns_type = 'NSVEparticles'
+    c.parameters['nparticles'] = nparticles
+    c.parameters['tracers1_integration_steps'] = 4
+    c.generate_tracer_state(rseed = 2, species = 1)
+    del c.parameters['nparticles']
+    del c.parameters['tracers1_integration_steps']
+    ic_file = h5py.File(c.get_checkpoint_0_fname(), 'a')
+    ic_file['tracers0/state/0'] = ic_file['tracers1/state/0'].value.reshape(nclouds, nparticles_per_cloud, 3)
+    ic_file['tracers0/rhs/0'] = ic_file['tracers1/rhs/0'].value.reshape(4, nclouds, nparticles_per_cloud, 3)
+    ic_file.close()
+    c.launch(
+            ['NSVEparticles',
+             '-n', '32',
+             '--src-simname', 'B32p1e4',
+             '--forcing_type', 'linear',
+             '--src-wd', bfps.lib_dir + '/test',
+             '--src-iteration', '0',
+             '--np', '4',
+             '--ntpp', '1',
+             '--fftw_plan_rigor', 'FFTW_PATIENT',
+             '--niter_todo', '{0}'.format(niterations),
+             '--niter_out', '{0}'.format(niterations),
+             '--niter_stat', '1',
+             '--nparticles', '{0}'.format(nparticles),
+             '--njobs', '2',
+             '--wd', './'])
+    f0 = h5py.File(
+            os.path.join(
+                os.path.join(bfps.lib_dir, 'test'),
+                'B32p1e4_checkpoint_0.h5'),
+            'r')
+    f1 = h5py.File(c.get_checkpoint_0_fname(), 'r')
+    for iteration in [0, 32, 64]:
+        field0 = f0['vorticity/complex/{0}'.format(iteration)].value
+        field1 = f1['vorticity/complex/{0}'.format(iteration)].value
+        field_error = np.max(np.abs(field0 - field1))
+        x0 = f0['tracers0/state/{0}'.format(iteration)].value
+        x1 = f1['tracers0/state/{0}'.format(iteration)].value.reshape(x0.shape)
+        traj_error = np.max(np.abs(x0 - x1))
+        y0 = f0['tracers0/rhs/{0}'.format(iteration)].value
+        y1 = f1['tracers0/rhs/{0}'.format(iteration)].value.reshape(y0.shape)
+        rhs_error = np.max(np.abs(y0 - y1))
+        assert(field_error < 1e-5)
+        assert(traj_error < 1e-5)
+        assert(rhs_error < 1e-5)
+    print('SUCCESS! Basic test passed.')
+    return None
+
+if __name__ == '__main__':
+    main()
+
diff --git a/bfps/test/test_particles.py b/bfps/test/test_particles.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d3abec14e1c822224290e247593eda9b02a8f6b
--- /dev/null
+++ b/bfps/test/test_particles.py
@@ -0,0 +1,133 @@
+#! /usr/bin/env python
+
+import os
+import numpy as np
+import h5py
+import sys
+
+import bfps
+from bfps import DNS
+
+try:
+    import matplotlib.pyplot as plt
+    matplotlib_on = True
+except ImportError:
+    matplotlib_on = False
+
+
+def main():
+    assert(sys.argv[1] in ['p2p_sampling'])
+    assert(sys.argv[2] in ['on', 'off'])
+    niterations = 32
+    nparticles = 1000
+    njobs = 1
+    if sys.argv[2] == 'on':
+        c = DNS()
+        c.launch(
+                ['NSVEcomplex_particles',
+                 '-n', '32',
+                 '--src-simname', 'B32p1e4',
+                 '--src-wd', bfps.lib_dir + '/test',
+                 '--src-iteration', '0',
+                 '--np', '4',
+                 '--ntpp', '1',
+                 '--niter_todo', '{0}'.format(niterations),
+                 '--niter_out', '{0}'.format(niterations),
+                 '--niter_stat', '1',
+                 '--checkpoints_per_file', '{0}'.format(3),
+                 '--nparticles', '{0}'.format(nparticles),
+                 '--particle-rand-seed', '2',
+                 '--njobs', '{0}'.format(njobs),
+                 '--wd', './'] +
+                 sys.argv[3:])
+    if sys.argv[1] == 'p2p_sampling':
+        cf = h5py.File(
+                'test_checkpoint_0.h5',
+                'r')
+        pf = h5py.File(
+                'test_particles.h5',
+                'r')
+        if matplotlib_on:
+            # initial condition:
+            # show a histogram of the orientations
+            f = plt.figure()
+            a = f.add_subplot(111)
+            for iteration in range(1):
+                x = cf['tracers0/state/{0}'.format(iteration)][:, 3:]
+                hist, bins = np.histogram(
+                        np.sum(x**2, axis = -1).flatten()**.5,
+                        bins = np.linspace(0, 2, 40))
+                bb = (bins[:-1] + bins[1:])/2
+                pp = hist.astype(np.float) / (np.sum(hist) * (bb[1] - bb[0]))
+                a.plot(bb, pp, label = '{0}'.format(iteration))
+            a.legend(loc = 'best')
+            f.tight_layout()
+            f.savefig('orientation_histogram.pdf')
+            plt.close(f)
+            # show a histogram of the positions
+            f = plt.figure()
+            a = f.add_subplot(111)
+            for iteration in range(0, niterations*njobs+1, niterations//2):
+                x = pf['tracers0/position/{0}'.format(iteration)].value
+                hist, bins = np.histogram(
+                        np.sum(x**2, axis = -1).flatten()**.5,
+                        bins = 40)
+                bb = (bins[:-1] + bins[1:])/2
+                pp = hist.astype(np.float) / (np.sum(hist) * (bb[1] - bb[0]))
+                a.plot(bb, pp, label = '{0}'.format(iteration))
+            a.legend(loc = 'best')
+            f.tight_layout()
+            f.savefig('position_histogram.pdf')
+            plt.close(f)
+            # show a histogram of the orientations
+            f = plt.figure()
+            a = f.add_subplot(111)
+            for iteration in range(0, niterations*njobs+1, niterations//2):
+                x = pf['tracers0/orientation/{0}'.format(iteration)].value
+                hist, bins = np.histogram(
+                        np.sum(x**2, axis = -1).flatten()**.5,
+                        bins = np.linspace(0, 2, 40))
+                bb = (bins[:-1] + bins[1:])/2
+                pp = hist.astype(np.float) / (np.sum(hist) * (bb[1] - bb[0]))
+                a.plot(bb, pp, label = '{0}'.format(iteration))
+            a.legend(loc = 'best')
+            f.tight_layout()
+            f.savefig('orientation_histogram.pdf')
+            plt.close(f)
+            # compared sampled positions with checkpoint positions
+            for iteration in range(0, niterations*njobs+1, niterations):
+                x = pf['tracers0/position/{0}'.format(iteration)].value
+                s = cf['tracers0/state/{0}'.format(iteration)].value
+                distance = (np.max(np.abs(x - s[..., :3]) /
+                                   np.maximum(np.ones(x.shape),
+                                              np.maximum(np.abs(x),
+                                                         np.abs(s[..., :3])))))
+                assert(distance < 1e-14)
+                x = pf['tracers0/orientation/{0}'.format(iteration)].value
+                distance = (np.max(np.abs(x - s[..., 3:]) /
+                                   np.maximum(np.ones(x.shape),
+                                              np.maximum(np.abs(x),
+                                                         np.abs(s[..., 3:])))))
+                assert(distance < 1e-14)
+            # code relevant when velocity field is 0 everywhere.
+            # we check to see what happens to the orientation of the particles
+            # show a histogram of the orientations
+            f = plt.figure()
+            a = f.add_subplot(111)
+            for iteration in range(0, niterations*njobs+1, niterations//4):
+                x = pf['tracers0/orientation/{0}'.format(iteration)].value
+                hist, bins = np.histogram(
+                        x.flatten(),
+                        bins = 100)
+                bb = (bins[:-1] + bins[1:])/2
+                pp = hist.astype(np.float) / (np.sum(hist) * (bb[1] - bb[0]))
+                a.plot(bb, pp, label = '{0}'.format(iteration))
+            a.legend(loc = 'best')
+            f.tight_layout()
+            f.savefig('full_orientation_histogram.pdf')
+            plt.close(f)
+    return None
+
+if __name__ == '__main__':
+    main()
+
diff --git a/bfps/tools.py b/bfps/tools.py
index 69756ec648409ab52d57930d26b1ab1ca8b942c1..0acf51b539826a4ff18a6b6458d6ac5f777344a1 100644
--- a/bfps/tools.py
+++ b/bfps/tools.py
@@ -1,26 +1,25 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
+################################################################################
+#                                                                              #
+#  Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
 
 
 
@@ -143,6 +142,19 @@ def generate_data_3D(
     a[ii] = 0
     return a
 
+
+def generate_random_discontinuous_data_3D(
+        n0, n1, n2,
+        dtype = np.complex128,
+        p = 1.5,
+        amplitude = 0.5):
+    """returns the Fourier representation of a random field.
+    """
+    assert(n0 % 2 == 0 and n1 % 2 == 0 and n2 % 2 == 0)
+    a = np.random.randn(n1, n0, n2)
+    b = np.fft.rfftn(a).astype(dtype)
+    return b
+
 def randomize_phases(v):
     """randomize the phases of an FFTW complex field.
 
@@ -190,10 +202,10 @@ def padd_with_zeros(
     """
     if (type(odtype) == type(None)):
         odtype = a.dtype
-    assert(a.shape[0] <= n0 and
-           a.shape[1] <= n1 and
+    assert(a.shape[0] <= n1 and
+           a.shape[1] <= n0 and
            a.shape[2] <= n2//2+1)
-    b = np.zeros((n0, n1, n2//2 + 1) + a.shape[3:], dtype = odtype)
+    b = np.zeros((n1, n0, n2//2 + 1) + a.shape[3:], dtype = odtype)
     m0 = a.shape[1]
     m1 = a.shape[0]
     m2 = a.shape[2]
diff --git a/machine_settings_py.py b/cmake/BFPSConfig.cmake.in
similarity index 53%
rename from machine_settings_py.py
rename to cmake/BFPSConfig.cmake.in
index 787f1d5a10b9b0b260b42a1da18d35e67c56dacc..bd2af7160bbd8583b4d6ebd8cd6d710fc6fdfb9f 100644
--- a/machine_settings_py.py
+++ b/cmake/BFPSConfig.cmake.in
@@ -1,6 +1,6 @@
 #######################################################################
 #                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
+#  Copyright 2019 Max Planck Institute                                #
 #                 for Dynamics and Self-Organization                  #
 #                                                                     #
 #  This file is part of bfps.                                         #
@@ -23,41 +23,45 @@
 #######################################################################
 
 
+#-----------------------------------------------------------------------------
+#
+# BFPSConfig.cmake - BFPS CMake configuration file for external projects.
+#
+# This file is configured by BFPS and used by the BFPS.cmake module
+# to load BFPS's settings for an external project.
+#
+@BFPS_CONFIG_INSTALL_ONLY@
 
-import os
+#
+SET(BFPS_VERSION "@BFPS_VERSION@")
 
-########################################################################
-# these lists should be adapted for your different environment(s)
-# personally, I have access to setups where my home folder is shared
-# between different machines, including cluster and desktop, therefore
-# I check the host name when choosing libraries etc.
-# feel free to do your own thing to the copy of this file placed in
-# ./config/bfps
-########################################################################
+#
+SET(HAVE_BFPS TRUE)
+SET(BFPS_PREFIX        "@CMAKE_INSTALL_PREFIX@")
+SET(BFPS_INCLUDE_DIR   "@CMAKE_INSTALL_PREFIX@/include")
+SET(BFPS_LIBRARIES_DIR "@CMAKE_INSTALL_PREFIX@/lib")
 
-hostname = os.getenv('HOSTNAME')
+SET(BFPS_LINK_DIRECTORIES  "@ALL_LINK_DIRS@")
+SET(BFPS_INCLUDE_DIRECTORIES  "@ALL_INCLUDE_DIRS@")
 
-compiler = 'g++'
-extra_compile_args = ['-Wall', '-O2', '-g', '-mtune=native', '-ffast-math', '-std=c++11']
-extra_libraries = ['hdf5']
-include_dirs = []
-library_dirs = []
+SET(BFPS_CXX_COMPILE_FLAGS  "@CMAKE_CXX_COMPILE_FLAGS@")
+SET(BFPS_CXX_COMPILER  "@CMAKE_CXX_COMPILER@")
+SET(BFPS_C_COMPILER  "@CMAKE_C_COMPILER@")
+SET(BFPS_EXE_LINKER_FLAGS  "@CMAKE_EXE_LINKER_FLAGS@")
+SET(BFPS_LIBS  "@BFPS_LIBS@")
+set(BFPS_DEFINITIONS @COMPILE_DEFINITIONS@)
 
-if hostname == 'chichi-G':
-    include_dirs = ['/usr/local/include',
-                    '/usr/include/mpich']
-    library_dirs = ['/usr/local/lib',
-                    '/usr/lib/mpich']
-    extra_libraries += ['mpich']
+#
+SET(BFPS_SOURCE_DIR	   "@BFPS_SOURCE_DIR@")
 
-if hostname in ['tolima', 'misti']:
-    local_install_dir = '/scratch.local/chichi/installs'
+#
+SET(BFPS_BUILD_TYPE     "@CMAKE_BUILD_TYPE@")
 
-    include_dirs = ['/usr/lib64/mpi/gcc/openmpi/include',
-                    os.path.join(local_install_dir, 'include')]
+#
+SET(BFPS_HDF5_USE_SZIP  "@BFPS_HDF5_USE_SZIP@")
+SET(BFPS_HDF5_SZIP_LIB_PATH "@BFPS_HDF5_SZIP_LIB_PATH@")
 
-    library_dirs = ['/usr/lib64/mpi/gcc/openmpi/lib64',
-                    os.path.join(local_install_dir, 'lib'),
-                    os.path.join(local_install_dir, 'lib64')]
-    extra_libraries += ['mpi_cxx', 'mpi']
+#
+set(BFPS_SRC_INCLUDE_DIRS "@BFPS_INCLUDE_DIRS@")
+set(BFPS_BUILD_LIBRARY_DIRS "@BFPS_LIB_DIR@")
 
diff --git a/cmake/morse/FindCommon.cmake b/cmake/morse/FindCommon.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..95d8c1f5404c0d7ea2384d84bd12c2e4a3cc3418
--- /dev/null
+++ b/cmake/morse/FindCommon.cmake
@@ -0,0 +1,47 @@
+###
+#
+# @copyright (c) 2018 Inria. All rights reserved.
+#
+###
+#
+#  @file FindCommon.cmake
+#
+#  @project MORSE
+#  MORSE is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah Univesity of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+#  @version 1.0.0
+#  @author Florent Pruvost
+#  @date 13-04-2018
+#
+###
+
+# clean these variables before using them in CMAKE_REQUIRED_* variables in
+# check_function_exists
+macro(finds_remove_duplicates)
+  if (REQUIRED_DEFINITIONS)
+    list(REMOVE_DUPLICATES REQUIRED_DEFINITIONS)
+  endif()
+  if (REQUIRED_INCDIRS)
+    list(REMOVE_DUPLICATES REQUIRED_INCDIRS)
+  endif()
+  if (REQUIRED_FLAGS)
+    list(REMOVE_DUPLICATES REQUIRED_FLAGS)
+  endif()
+  if (REQUIRED_LDFLAGS)
+    list(REMOVE_DUPLICATES REQUIRED_LDFLAGS)
+  endif()
+  if (REQUIRED_LIBS)
+    list(REVERSE REQUIRED_LIBS)
+    list(REMOVE_DUPLICATES REQUIRED_LIBS)
+    list(REVERSE REQUIRED_LIBS)
+  endif()
+endmacro()
+
+##
+## @end file FindCommon
+##
diff --git a/cmake/morse/FindFFTW.cmake b/cmake/morse/FindFFTW.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..37450baea9f52a9a4e8a1236d6234d3c3840ba79
--- /dev/null
+++ b/cmake/morse/FindFFTW.cmake
@@ -0,0 +1,832 @@
+###
+#
+# @copyright (c) 2009-2014 The University of Tennessee and The University
+#                          of Tennessee Research Foundation.
+#                          All rights reserved.
+# @copyright (c) 2012-2018 Inria. All rights reserved.
+# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+#
+###
+#
+# - Find FFTW Version 3 include dirs and libraries
+# Default configuration will find the real double precision fftw library version
+# without THREADS|OMP.
+# Use this module by invoking find_package with the form:
+#  find_package(FFTW
+#               [REQUIRED] # Fail with error if fftw is not found
+#               [COMPONENTS MKL]
+#
+#  COMPONENTS can be some of the following:
+#   - MKL:     to detect the FFTW from Intel MKL
+#   - ESSL:    to detect the FFTW from IBM ESSL
+#   - THREADS: to detect the Threads version of FFTW
+#   - OMP:     to detect the OpenMP version of FFTW
+#   - SIMPLE:  to detect the FFTW simple precision fftw3f
+#   - LONG:    to detect the FFTW long double precision fftw3l
+#   - QUAD:    to detect the FFTW quadruple precision fftw3q
+#
+# This module finds headers and fftw library.
+# Results are reported in variables:
+#  FFTW_FOUND            - True if headers and requested libraries were found
+#  FFTW_CFLAGS_OTHER      - fftw compiler flags without headers paths
+#  FFTW_LDFLAGS_OTHER     - fftw linker flags without libraries
+#  FFTW_INCLUDE_DIRS      - fftw include directories
+#  FFTW_LIBRARY_DIRS      - fftw link directories
+#  FFTW_LIBRARIES         - fftw libraries to be linked (absolute path)
+#  FFTW_CFLAGS_OTHER_DEP  - fftw + dependencies compiler flags without headers paths
+#  FFTW_LDFLAGS_OTHER_DEP - fftw + dependencies linker flags without libraries
+#  FFTW_INCLUDE_DIRS_DEP  - fftw + dependencies include directories
+#  FFTW_LIBRARY_DIRS_DEP  - fftw + dependencies link directories
+#  FFTW_LIBRARIES_DEP     - fftw + dependencies libraries
+#
+#  FFTW_FOUND_WITH_PKGCONFIG - True if found with pkg-config
+#  if found with pkg-config the following variables are set
+#  <PREFIX>  = FFTW3F or FFTW3 or FFTW3L or FFTW3Q
+#  <XPREFIX> = <PREFIX>        for common case
+#  <XPREFIX> = <PREFIX>_STATIC for static linking
+#  <XPREFIX>_FOUND          ... set to 1 if module(s) exist
+#  <XPREFIX>_LIBRARIES      ... only the libraries (w/o the '-l')
+#  <XPREFIX>_LIBRARY_DIRS   ... the paths of the libraries (w/o the '-L')
+#  <XPREFIX>_LDFLAGS        ... all required linker flags
+#  <XPREFIX>_LDFLAGS_OTHER  ... all other linker flags
+#  <XPREFIX>_INCLUDE_DIRS   ... the '-I' preprocessor flags (w/o the '-I')
+#  <XPREFIX>_CFLAGS         ... all required cflags
+#  <XPREFIX>_CFLAGS_OTHER   ... the other compiler flags
+#
+# The user can give specific paths where to find the libraries adding cmake
+# options at configure (ex: cmake path/to/project -DFFTW_DIR=path/to/fftw):
+#  FFTW_DIR             - Where to find the base directory of fftw
+#  FFTW_INCDIR          - Where to find the header files
+#  FFTW_LIBDIR          - Where to find the library files
+# The module can also look for the following environment variables if paths
+# are not given as cmake variable: FFTW_DIR, FFTW_INCDIR, FFTW_LIBDIR
+# For MKL case and if no paths are given as hints, we will try to use the MKLROOT
+# environment variable
+
+#=============================================================================
+# Copyright 2012-2018 Inria
+# Copyright 2012-2013 Emmanuel Agullo
+# Copyright 2012-2013 Mathieu Faverge
+# Copyright 2012      Cedric Castagnede
+# Copyright 2013-2018 Florent Pruvost
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file MORSE-Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#=============================================================================
+# (To distribute this file outside of Morse, substitute the full
+#  License text for the above reference.)
+
+# Common macros to use in finds
+include(FindInit)
+
+if (NOT FFTW_FOUND)
+  set(FFTW_DIR "" CACHE PATH "Installation directory of FFTW library given by user")
+  if (NOT FFTW_FIND_QUIETLY)
+    message(STATUS "A cache variable, namely FFTW_DIR, has been set to specify the install directory of FFTW")
+  endif()
+endif()
+
+# Set the version to find
+set(FFTW_LOOK_FOR_MKL OFF)
+set(FFTW_LOOK_FOR_ESSL OFF)
+set(FFTW_LOOK_FOR_THREADS OFF)
+set(FFTW_LOOK_FOR_OMP OFF)
+set(FFTW_LOOK_FOR_FFTW_SIMPLE OFF)
+set(FFTW_LOOK_FOR_FFTW_LONG OFF)
+set(FFTW_LOOK_FOR_FFTW_QUAD OFF)
+
+if( FFTW_FIND_COMPONENTS )
+  foreach( component ${FFTW_FIND_COMPONENTS} )
+    if (${component} STREQUAL "THREADS")
+      # means we look for the Threads version of FFTW
+      set(FFTW_LOOK_FOR_THREADS ON)
+    endif()
+    if (${component} STREQUAL "OMP")
+      # means we look for the OpenMP version of FFTW
+      set(FFTW_LOOK_FOR_OMP ON)
+    endif()
+    if (${component} STREQUAL "SIMPLE")
+      # means we look for FFTW simple precision (fftw3f)
+      set(FFTW_LOOK_FOR_FFTW_SIMPLE ON)
+      set(FFTW_LOOK_FOR_FFTW_LONG OFF)
+      set(FFTW_LOOK_FOR_FFTW_QUAD OFF)
+    endif()
+    if (${component} STREQUAL "LONG")
+      # means we look for FFTW long double precision (fftw3l)
+      set(FFTW_LOOK_FOR_FFTW_SIMPLE OFF)
+      set(FFTW_LOOK_FOR_FFTW_LONG ON)
+      set(FFTW_LOOK_FOR_FFTW_QUAD OFF)
+    endif()
+    if (${component} STREQUAL "QUAD")
+      # means we look for FFTW quad precision (fftw3q)
+      set(FFTW_LOOK_FOR_FFTW_SIMPLE OFF)
+      set(FFTW_LOOK_FOR_FFTW_LONG OFF)
+      set(FFTW_LOOK_FOR_FFTW_QUAD ON)
+    endif()
+    if (${component} STREQUAL "MKL")
+      # means we look for the Intel MKL version of FFTW
+      set(FFTW_LOOK_FOR_MKL ON)
+      if (FFTW_LOOK_FOR_FFTW_LONG)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(WARNING "Looking for FFTW -- long precision functions do not exist in MKL FFTW")
+        endif()
+        set(FFTW_LOOK_FOR_FFTW_LONG OFF)
+      endif()
+      if (FFTW_LOOK_FOR_FFTW_QUAD)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(WARNING "Looking for FFTW -- quadruple functions do not exist in MKL FFTW")
+        endif()
+        set(FFTW_LOOK_FOR_FFTW_QUAD OFF)
+      endif()
+    endif()
+    if (${component} STREQUAL "ESSL")
+      # means we look for the Intel MKL version of FFTW
+      set(FFTW_LOOK_FOR_ESSL ON)
+      if (FFTW_LOOK_FOR_FFTW_LONG)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(WARNING "Looking for FFTW -- long precision functions do not exist in FFTW_ESSL")
+        endif()
+        set(FFTW_LOOK_FOR_FFTW_LONG OFF)
+      endif()
+      if (FFTW_LOOK_FOR_FFTW_QUAD)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(WARNING "Looking for FFTW -- quadruple functions do not exist in FFTW_ESSL")
+        endif()
+        set(FFTW_LOOK_FOR_FFTW_QUAD OFF)
+      endif()
+      if (FFTW_LOOK_FOR_OMP)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(WARNING "Looking for FFTW -- FFTW_ESSL does not use OpenMP")
+        endif()
+        set(FFTW_LOOK_FOR_OMP OFF)
+      endif()
+    endif()
+  endforeach()
+endif()
+
+if (FFTW_LOOK_FOR_THREADS)
+  if (NOT FFTW_FIND_QUIETLY)
+    message(STATUS "FFTW looks for threads")
+  endif()
+  if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_THREADS)
+    find_package(Threads REQUIRED)
+  else()
+    find_package(Threads)
+  endif()
+endif()
+
+if (FFTW_LOOK_FOR_OMP)
+  if (NOT FFTW_FIND_QUIETLY)
+    message(STATUS "FFTW looks for openmp")
+  endif()
+  if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_OMP)
+    find_package(OpenMP REQUIRED)
+  else()
+    find_package(OpenMP)
+  endif()
+endif()
+
+if (FFTW_LOOK_FOR_MKL)
+  if (NOT FFTW_FIND_QUIETLY)
+    message(STATUS "FFTW looks for threads and Intel MKL")
+  endif()
+  if (FFTW_LOOK_FOR_THREADS)
+    set(BLA_VENDOR "Intel10_64lp")
+  else()
+    set(BLA_VENDOR "Intel10_64lp_seq")
+  endif()
+  if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_MKL)
+    find_package(Threads REQUIRED)
+    find_package(BLAS REQUIRED)
+  else()
+    find_package(Threads)
+    find_package(BLAS)
+  endif()
+endif()
+
+if (FFTW_LOOK_FOR_ESSL)
+  if (NOT FFTW_FIND_QUIETLY)
+    message(STATUS "FFTW looks for IBM ESSL")
+  endif()
+  if (FFTW_LOOK_FOR_THREADS)
+    set(BLA_VENDOR "IBMESSLMT")
+  else()
+    set(BLA_VENDOR "IBMESSL")
+  endif()
+  if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_ESSL)
+    find_package(BLAS REQUIRED)
+  else()
+    find_package(BLAS)
+  endif()
+endif()
+
+
+if( THREADS_FOUND )
+  libraries_absolute_path(CMAKE_THREAD_LIBS_INIT "")
+endif ()
+  
+set(ENV_FFTW_DIR "$ENV{FFTW_DIR}")
+set(ENV_FFTW_INCDIR "$ENV{FFTW_INCDIR}")
+set(ENV_FFTW_LIBDIR "$ENV{FFTW_LIBDIR}")
+set(FFTW_GIVEN_BY_USER "FALSE")
+if ( FFTW_DIR OR ( FFTW_INCDIR AND FFTW_LIBDIR) OR ENV_FFTW_DIR OR (ENV_FFTW_INCDIR AND ENV_FFTW_LIBDIR) )
+  set(FFTW_GIVEN_BY_USER "TRUE")
+endif()
+
+
+# Optionally use pkg-config to detect include/library dirs (if pkg-config is available)
+# -------------------------------------------------------------------------------------
+if (NOT FFTW_LOOK_FOR_MKL AND NOT FFTW_LOOK_FOR_ESSL)
+  include(FindPkgConfig)
+  find_package(PkgConfig QUIET)
+  if( PKG_CONFIG_EXECUTABLE AND NOT FFTW_GIVEN_BY_USER )
+
+    set(FFTW_INCLUDE_DIRS)
+    set(FFTW_LIBRARY_DIRS)
+    set(FFTW_LIBRARIES)
+
+    if(FFTW_LOOK_FOR_FFTW_SIMPLE)
+      pkg_search_module(FFTW3F fftw3f)
+      pkg_search_module(FFTW3 fftw3)
+      if (FFTW3F_FOUND)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(STATUS "Looking for FFTW3F - found using PkgConfig")
+        endif()
+        if (FFTW3F_LIBRARIES)
+          find_pkgconfig_libraries_absolute_path(FFTW3F)
+          list(APPEND FFTW_LIBRARIES "${FFTW3F_LIBRARIES}")
+        endif()
+        if(FFTW3F_INCLUDE_DIRS)
+          list(APPEND FFTW_INCLUDE_DIRS "${FFTW3F_INCLUDE_DIRS}")
+        else()
+          if (NOT FFTW_FIND_QUIETLY)
+            message(WARNING "FFTW3F_INCLUDE_DIRS is empty using PkgConfig."
+              "Perhaps the path to fftw3f headers is already present in your"
+              "CPATH/C(PLUS)_INCLUDE_PATH environment variables.")
+          endif()
+        endif()
+        if(FFTW3F_LIBRARY_DIRS)
+          list(APPEND FFTW_LIBRARY_DIRS "${FFTW3F_LIBRARY_DIRS}")
+        endif()
+      else(FFTW3F_FOUND)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(STATUS "Looking for FFTW3F - not found using PkgConfig."
+            "\n   Perhaps you should add the directory containing fftw3f.pc to"
+            "\n   the PKG_CONFIG_PATH environment variable.")
+        endif()
+      endif(FFTW3F_FOUND)
+    elseif(FFTW_LOOK_FOR_FFTW_LONG)
+      pkg_search_module(FFTW3L fftw3l)
+      pkg_search_module(FFTW3 fftw3)
+      if (FFTW3L_FOUND)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(STATUS "Looking for FFTW3L - found using PkgConfig")
+        endif()
+        if (FFTW3L_LIBRARIES)
+          find_pkgconfig_libraries_absolute_path(FFTW3L)
+          list(APPEND FFTW_LIBRARIES "${FFTW3L_LIBRARIES}")
+        endif()
+        if(FFTW3L_INCLUDE_DIRS)
+          list(APPEND FFTW_INCLUDE_DIRS "${FFTW3L_INCLUDE_DIRS}")
+        else()
+          if (NOT FFTW_FIND_QUIETLY)
+            message(WARNING "FFTW3L_INCLUDE_DIRS is empty using PkgConfig."
+              "Perhaps the path to fftw3l headers is already present in your"
+              "CPATH/C(PLUS)_INCLUDE_PATH environment variables.")
+          endif()
+        endif()
+        if(FFTW3L_LIBRARY_DIRS)
+          list(APPEND FFTW_LIBRARY_DIRS "${FFTW3L_LIBRARY_DIRS}")
+        endif()
+      else(FFTW3L_FOUND)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(STATUS "Looking for FFTW3L - not found using PkgConfig."
+            "\n   Perhaps you should add the directory containing fftw3l.pc to"
+            "\n   the PKG_CONFIG_PATH environment variable.")
+        endif()
+      endif(FFTW3L_FOUND)
+    elseif(FFTW_LOOK_FOR_FFTW_QUAD)
+      pkg_search_module(FFTW3Q fftw3q)
+      pkg_search_module(FFTW3 fftw3)
+      if (FFTW3Q_FOUND)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(STATUS "Looking for FFTW3Q - found using PkgConfig")
+        endif()
+        if (FFTW3Q_LIBRARIES)
+          find_pkgconfig_libraries_absolute_path(FFTW3Q)
+          list(APPEND FFTW_LIBRARIES "${FFTW3Q_LIBRARIES}")
+        endif()
+        if(FFTW3Q_INCLUDE_DIRS)
+          list(APPEND FFTW_INCLUDE_DIRS "${FFTW3Q_INCLUDE_DIRS}")
+        else()
+          if (NOT FFTW_FIND_QUIETLY)
+            message(WARNING "FFTW3Q_INCLUDE_DIRS is empty using PkgConfig."
+              "Perhaps the path to fftw3q headers is already present in your"
+              "CPATH/C(PLUS)_INCLUDE_PATH environment variables.")
+          endif()
+        endif()
+        if(FFTW3Q_LIBRARY_DIRS)
+          list(APPEND FFTW_LIBRARY_DIRS "${FFTW3Q_LIBRARY_DIRS}")
+        endif()
+      else(FFTW3Q_FOUND)
+        if (NOT FFTW_FIND_QUIETLY)
+          message(STATUS "Looking for FFTW3Q - not found using PkgConfig."
+            "\n   Perhaps you should add the directory containing fftw3q.pc to"
+            "\n   the PKG_CONFIG_PATH environment variable.")
+        endif()
+      endif(FFTW3Q_FOUND)
+    else()
+      pkg_search_module(FFTW3 fftw3)
+      if (FFTW3_FOUND AND FFTW3_LIBRARIES)
+        find_pkgconfig_libraries_absolute_path(FFTW3)
+      endif()
+    endif()
+    if (FFTW3_FOUND)
+      if (NOT FFTW_FIND_QUIETLY)
+        message(STATUS "Looking for FFTW3 - found using PkgConfig")
+      endif()
+      if (FFTW3_LIBRARIES)
+        find_pkgconfig_libraries_absolute_path(FFTW3)
+        list(APPEND FFTW_LIBRARIES "${FFTW3_LIBRARIES}")
+      endif()
+      if(FFTW3_INCLUDE_DIRS)
+            list(APPEND FFTW_INCLUDE_DIRS "${FFTW3_INCLUDE_DIRS}")
+      else()
+        if (NOT FFTW_FIND_QUIETLY)
+          message(WARNING "FFTW3_INCLUDE_DIRS is empty using PkgConfig."
+            "Perhaps the path to fftw3 headers is already present in your"
+            "CPATH/C(PLUS)_INCLUDE_PATH environment variables.")
+        endif()
+      endif()
+      if(FFTW3_LIBRARY_DIRS)
+            list(APPEND FFTW_LIBRARY_DIRS "${FFTW3_LIBRARY_DIRS}")
+      endif()
+    else(FFTW3_FOUND)
+      if (NOT FFTW_FIND_QUIETLY)
+        message(STATUS "Looking for FFTW3 - not found using PkgConfig."
+          "\n   Perhaps you should add the directory containing fftw3.pc to"
+          "\n   the PKG_CONFIG_PATH environment variable.")
+      endif()
+    endif(FFTW3_FOUND)
+
+    if (FFTW_FOUND AND FFTW_LIBRARIES)
+      set(FFTW_FOUND_WITH_PKGCONFIG "TRUE")
+    else()
+      set(FFTW_FOUND_WITH_PKGCONFIG "FALSE")
+    endif()
+
+  endif( PKG_CONFIG_EXECUTABLE AND NOT FFTW_GIVEN_BY_USER )
+
+endif(NOT FFTW_LOOK_FOR_MKL AND NOT FFTW_LOOK_FOR_ESSL)
+
+if( (NOT PKG_CONFIG_EXECUTABLE) OR
+    (PKG_CONFIG_EXECUTABLE AND NOT FFTW_FOUND) OR
+    FFTW_GIVEN_BY_USER OR
+    FFTW_LOOK_FOR_MKL  OR
+    FFTW_LOOK_FOR_ESSL
+    )
+
+  # Looking for include
+  # -------------------
+
+  # Add system include paths to search include
+  # ------------------------------------------
+  unset(_inc_env)
+  set(ENV_MKLROOT "$ENV{MKLROOT}")
+  set(ENV_FFTW_DIR "$ENV{FFTW_DIR}")
+  set(ENV_FFTW_INCDIR "$ENV{FFTW_INCDIR}")
+  if(ENV_FFTW_INCDIR)
+    list(APPEND _inc_env "${ENV_FFTW_INCDIR}")
+  elseif(ENV_FFTW_DIR)
+    list(APPEND _inc_env "${ENV_FFTW_DIR}")
+    list(APPEND _inc_env "${ENV_FFTW_DIR}/include")
+    list(APPEND _inc_env "${ENV_FFTW_DIR}/include/fftw")
+  else()
+    if (ENV_MKLROOT)
+      list(APPEND _inc_env "${ENV_MKLROOT}/include/fftw")
+    endif()
+    # system variables
+    if(WIN32)
+      string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}")
+      list(APPEND _inc_env "${_path_env}")
+    else()
+      string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}")
+      list(APPEND _inc_env "${_path_env}")
+      string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}")
+      list(APPEND _inc_env "${_path_env}")
+      string(REPLACE ":" ";" _path_env "$ENV{CPATH}")
+      list(APPEND _inc_env "${_path_env}")
+      string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}")
+      list(APPEND _inc_env "${_path_env}")
+    endif()
+  endif()
+  list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}")
+  list(REMOVE_DUPLICATES _inc_env)
+
+  # set paths where to look for
+  set(PATH_TO_LOOK_FOR "${_inc_env}")
+
+  if (FFTW_LOOK_FOR_ESSL)
+    set(FFTW3_HEADER_TO_FIND "fftw3_essl.h")
+  else()
+    set(FFTW3_HEADER_TO_FIND "fftw3.h")
+  endif()
+
+  # Try to find the fftw header in the given paths
+  # -------------------------------------------------
+  # call cmake macro to find the header path
+  if(FFTW_INCDIR)
+    set(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS "FFTW_${FFTW3_HEADER_TO_FIND}_DIRS-NOTFOUND")
+    find_path(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS
+      NAMES ${FFTW3_HEADER_TO_FIND}
+      HINTS ${FFTW_INCDIR})
+  else()
+    if(FFTW_DIR)
+      set(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS "FFTW_${FFTW3_HEADER_TO_FIND}_DIRS-NOTFOUND")
+      find_path(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS
+        NAMES ${FFTW3_HEADER_TO_FIND}
+        HINTS ${FFTW_DIR}
+        PATH_SUFFIXES "include" "include/fftw")
+    else()
+      set(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS "FFTW_${FFTW3_HEADER_TO_FIND}_DIRS-NOTFOUND")
+      find_path(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS
+        NAMES ${FFTW3_HEADER_TO_FIND}
+        HINTS ${PATH_TO_LOOK_FOR}
+        PATH_SUFFIXES "fftw")
+    endif()
+  endif()
+  mark_as_advanced(FFTW_${FFTW3_HEADER_TO_FIND}_DIRS)
+
+  # Add path to cmake variable
+  # ------------------------------------
+  if (FFTW_${FFTW3_HEADER_TO_FIND}_DIRS)
+    set(FFTW_INCLUDE_DIRS "${FFTW_${FFTW3_HEADER_TO_FIND}_DIRS}")
+  else ()
+    set(FFTW_INCLUDE_DIRS "FFTW_INCLUDE_DIRS-NOTFOUND")
+    if(NOT FFTW_FIND_QUIETLY)
+      message(STATUS "Looking for FFTW -- ${FFTW3_HEADER_TO_FIND} not found")
+    endif()
+  endif ()
+
+
+  # Looking for lib
+  # ---------------
+
+  # Add system library paths to search lib
+  # --------------------------------------
+  unset(_lib_env)
+  set(ENV_FFTW_LIBDIR "$ENV{FFTW_LIBDIR}")
+  if(ENV_FFTW_LIBDIR)
+    list(APPEND _lib_env "${ENV_FFTW_LIBDIR}")
+  elseif(ENV_FFTW_DIR)
+    list(APPEND _lib_env "${ENV_FFTW_DIR}")
+    list(APPEND _lib_env "${ENV_FFTW_DIR}/lib")
+    if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
+      list(APPEND _lib_env "${ENV_FFTW_DIR}/lib64")
+      list(APPEND _lib_env "${ENV_FFTW_DIR}/lib/intel64")
+    else()
+      list(APPEND _lib_env "${ENV_FFTW_DIR}/lib32")
+      list(APPEND _lib_env "${ENV_FFTW_DIR}/lib/ia32")
+    endif()
+  else()
+    if (ENV_MKLROOT)
+      list(APPEND _lib_env "${ENV_MKLROOT}/lib")
+      if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
+        list(APPEND _lib_env "${ENV_MKLROOT}/lib64")
+        list(APPEND _lib_env "${ENV_MKLROOT}/lib/intel64")
+      else()
+        list(APPEND _lib_env "${ENV_MKLROOT}/lib32")
+        list(APPEND _lib_env "${ENV_MKLROOT}/lib/ia32")
+      endif()
+    endif()
+    list(APPEND _lib_env "$ENV{LIBRARY_PATH}")
+    if(WIN32)
+      string(REPLACE ":" ";" _lib_env2 "$ENV{LIB}")
+    elseif(APPLE)
+      string(REPLACE ":" ";" _lib_env2 "$ENV{DYLD_LIBRARY_PATH}")
+    else()
+      string(REPLACE ":" ";" _lib_env2 "$ENV{LD_LIBRARY_PATH}")
+    endif()
+    list(APPEND _lib_env "${_lib_env2}")
+    list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
+  endif()
+  list(REMOVE_DUPLICATES _lib_env)
+
+  # set paths where to look for
+  set(PATH_TO_LOOK_FOR "${_lib_env}")
+
+  if(FFTW_LOOK_FOR_FFTW_SIMPLE)
+    set(FFTW_PREC "f")
+    set(FFTW_PREC_TESTFUNC "s")
+  elseif(FFTW_LOOK_FOR_FFTW_LONG)
+    set(FFTW_PREC "l")
+    set(FFTW_PREC_TESTFUNC "l")
+  elseif(FFTW_LOOK_FOR_FFTW_QUAD)
+    set(FFTW_PREC "q")
+    set(FFTW_PREC_TESTFUNC "q")
+  else()
+    set(FFTW_PREC "")
+    set(FFTW_PREC_TESTFUNC "d")
+  endif()
+
+  set(FFTW_LIBRARIES "")
+  set(FFTW_LIBRARY_DIRS "")
+
+  if(NOT FFTW_LOOK_FOR_MKL)
+
+    if (FFTW_LOOK_FOR_THREADS)
+      set(FFTW_libs_to_find "fftw3${FFTW_PREC}_threads;fftw3${FFTW_PREC};fftw3")
+    elseif (FFTW_LOOK_FOR_OMP)
+      set(FFTW_libs_to_find "fftw3${FFTW_PREC}_omp;fftw3${FFTW_PREC};fftw3")
+    else()
+      set(FFTW_libs_to_find "fftw3${FFTW_PREC};fftw3")
+    endif()
+    if (FFTW_LOOK_FOR_FFTW_QUAD)
+      if (NOT FFTW_LOOK_FOR_MKL AND NOT FFTW_LOOK_FOR_ESSL)
+        list(APPEND FFTW_libs_to_find "quadmath")
+      endif()
+    endif()
+
+    if (FFTW_LOOK_FOR_ESSL)
+      set(FFTW_libs_to_find "fftw3_essl")
+    endif()
+
+    # Try to find the fftw lib in the given paths
+    # ----------------------------------------------
+
+    # call cmake macro to find the lib path
+    if(FFTW_LIBDIR)
+      foreach(fftw_lib ${FFTW_libs_to_find})
+        set(FFTW_${fftw_lib}_LIBRARY "FFTW_${fftw_lib}_LIBRARY-NOTFOUND")
+        find_library(FFTW_${fftw_lib}_LIBRARY
+          NAMES ${fftw_lib}
+          HINTS ${FFTW_LIBDIR})
+      endforeach()
+    else()
+      if(FFTW_DIR)
+        foreach(fftw_lib ${FFTW_libs_to_find})
+          set(FFTW_${fftw_lib}_LIBRARY "FFTW_${fftw_lib}_LIBRARY-NOTFOUND")
+          find_library(FFTW_${fftw_lib}_LIBRARY
+            NAMES ${fftw_lib}
+            HINTS ${FFTW_DIR}
+            PATH_SUFFIXES lib lib32 lib64)
+        endforeach()
+      else()
+        foreach(fftw_lib ${FFTW_libs_to_find})
+          set(FFTW_${fftw_lib}_LIBRARY "FFTW_${fftw_lib}_LIBRARY-NOTFOUND")
+          find_library(FFTW_${fftw_lib}_LIBRARY
+            NAMES ${fftw_lib}
+            HINTS ${PATH_TO_LOOK_FOR})
+        endforeach()
+      endif()
+    endif()
+
+    # If found, add path to cmake variable
+    # ------------------------------------
+    foreach(fftw_lib ${FFTW_libs_to_find})
+
+      if (FFTW_${fftw_lib}_LIBRARY)
+        get_filename_component(${fftw_lib}_lib_path "${FFTW_${fftw_lib}_LIBRARY}" PATH)
+        # set cmake variables
+        list(APPEND FFTW_LIBRARIES "${FFTW_${fftw_lib}_LIBRARY}")
+        list(APPEND FFTW_LIBRARY_DIRS "${${fftw_lib}_lib_path}")
+      else ()
+        list(APPEND FFTW_LIBRARIES "${FFTW_${fftw_lib}_LIBRARY}")
+        if (NOT FFTW_FIND_QUIETLY)
+          message(STATUS "Looking for FFTW -- lib ${fftw_lib} not found")
+        endif()
+      endif ()
+      mark_as_advanced(FFTW_${fftw_lib}_LIBRARY)
+
+    endforeach()
+
+    # check if one lib is NOTFOUND
+    foreach(lib ${FFTW_LIBRARIES})
+      if (NOT lib)
+        set(FFTW_LIBRARIES "FFTW_LIBRARIES-NOTFOUND")
+      endif()
+    endforeach()
+
+  endif(NOT FFTW_LOOK_FOR_MKL)
+
+  if (FFTW_LOOK_FOR_MKL OR FFTW_LOOK_FOR_ESSL)
+
+    # FFTW relies on blas libs
+    if (FFTW_LOOK_FOR_THREADS)
+      if (FFTW_LOOK_FOR_MKL)
+        if (BLAS_LIBRARIES_PAR)
+          list(APPEND FFTW_LIBRARIES "${BLAS_LIBRARIES_PAR}")
+          if (NOT FFTW_FIND_QUIETLY)
+            message(STATUS "Multithreaded FFTW has been found: ${FFTW_LIBRARIES}")
+          endif()
+        else()
+          if (NOT FFTW_FIND_QUIETLY)
+            if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_MKL)
+              message(FATAL_ERROR "FFTW is required but not found.")
+            else()
+              message(STATUS "Multithreaded FFTW not found.")
+            endif()
+          endif()
+        endif(BLAS_LIBRARIES_PAR)
+      elseif (FFTW_LOOK_FOR_ESSL)
+        if (FFTW_LIBRARIES AND BLAS_LIBRARIES_PAR)
+          list(APPEND FFTW_LIBRARIES "${BLAS_LIBRARIES_PAR}")
+          if (NOT FFTW_FIND_QUIETLY)
+            message(STATUS "Multithreaded FFTW has been found: ${FFTW_LIBRARIES}")
+          endif()
+        else()
+          if (NOT FFTW_FIND_QUIETLY)
+            if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_MKL)
+              message(FATAL_ERROR "FFTW is required but not found.")
+            else()
+              message(STATUS "Multithreaded FFTW not found.")
+            endif()
+          endif()
+        endif(FFTW_LIBRARIES AND BLAS_LIBRARIES_PAR)
+      endif()
+    else(FFTW_LOOK_FOR_THREADS)
+      if (FFTW_LOOK_FOR_MKL)
+        if (BLAS_LIBRARIES_SEQ)
+          list(APPEND FFTW_LIBRARIES "${BLAS_LIBRARIES_SEQ}")
+          if (NOT FFTW_FIND_QUIETLY)
+            message(STATUS "FFTW has been found: ${FFTW_LIBRARIES}")
+          endif()
+        else()
+          if (NOT FFTW_FIND_QUIETLY)
+            if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_MKL)
+              message(FATAL_ERROR "FFTW is required but not found.")
+            else()
+              message(STATUS "FFTW not found.")
+            endif()
+          endif()
+        endif(BLAS_LIBRARIES_SEQ)
+      elseif (FFTW_LOOK_FOR_ESSL)
+        if (FFTW_LIBRARIES AND BLAS_LIBRARIES_SEQ)
+          list(APPEND FFTW_LIBRARIES "${BLAS_LIBRARIES_SEQ}")
+          if (NOT FFTW_FIND_QUIETLY)
+            message(STATUS "FFTW has been found: ${FFTW_LIBRARIES}")
+          endif()
+        else()
+          if (NOT FFTW_FIND_QUIETLY)
+            if (FFTW_FIND_REQUIRED AND FFTW_FIND_REQUIRED_MKL)
+              message(FATAL_ERROR "FFTW is required but not found.")
+            else()
+              message(STATUS "FFTW not found.")
+            endif()
+          endif()
+        endif(FFTW_LIBRARIES AND BLAS_LIBRARIES_SEQ)
+      endif()
+    endif(FFTW_LOOK_FOR_THREADS)
+
+    if (BLAS_LIBRARY_DIRS)
+      list(APPEND FFTW_LIBRARY_DIRS "${BLAS_LIBRARY_DIRS}")
+    else()
+      if (NOT FFTW_FIND_QUIETLY)
+        message(WARNING "FFTW_LIBRARY_DIRS may not be complete because BLAS_LIBRARY_DIRS is empty.")
+      endif()
+    endif()
+
+  endif(FFTW_LOOK_FOR_MKL OR FFTW_LOOK_FOR_ESSL)
+
+  list(REMOVE_DUPLICATES FFTW_INCLUDE_DIRS)
+  list(REMOVE_DUPLICATES FFTW_LIBRARY_DIRS)
+
+  # check if one lib is NOTFOUND
+  foreach(lib ${FFTW_LIBRARIES})
+    if (NOT lib)
+      set(FFTW_LIBRARIES "FFTW_LIBRARIES-NOTFOUND")
+    endif()
+  endforeach()
+
+endif( (NOT PKG_CONFIG_EXECUTABLE) OR
+  (PKG_CONFIG_EXECUTABLE AND NOT FFTW_FOUND) OR
+  FFTW_GIVEN_BY_USER OR
+  FFTW_LOOK_FOR_MKL  OR
+  FFTW_LOOK_FOR_ESSL
+  )
+
+# check a function to validate the find
+if(FFTW_LIBRARIES)
+
+  set(REQUIRED_FLAGS)
+  set(REQUIRED_LDFLAGS)
+  set(REQUIRED_INCDIRS)
+  set(REQUIRED_LIBDIRS)
+  set(REQUIRED_LIBS)
+
+  # FFTW
+  if (FFTW_INCLUDE_DIRS)
+    set(REQUIRED_INCDIRS "${FFTW_INCLUDE_DIRS}")
+  endif()
+  if (FFTW_CFLAGS_OTHER)
+    set(REQUIRED_FLAGS "${FFTW_CFLAGS_OTHER}")
+  endif()
+  if (FFTW_LDFLAGS_OTHER)
+    set(REQUIRED_LDFLAGS "${FFTW_LDFLAGS_OTHER}")
+  endif()
+  if (FFTW_LIBRARY_DIRS)
+    set(REQUIRED_LIBDIRS "${FFTW_LIBRARY_DIRS}")
+  endif()
+  set(REQUIRED_LIBS "${FFTW_LIBRARIES}")
+  # THREADS
+  if (FFTW_LOOK_FOR_THREADS)
+    list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}")
+  endif()
+  # OMP
+  if(FFTW_LOOK_FOR_OMP)
+    list(APPEND REQUIRED_FLAGS "${OPENMP_C_FLAGS}")
+  endif()
+  # MKL
+  if(FFTW_LOOK_FOR_MKL)
+    list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}")
+    if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
+      list(APPEND REQUIRED_LDFLAGS "-Wl,--no-as-needed")
+    endif()
+  endif()
+  # m
+  find_library(M_LIBRARY NAMES m)
+  mark_as_advanced(M_LIBRARY)
+  if(M_LIBRARY)
+    list(APPEND REQUIRED_LIBS "-lm")
+  endif()
+
+  # set required libraries for link
+  set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}")
+  if (REQUIRED_FLAGS)
+    set(REQUIRED_FLAGS_COPY "${REQUIRED_FLAGS}")
+    set(REQUIRED_FLAGS)
+    set(REQUIRED_DEFINITIONS)
+    foreach(_flag ${REQUIRED_FLAGS_COPY})
+      if (_flag MATCHES "^-D")
+       list(APPEND REQUIRED_DEFINITIONS "${_flag}")
+      endif()
+      string(REGEX REPLACE "^-D.*" "" _flag "${_flag}")
+      list(APPEND REQUIRED_FLAGS "${_flag}")
+    endforeach()
+  endif()
+  finds_remove_duplicates()
+  set(CMAKE_REQUIRED_DEFINITIONS "${REQUIRED_DEFINITIONS}")
+  set(CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}")
+  set(CMAKE_REQUIRED_LIBRARIES)
+  list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}")
+  list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}")
+  list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}")
+  string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}")
+
+  # test link
+  unset(FFTW_WORKS CACHE)
+  include(CheckFunctionExists)
+  if (FFTW_LOOK_FOR_ESSL)
+    check_function_exists(${FFTW_PREC_TESTFUNC}fftw_execute FFTW_WORKS)
+  else()
+    check_function_exists(${FFTW_PREC_TESTFUNC}fftw_execute_ FFTW_WORKS)
+  endif()
+  mark_as_advanced(FFTW_WORKS)
+
+  if(FFTW_WORKS)
+    # save link with dependencies
+    set(FFTW_LIBRARIES_DEP "${REQUIRED_LIBS}")
+    set(FFTW_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}")
+    set(FFTW_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}")
+    set(FFTW_CFLAGS_OTHER_DEP "${REQUIRED_FLAGS}")
+    set(FFTW_LDFLAGS_OTHER_DEP "${REQUIRED_LDFLAGS}")
+  else()
+    if(NOT FFTW_FIND_QUIETLY)
+      message(STATUS "Looking for FFTW : test of ${FFTW_PREC_TESTFUNC}fftw_execute_ with fftw library fails")
+      message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}")
+      message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}")
+      message(STATUS "CMAKE_REQUIRED_FLAGS: ${CMAKE_REQUIRED_FLAGS}")
+      message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails")
+    endif()
+  endif()
+  set(CMAKE_REQUIRED_INCLUDES)
+  set(CMAKE_REQUIRED_FLAGS)
+  set(CMAKE_REQUIRED_LIBRARIES)
+endif(FFTW_LIBRARIES)
+
+if (FFTW_LIBRARIES)
+  list(GET FFTW_LIBRARIES 0 first_lib)
+  get_filename_component(first_lib_path "${first_lib}" PATH)
+  if (NOT FFTW_LIBRARY_DIRS)
+    set(FFTW_LIBRARY_DIRS "${first_lib_path}")
+  endif()
+  if (${first_lib_path} MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)")
+    string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}")
+    set(FFTW_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of FFTW library" FORCE)
+  else()
+    set(FFTW_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of FFTW library" FORCE)
+  endif()
+endif()
+mark_as_advanced(FFTW_DIR)
+mark_as_advanced(FFTW_DIR_FOUND)
+
+# check that FFTW has been found
+# -------------------------------
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(FFTW DEFAULT_MSG
+  FFTW_LIBRARIES
+  FFTW_WORKS)
diff --git a/cmake/morse/FindHeadersAndLibs.cmake b/cmake/morse/FindHeadersAndLibs.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..64144bdbf8a35f966f1ac802e5765e6ad81abf7c
--- /dev/null
+++ b/cmake/morse/FindHeadersAndLibs.cmake
@@ -0,0 +1,94 @@
+###
+#
+# @copyright (c) 2009-2014 The University of Tennessee and The University
+#                          of Tennessee Research Foundation.
+#                          All rights reserved.
+# @copyright (c) 2012-2014 Inria. All rights reserved.
+# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+#
+###
+#
+#  @file FindHeadersAndLibs.cmake
+#
+#  @project MORSE
+#  MORSE is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah Univesity of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+#  @version 0.9.0
+#  @author Cedric Castagnede
+#  @author Emmanuel Agullo
+#  @author Mathieu Faverge
+#  @author Florent Pruvost
+#  @date 13-07-2012
+#
+###
+
+# Some macros to print status when search for headers and libs
+include(PrintFindStatus)
+
+function(FindHeader _libname _header_to_find)
+
+  # save _libname upper and lower case
+  string(TOUPPER ${_libname} LIBNAME)
+  string(TOLOWER ${_libname} libname)
+
+  # Looking for include
+  # -------------------
+
+  # Add system include paths to search include
+  # ------------------------------------------
+  unset(_inc_env)
+  if(WIN32)
+    string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}")
+  else()
+    string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}")
+    list(APPEND _inc_env "${_path_env}")
+    string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}")
+    list(APPEND _inc_env "${_path_env}")
+    string(REPLACE ":" ";" _path_env "$ENV{CPATH}")
+    list(APPEND _inc_env "${_path_env}")
+    string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}")
+    list(APPEND _inc_env "${_path_env}")
+  endif()
+  list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}")
+  list(REMOVE_DUPLICATES _inc_env)
+
+
+  # Try to find the _header_to_find in the given paths
+  # --------------------------------------------------
+  # call cmake macro to find the header path
+  if(${LIBNAME}_INCDIR)
+    set(${LIBNAME}_${_header_to_find}_DIRS "${LIBNAME}_${_header_to_find}_DIRS-NOTFOUND")
+    find_path(${LIBNAME}_${_header_to_find}_DIRS
+      NAMES ${_header_to_find}
+      HINTS ${${LIBNAME}_INCDIR})
+  elseif(${LIBNAME}_DIR)
+    set(${LIBNAME}_${_header_to_find}_DIRS "${LIBNAME}_${_header_to_find}_DIRS-NOTFOUND")
+    find_path(${LIBNAME}_${_header_to_find}_DIRS
+      NAMES ${_header_to_find}
+      HINTS ${${LIBNAME}_DIR}
+      PATH_SUFFIXES include)
+  else()
+    set(${LIBNAME}_${_header_to_find}_DIRS "${LIBNAME}_${_header_to_find}_DIRS-NOTFOUND")
+    find_path(${LIBNAME}_${_header_to_find}_DIRS
+      NAMES ${_header_to_find}
+      HINTS ${_inc_env})
+  endif()
+  mark_as_advanced(${LIBNAME}_${_header_to_find}_DIRS)
+
+  # Print status if not found
+  # -------------------------
+  if (NOT ${LIBNAME}_${_header_to_find}_DIRS)
+    Print_Find_Header_Status(${libname} ${_header_to_find})
+  endif ()
+
+endfunction(FindHeader)
+
+
+##
+## @end file FindHeadersAndLibs.cmake
+##
diff --git a/cmake/morse/FindInit.cmake b/cmake/morse/FindInit.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..e59d41a077848029e04065d5f46bba57bcf0277d
--- /dev/null
+++ b/cmake/morse/FindInit.cmake
@@ -0,0 +1,45 @@
+###
+#
+# @copyright (c) 2018 Inria. All rights reserved.
+#
+###
+#
+#  @file FindInit.cmake
+#
+#  @project MORSE
+#  MORSE is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah Univesity of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+#  @version 1.0.0
+#  @author Florent Pruvost
+#  @date 24-04-2018
+#
+###
+
+
+# This include is required to check symbols of libs
+include(CheckFunctionExists)
+
+# This include is required to check defines in headers
+include(CheckIncludeFiles)
+
+# Factorize some piece of code
+include(FindCommon)
+
+# To find headers and libs
+include(FindHeadersAndLibs)
+
+# To transform relative path into absolute for a list of libraries
+include(LibrariesAbsolutePath)
+include(FindPkgconfigLibrariesAbsolutePath)
+
+# Some macros to print status when search for headers and libs
+include(PrintFindStatus)
+
+##
+## @end file FindInit.cmake
+##
diff --git a/cmake/morse/FindPkgconfigLibrariesAbsolutePath.cmake b/cmake/morse/FindPkgconfigLibrariesAbsolutePath.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..51b08ce59853459f493a0892874f71678467c392
--- /dev/null
+++ b/cmake/morse/FindPkgconfigLibrariesAbsolutePath.cmake
@@ -0,0 +1,99 @@
+###
+#
+# @copyright (c) 2018 Inria. All rights reserved.
+#
+###
+#
+#  @file FindPkgconfigLibrariesAbsolutePath.cmake
+#
+#  @project MORSE
+#  MORSE is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah Univesity of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+#  @version 1.0.0
+#  @author Florent Pruvost
+#  @date 06-04-2018
+#
+###
+
+# Transform relative path into absolute path for libraries found with the
+# pkg_search_module cmake macro
+# _prefix: the name of the CMake variable used when pkg_search_module was called
+# e.g. for pkg_search_module(BLAS blas) _prefix would be BLAS
+macro(FIND_PKGCONFIG_LIBRARIES_ABSOLUTE_PATH _prefix)
+  list(APPEND _lib_env "$ENV{LIBRARY_PATH}")
+  if(WIN32)
+    string(REPLACE ":" ";" _lib_env2 "$ENV{LIB}")
+  elseif(APPLE)
+    string(REPLACE ":" ";" _lib_env2 "$ENV{DYLD_LIBRARY_PATH}")
+  else()
+    string(REPLACE ":" ";" _lib_env2 "$ENV{LD_LIBRARY_PATH}")
+  endif()
+  list(APPEND _lib_env "${_lib_env2}")
+  list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
+  # non static case
+  set(${_prefix}_LIBRARIES_COPY "${${_prefix}_LIBRARIES}")
+  set(${_prefix}_LIBRARIES "")
+  foreach(_library ${${_prefix}_LIBRARIES_COPY})
+    if(EXISTS "${_library}")
+      list(APPEND ${_prefix}_LIBRARIES ${_library})
+    else()
+      get_filename_component(_ext "${_library}" EXT)
+      set(_lib_extensions ".so" ".a" ".dyld" ".dll")
+      list(FIND _lib_extensions "${_ext}" _index)
+      if (${_index} GREATER -1)
+        get_filename_component(_library "${_library}" NAME_WE)
+      endif()
+      find_library(_library_path NAMES ${_library}
+          HINTS ${${_prefix}_LIBDIR} ${${_prefix}_LIBRARY_DIRS} ${_lib_env})
+      if (_library_path)
+          list(APPEND ${_prefix}_LIBRARIES ${_library_path})
+      else()
+          message(FATAL_ERROR "Dependency of ${_prefix} '${_library}' NOT FOUND")
+      endif()
+      unset(_library_path CACHE)
+    endif()
+  endforeach()
+  set (${_prefix}_LIBRARIES "${${_prefix}_LIBRARIES}" CACHE INTERNAL "" FORCE)
+  ## static case
+  #set(${_prefix}_STATIC_LIBRARIES_COPY "${${_prefix}_STATIC_LIBRARIES}")
+  #set(${_prefix}_STATIC_LIBRARIES "")
+  #foreach(_library ${${_prefix}_STATIC_LIBRARIES_COPY})
+  #  if(EXISTS "${_library}")
+  #    list(APPEND ${_prefix}_STATIC_LIBRARIES ${_library})
+  #  else()
+  #    get_filename_component(_ext "${_library}" EXT)
+  #    set(_lib_extensions ".so" ".a" ".dyld" ".dll")
+  #    list(FIND _lib_extensions "${_ext}" _index)
+  #    if (${_index} GREATER -1)
+  #      get_filename_component(_library "${_library}" NAME_WE)
+  #    endif()
+  #    # try static first
+  #    set (default_find_library_suffixes ${CMAKE_FIND_LIBRARY_SUFFIXES})
+  #    set (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX})
+  #    find_library(_library_path NAMES ${_library}
+  #        HINTS ${${_prefix}_STATIC_LIBDIR} ${${_prefix}_STATIC_LIBRARY_DIRS} ${_lib_env})
+  #    set (CMAKE_FIND_LIBRARY_SUFFIXES ${default_find_library_suffixes})
+  #    # if not found try dynamic
+  #    if (NOT _library_path)
+  #      find_library(_library_path NAMES ${_library}
+  #          HINTS ${${_prefix}_STATIC_LIBDIR} ${${_prefix}_STATIC_LIBRARY_DIRS} ${_lib_env})
+  #    endif()
+  #    if (_library_path)
+  #        list(APPEND ${_prefix}_STATIC_LIBRARIES ${_library_path})
+  #    else()
+  #        message(FATAL_ERROR "Dependency of ${_prefix} '${_library}' NOT FOUND")
+  #    endif()
+  #    unset(_library_path CACHE)
+  #  endif()
+  #endforeach()
+  #set (${_prefix}_STATIC_LIBRARIES "${${_prefix}_STATIC_LIBRARIES}" CACHE INTERNAL "" FORCE)
+endmacro()
+
+##
+## @end file FindPkgconfigLibrariesAbsolutePath.cmake
+##
diff --git a/cmake/morse/LICENCE.txt b/cmake/morse/LICENCE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b95821f36afa3579a5f1de4fe840aec43b7a4b96
--- /dev/null
+++ b/cmake/morse/LICENCE.txt
@@ -0,0 +1,42 @@
+###
+#
+# @copyright (c) 2009-2014 The University of Tennessee and The University
+#                          of Tennessee Research Foundation.
+#                          All rights reserved.
+# @copyright (c) 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+#                          Univ. Bordeaux. All rights reserved.
+# @copyright (c) 2016      KAUST. All rights reserved.
+#
+###
+#
+# This software is a computer program whose purpose is to process
+# Matrices Over Runtime Systems @ Exascale (MORSE). More information
+# can be found on the following website: http://www.inria.fr/en/teams/morse.
+#
+# This software is governed by the CeCILL-C license under French law and
+# abiding by the rules of distribution of free software.  You can  use,
+# modify and/ or redistribute the software under the terms of the CeCILL-C
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability.
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and,  more generally, to use and operate it in the
+# same conditions as regards security.
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL-C license and that you accept its terms.
+#
+###
diff --git a/cmake/morse/LibrariesAbsolutePath.cmake b/cmake/morse/LibrariesAbsolutePath.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..7aaab504d7348090e36c502755020b0b1439f123
--- /dev/null
+++ b/cmake/morse/LibrariesAbsolutePath.cmake
@@ -0,0 +1,70 @@
+###
+#
+# @copyright (c) 2018 Inria. All rights reserved.
+#
+###
+#
+#  @file LibrariesAbsolutePath.cmake
+#
+#  @project MORSE
+#  MORSE is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah Univesity of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+#  @version 1.0.0
+#  @author Florent Pruvost
+#  @date 13-04-2018
+#
+###
+
+# Transform relative path into absolute path for libraries
+# lib_list (input/output): the name of the CMake variable containing libraries, e.g. BLAS_LIBRARIES
+# hints_paths (input): additional paths to add when looking for libraries
+macro(LIBRARIES_ABSOLUTE_PATH lib_list hints_paths)
+  # collect environment paths to dig 
+  list(APPEND _lib_env "$ENV{LIBRARY_PATH}")
+  if(WIN32)
+    string(REPLACE ":" ";" _lib_env2 "$ENV{LIB}")
+  elseif(APPLE)
+    string(REPLACE ":" ";" _lib_env2 "$ENV{DYLD_LIBRARY_PATH}")
+  else()
+    string(REPLACE ":" ";" _lib_env2 "$ENV{LD_LIBRARY_PATH}")
+  endif()
+  list(APPEND _lib_env "${_lib_env2}")
+  list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}")
+  # copy the lib list 
+  set (${lib_list}_COPY "${${lib_list}}")
+  # reset the lib list to populate
+  set(${lib_list} "")
+  foreach(_library ${${lib_list}_COPY})
+    if(EXISTS "${_library}")
+      # if already an absolute path, nothing special to do
+      list(APPEND ${lib_list} ${_library})
+    else()
+      # replace pattern -lfoo -> foo
+      string(REGEX REPLACE "^-l" "" _library "${_library}")
+      # remove extensions if exist
+      get_filename_component(_ext "${_library}" EXT)
+      set(_lib_extensions ".so" ".a" ".dyld" ".dll")
+      list(FIND _lib_extensions "${_ext}" _index)
+      if (${_index} GREATER -1)
+        get_filename_component(_library "${_library}" NAME_WE)
+      endif()
+      # try to find the lib
+      find_library(_library_path NAMES ${_library} HINTS ${hints_paths} ${_lib_env})
+      if (_library_path)
+          list(APPEND ${lib_list} ${_library_path})
+      else()
+          message(FATAL_ERROR "Dependency of ${lib_list} '${_library}' NOT FOUND")
+      endif()
+      unset(_library_path CACHE)
+    endif()
+  endforeach()
+endmacro()
+
+##
+## @end file LibrariesAbsolutePath.cmake
+##
diff --git a/cmake/morse/MorseInit.cmake b/cmake/morse/MorseInit.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..fc51170401cc17045854de4ee51f477eff1b66c1
--- /dev/null
+++ b/cmake/morse/MorseInit.cmake
@@ -0,0 +1,67 @@
+###
+#
+# @copyright (c) 2009-2014 The University of Tennessee and The University
+#                          of Tennessee Research Foundation.
+#                          All rights reserved.
+# @copyright (c) 2012-2018 Inria. All rights reserved.
+# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+#
+###
+#
+#  @file MorseInit.cmake
+#
+#  @project MORSE
+#  MORSE is a software package provided by:
+#     Inria Bordeaux - Sud-Ouest,
+#     Univ. of Tennessee,
+#     King Abdullah Univesity of Science and Technology
+#     Univ. of California Berkeley,
+#     Univ. of Colorado Denver.
+#
+#  @version 1.0.0
+#  @author Cedric Castagnede
+#  @author Emmanuel Agullo
+#  @author Mathieu Faverge
+#  @author Florent Pruvost
+#  @date 13-07-2012
+#
+###
+
+# Path to Morse modules
+get_filename_component(MORSE_CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_FILE} DIRECTORY CACHE)
+
+# Global Morse options
+option(MORSE_ENABLE_WARNING       "Enable warning messages" OFF)
+option(MORSE_ENABLE_COVERAGE      "Enable flags for coverage test" OFF)
+option(MORSE_ENABLE_COLOR_MESSAGE "Enable colors in messages" OFF)
+#option(MORSE_VERBOSE_FIND_PACKAGE "Add additional messages concerning packages not found" OFF)
+#message(STATUS "MORSE_VERBOSE_FIND_PACKAGE is set to OFF, turn it ON to get"
+#        "   information about packages not found")
+
+
+# This include is required to check symbols of libs in the main CMakeLists.txt
+include(CheckFunctionExists)
+
+# This include is required to check defines in headers
+include(CheckIncludeFiles)
+
+if (MORSE_ENABLE_COLOR_MESSAGE)
+  # colorize messages
+  include(ColorizeMessage)
+endif()
+
+# Define some auxilary flags
+include(AuxilaryFlags)
+
+# Define some variables to et info about ressources
+include(Ressources)
+
+# Add the path where we handle our FindFOO.cmake to seek for liraries
+list(APPEND CMAKE_MODULE_PATH ${MORSE_CMAKE_MODULE_PATH}/find)
+
+# To load some macros used in Finds (could be useful for other projects)
+include(FindInit)
+
+##
+## @end file MorseInit.cmake
+##
diff --git a/cmake/morse/PrintFindStatus.cmake b/cmake/morse/PrintFindStatus.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..1fdd403b7de11a8b946b178c5aada2da5e6fe33e
--- /dev/null
+++ b/cmake/morse/PrintFindStatus.cmake
@@ -0,0 +1,207 @@
+###
+#
+# @copyright (c) 2009-2014 The University of Tennessee and The University
+#                          of Tennessee Research Foundation.
+#                          All rights reserved.
+# @copyright (c) 2012-2014 Inria. All rights reserved.
+# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+#
+###
+#
+# - Some macros to print status when search for headers and libs
+# Main parameters of macros
+#  _libname: name of the lib you seek, foo for example
+#  _header_to_find: name of the header you seek, foo.h for example
+#  _lib_to_find: name of the library you seek, libfoo for example
+#  _pc_to_find: name of the pkg-config file zyou seek, foo.pc for example
+
+
+#=============================================================================
+# Copyright 2012-2013 Inria
+# Copyright 2012-2013 Emmanuel Agullo
+# Copyright 2012-2013 Mathieu Faverge
+# Copyright 2012      Cedric Castagnede
+# Copyright 2013      Florent Pruvost
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file MORSE-Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+
+#=============================================================================
+# (To distribute this file outside of Morse, substitute the full
+#  License text for the above reference.)
+
+
+# Set some colors
+#if(NOT WIN32)
+#    string(ASCII 27 Esc)
+#    set(ColourReset "${Esc}[m")
+#    set(ColourBold  "${Esc}[1m")
+#    set(Red         "${Esc}[31m")
+#    set(Green       "${Esc}[32m")
+#    set(Yellow      "${Esc}[33m")
+#    set(Blue        "${Esc}[34m")
+#    set(Magenta     "${Esc}[35m")
+#    set(Cyan        "${Esc}[36m")
+#    set(White       "${Esc}[37m")
+#    set(BoldRed     "${Esc}[1;31m")
+#    set(BoldGreen   "${Esc}[1;32m")
+#    set(BoldYellow  "${Esc}[1;33m")
+#    set(BoldBlue    "${Esc}[1;34m")
+#    set(BoldMagenta "${Esc}[1;35m")
+#    set(BoldCyan    "${Esc}[1;36m")
+#    set(BoldWhite   "${Esc}[1;37m")
+#endif()
+
+
+# This macro informs why the _header_to_find file has not been found
+macro(Print_Find_Header_Status _libname _header_to_find)
+
+  # save _libname upper and lower case
+  string(TOUPPER ${_libname} LIBNAME)
+  string(TOLOWER ${_libname} libname)
+
+  # print status
+  #message(" ")
+  if(${LIBNAME}_INCDIR)
+    message("${Blue}${LIBNAME}_INCDIR is defined but ${_header_to_find}"
+      "has not been found in ${${LIBNAME}_INCDIR}${ColourReset}")
+  else()
+    if(${LIBNAME}_DIR)
+      message("${Blue}${LIBNAME}_DIR is defined but"
+	"${_header_to_find} has not been found in"
+	"${${LIBNAME}_DIR}/include${ColourReset}")
+    else()
+      message("${Blue}${_header_to_find} not found."
+	"Nor ${LIBNAME}_DIR neither ${LIBNAME}_INCDIR"
+	"are defined so that we looked for ${_header_to_find} in"
+	"system paths (INCLUDE, CPATH, C_INCLUDE_PATH,"
+	"INCLUDE_PATH, CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES)${ColourReset}")
+      if(_inc_env)
+	message("${Blue}${_header_to_find} has not been found in"
+	  "${_inc_env}${ColourReset}")
+      endif()
+    endif()
+  endif()
+  message("${BoldBlue}Please indicate where to find ${_header_to_find}. You have three options:\n"
+    "- Option 1: Provide the root directory of the library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n"
+    "- Option 2: Provide the directory where to find the headers with cmake option: -D${LIBNAME}_INCDIR=your/path/to/${libname}/include/\n"
+    "- Option 3: Update your environment variable (INCLUDE or CPATH)\n"
+    "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}")
+  #message(" ")
+
+endmacro()
+
+# This macro informs why the _lib_to_find file has not been found
+macro(Print_Find_Library_Status _libname _lib_to_find)
+
+  # save _libname upper/lower case
+  string(TOUPPER ${_libname} LIBNAME)
+  string(TOLOWER ${_libname} libname)
+
+  # print status
+  #message(" ")
+  if(${LIBNAME}_LIBDIR)
+    message("${Yellow}${LIBNAME}_LIBDIR is defined but ${_lib_to_find}"
+      "has not been found in ${${LIBNAME}_LIBDIR}${ColourReset}")
+  else()
+    if(${LIBNAME}_DIR)
+      message("${Yellow}${LIBNAME}_DIR is defined but ${_lib_to_find}"
+	"has not been found in ${${LIBNAME}_DIR}/lib(or /lib32 or"
+	"/lib64)${ColourReset}")
+    else()
+      message("${Yellow}${_lib_to_find} not found."
+	"Nor ${LIBNAME}_DIR neither ${LIBNAME}_LIBDIR"
+	"are defined so that we looked for ${_lib_to_find} in"
+	"system paths (Linux: LD_LIBRARY_PATH, Windows: LIB,"
+	"Mac: DYLD_LIBRARY_PATH,"
+	"CMAKE_C_IMPLICIT_LINK_DIRECTORIES)${ColourReset}")
+      if(_lib_env)
+	message("${Yellow}${_lib_to_find} has not been found in"
+	  "${_lib_env}${ColourReset}")
+      endif()
+    endif()
+  endif()
+  message("${BoldYellow}Please indicate where to find ${_lib_to_find}. You have three options:\n"
+    "- Option 1: Provide the root directory of the library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n"
+    "- Option 2: Provide the directory where to find the library with cmake option: -D${LIBNAME}_LIBDIR=your/path/to/${libname}/lib/\n"
+    "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n"
+    "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}")
+
+endmacro()
+
+# This macro informs why the _lib_to_find file has not been found
+macro(Print_Find_Library_Blas_Status _libname _lib_to_find)
+
+  # save _libname upper/lower case
+  string(TOUPPER ${_libname} LIBNAME)
+  string(TOLOWER ${_libname} libname)
+
+  # print status
+  #message(" ")
+  if(${LIBNAME}_LIBDIR)
+    message("${Yellow}${LIBNAME}_LIBDIR is defined but ${_lib_to_find}"
+      "has not been found in ${ARGN}${ColourReset}")
+  else()
+    if(${LIBNAME}_DIR)
+      message("${Yellow}${LIBNAME}_DIR is defined but ${_lib_to_find}"
+	"has not been found in ${ARGN}${ColourReset}")
+    else()
+      message("${Yellow}${_lib_to_find} not found."
+	"Nor ${LIBNAME}_DIR neither ${LIBNAME}_LIBDIR"
+	"are defined so that we look for ${_lib_to_find} in"
+	"system paths (Linux: LD_LIBRARY_PATH, Windows: LIB,"
+	"Mac: DYLD_LIBRARY_PATH,"
+	"CMAKE_C_IMPLICIT_LINK_DIRECTORIES)${ColourReset}")
+      if(_lib_env)
+	message("${Yellow}${_lib_to_find} has not been found in"
+	  "${_lib_env}${ColourReset}")
+      endif()
+    endif()
+  endif()
+  message("${BoldYellow}Please indicate where to find ${_lib_to_find}. You have three options:\n"
+    "- Option 1: Provide the root directory of the library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n"
+    "- Option 2: Provide the directory where to find the library with cmake option: -D${LIBNAME}_LIBDIR=your/path/to/${libname}/lib/\n"
+    "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n"
+    "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}")
+
+endmacro()
+
+# This macro informs why the _lib_to_find file has not been found
+macro(Print_Find_Library_Blas_CheckFunc_Status _name)
+
+  # save _libname upper/lower case
+  string(TOUPPER ${_name} FUNCNAME)
+  string(TOLOWER ${_name} funcname)
+
+  # print status
+  #message(" ")
+  message("${Red}Libs have been found but check of symbol ${_name} failed "
+    "with following libraries ${ARGN}${ColourReset}")
+  message("${BoldRed}Please open your error file CMakeFiles/CMakeError.log"
+    "to figure out why it fails${ColourReset}")
+  #message(" ")
+
+endmacro()
+
+# This macro informs that _pc_to_find file has not been found in the list
+# path you give as last argument (read in ${ARGN})
+# ex: Print_Find_Pkgconfig_Status(foo foo.pc ${PATHLIST}
+macro(Print_Find_Pkgconfig_Status _libname _pc_to_find)
+
+  # save _libname lower case
+  string(TOLOWER ${_libname} libname)
+
+  # print status
+  #message(" ")
+  message("${Magenta}${_pc_to_find} has not been found in"
+    "${ARGN}${ColourReset}")
+  message("${BoldMagenta}If you really want to use the pkg-config file of"
+    "${libname}, please update your PKG_CONFIG_PATH with the path"
+    "where ${_pc_to_find} states${ColourReset}")
+  #message(" ")
+
+endmacro()
diff --git a/bfps/cpp/Lagrange_polys.cpp b/cpp/Lagrange_polys.cpp
similarity index 100%
rename from bfps/cpp/Lagrange_polys.cpp
rename to cpp/Lagrange_polys.cpp
diff --git a/bfps/cpp/Lagrange_polys.hpp b/cpp/Lagrange_polys.hpp
similarity index 100%
rename from bfps/cpp/Lagrange_polys.hpp
rename to cpp/Lagrange_polys.hpp
diff --git a/bfps/cpp/base.hpp b/cpp/base.hpp
similarity index 100%
rename from bfps/cpp/base.hpp
rename to cpp/base.hpp
diff --git a/bfps/cpp/bfps_timer.hpp b/cpp/bfps_timer.hpp
similarity index 100%
rename from bfps/cpp/bfps_timer.hpp
rename to cpp/bfps_timer.hpp
diff --git a/cpp/fftw_interface.hpp b/cpp/fftw_interface.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0a840dd5ba3d864b36271515faa7cb81f3042c01
--- /dev/null
+++ b/cpp/fftw_interface.hpp
@@ -0,0 +1,779 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+#ifndef FFTW_INTERFACE_HPP
+#define FFTW_INTERFACE_HPP
+
+#include <fftw3-mpi.h>
+#include <map>
+#include <string>
+
+#ifdef USE_FFTWESTIMATE
+#define DEFAULT_FFTW_FLAG FFTW_ESTIMATE
+#warning You are using FFTW estimate
+#else
+#define DEFAULT_FFTW_FLAG FFTW_PATIENT
+#endif
+
+// To have multiple calls to c2r/r2c
+// you must define SPLIT_FFTW_MANY
+// by calling setup.py --split-fftw-many
+#ifdef SPLIT_FFTW_MANY
+#include <vector>
+#include <memory>
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <type_traits>
+
+// To mix unique ptr with allocation from fftw
+struct fftw_free_deleter{
+    template <typename T>
+    void operator()(T *p) const {
+        fftwf_free(const_cast<typename std::remove_const<T>::type*>(p));
+    }
+};
+
+#endif
+
+template <class realtype>
+class fftw_interface;
+
+template <>
+class fftw_interface<float>
+{
+public:
+    using real = float;
+    using complex = fftwf_complex;
+    using plan = fftwf_plan;
+    using iodim = fftwf_iodim;
+#ifdef SPLIT_FFTW_MANY
+    struct many_plan_container{
+        int rnk;
+        std::vector<ptrdiff_t> n;
+        int howmany;
+        ptrdiff_t iblock;
+        ptrdiff_t oblock;
+        std::unique_ptr<real[], fftw_free_deleter> buffer;
+        plan plan_to_use;
+
+        ptrdiff_t local_n0, local_0_start;
+        ptrdiff_t local_n1, local_1_start;
+
+        bool is_r2c;
+        void* in;
+        void* out;
+
+        ptrdiff_t nb_sections_real;
+        ptrdiff_t size_real_section;
+        ptrdiff_t nb_sections_complex;
+        ptrdiff_t size_complex_section;
+
+        ptrdiff_t sizeBuffer;
+    };
+
+    using many_plan = many_plan_container;
+#else
+    using many_plan = fftwf_plan;
+#endif
+
+    static complex* alloc_complex(const size_t in_size){
+        return fftwf_alloc_complex(in_size);
+    }
+
+    static real* alloc_real(const size_t in_size){
+        return fftwf_alloc_real(in_size);
+    }
+
+    static void free(void* ptr){
+        fftwf_free(ptr);
+    }
+
+    static void execute(plan in_plan){
+        fftwf_execute(in_plan);
+    }
+
+    static void destroy_plan(plan in_plan){
+        fftwf_destroy_plan(in_plan);
+    }
+
+    template <class ... Params>
+    static ptrdiff_t mpi_local_size_many(Params ... params){
+        return fftwf_mpi_local_size_many(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_transpose(Params ... params){
+        return fftwf_mpi_plan_transpose(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_many_transpose(Params ... params){
+        return fftwf_mpi_plan_many_transpose(params...);
+    }
+
+    template <class ... Params>
+    static plan plan_guru_r2r(Params ... params){
+        return fftwf_plan_guru_r2r(params...);
+    }
+
+    template <class ... Params>
+    static plan plan_guru_dft(Params ... params){
+        return fftwf_plan_guru_dft(params...);
+    }
+
+    template <class ... Params>
+    static ptrdiff_t mpi_local_size_transposed(Params ... params){
+        return fftwf_mpi_local_size_transposed(params...);
+    }
+
+#ifdef SPLIT_FFTW_MANY
+    static ptrdiff_t mpi_local_size_many_transposed(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
+                                                    ptrdiff_t block0, ptrdiff_t block1, MPI_Comm comm,
+                                                    ptrdiff_t *local_n0, ptrdiff_t *local_0_start,
+                                                    ptrdiff_t *local_n1, ptrdiff_t *local_1_start){
+        assert(block0 == FFTW_MPI_DEFAULT_BLOCK);
+        assert(block1 == FFTW_MPI_DEFAULT_BLOCK);
+        return howmany*mpi_local_size_transposed(rnk, n, comm,
+                                                   local_n0, local_0_start,
+                                                   local_n1, local_1_start);
+    }
+
+    static many_plan mpi_plan_many_dft_c2r(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
+                                                         ptrdiff_t iblock, ptrdiff_t oblock,
+                                                         complex *in, real *out,
+                                                         MPI_Comm comm, unsigned flags){
+        assert(iblock == FFTW_MPI_DEFAULT_BLOCK);
+        assert(oblock == FFTW_MPI_DEFAULT_BLOCK);
+
+        many_plan c2r_plan;
+        c2r_plan.rnk = rnk;
+        c2r_plan.n.insert(c2r_plan.n.end(), n, n+rnk);
+        c2r_plan.howmany = howmany;
+        c2r_plan.iblock = iblock;
+        c2r_plan.oblock = oblock;
+        c2r_plan.is_r2c = false;
+        c2r_plan.in = in;
+        c2r_plan.out = out;
+        c2r_plan.sizeBuffer = 0;
+
+        // If 1 then use default without copy
+        if(howmany == 1){
+            c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n,
+                                           (complex*)in,
+                                           out,
+                                           comm, flags);
+            return c2r_plan;
+        }
+
+        // We need to find out the size of the buffer to allocate
+        mpi_local_size_transposed(
+                rnk, n, comm,
+                &c2r_plan.local_n0, &c2r_plan.local_0_start,
+                &c2r_plan.local_n1, &c2r_plan.local_1_start);
+
+        ptrdiff_t sizeBuffer = c2r_plan.local_n0;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            sizeBuffer *= n[idxrnk];
+        }
+        sizeBuffer *= n[rnk-1]+2;
+
+        c2r_plan.buffer.reset(alloc_real(sizeBuffer));
+        memset(c2r_plan.buffer.get(), 0, sizeof(real)*sizeBuffer);
+        c2r_plan.sizeBuffer = sizeBuffer;
+        // Init the plan
+        c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n,
+                                         (complex*)c2r_plan.buffer.get(),
+                                         c2r_plan.buffer.get(),
+                                         comm, flags);
+
+        c2r_plan.nb_sections_real = c2r_plan.local_n0;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            c2r_plan.nb_sections_real *= n[idxrnk];
+            c2r_plan.nb_sections_complex *= n[idxrnk];
+        }
+        c2r_plan.size_real_section = (n[rnk-1] + 2);
+
+        c2r_plan.nb_sections_complex = c2r_plan.local_n1;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            if(idxrnk == 1){
+                c2r_plan.nb_sections_complex *= n[0];
+            }
+            else{
+                c2r_plan.nb_sections_complex *= n[idxrnk];
+            }
+        }
+        c2r_plan.size_complex_section = (n[rnk-1]/2 + 1);
+
+        return c2r_plan;
+    }
+
+    static many_plan mpi_plan_many_dft_r2c(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
+                                                         ptrdiff_t iblock, ptrdiff_t oblock,
+                                                         real *in, complex *out,
+                                                         MPI_Comm comm, unsigned flags){
+        assert(iblock == FFTW_MPI_DEFAULT_BLOCK);
+        assert(oblock == FFTW_MPI_DEFAULT_BLOCK);
+
+        many_plan r2c_plan;
+        r2c_plan.rnk = rnk;
+        r2c_plan.n.insert(r2c_plan.n.end(), n, n+rnk);
+        r2c_plan.howmany = howmany;
+        r2c_plan.iblock = iblock;
+        r2c_plan.oblock = oblock;
+        r2c_plan.is_r2c = true;
+        r2c_plan.in = in;
+        r2c_plan.out = out;
+        r2c_plan.sizeBuffer = 0;
+
+        // If 1 then use default without copy
+        if(howmany == 1){
+            r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n,
+                                           in,
+                                           (complex*)out,
+                                           comm, flags);
+            return r2c_plan;
+        }
+
+        // We need to find out the size of the buffer to allocate
+        mpi_local_size_transposed(
+                rnk, n, comm,
+                &r2c_plan.local_n0, &r2c_plan.local_0_start,
+                &r2c_plan.local_n1, &r2c_plan.local_1_start);
+
+        ptrdiff_t sizeBuffer = r2c_plan.local_n0;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            sizeBuffer *= n[idxrnk];
+        }
+        sizeBuffer *= n[rnk-1]+2;
+
+        r2c_plan.buffer.reset(alloc_real(sizeBuffer));
+        memset(r2c_plan.buffer.get(), 0, sizeof(real)*sizeBuffer);
+        r2c_plan.sizeBuffer = sizeBuffer;
+        // Init the plan
+        r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n,
+                                         r2c_plan.buffer.get(),
+                                         (complex*)r2c_plan.buffer.get(),
+                                         comm, flags);
+
+        r2c_plan.nb_sections_real = r2c_plan.local_n0;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            r2c_plan.nb_sections_real *= n[idxrnk];
+            r2c_plan.nb_sections_complex *= n[idxrnk];
+        }
+        r2c_plan.size_real_section = (n[rnk-1] + 2);
+
+        r2c_plan.nb_sections_complex = r2c_plan.local_n1;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            if(idxrnk == 1){
+                r2c_plan.nb_sections_complex *= n[0];
+            }
+            else{
+                r2c_plan.nb_sections_complex *= n[idxrnk];
+            }
+        }
+        r2c_plan.size_complex_section = (n[rnk-1]/2 + 1);
+
+        return r2c_plan;
+    }
+
+    static void execute(many_plan& in_plan){
+        if(in_plan.howmany == 1){
+            execute(in_plan.plan_to_use);
+            return;
+        }
+
+        std::unique_ptr<real[]> in_copy;
+        if(in_plan.is_r2c){
+            in_copy.reset(new real[in_plan.nb_sections_real * in_plan.size_real_section * in_plan.howmany]);
+
+            for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){
+                    for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){
+                        in_copy[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_real_section*in_plan.howmany] =
+                                ((const real*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_real_section*in_plan.howmany];
+                    }
+                }
+            }
+        }
+        else{
+            in_copy.reset((real*)new complex[in_plan.nb_sections_complex * in_plan.size_complex_section * in_plan.howmany]);
+
+            for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){
+                    for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){
+                        ((complex*)in_copy.get())[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][0] =
+                                ((const complex*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][0];
+                        ((complex*)in_copy.get())[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][1] =
+                                ((const complex*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][1];
+                    }
+                }
+            }
+        }
+
+        for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){
+            // Copy to buffer
+            if(in_plan.is_r2c){
+                for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){
+                    real* dest = in_plan.buffer.get() + idx_section*in_plan.size_real_section;
+                    const real* src = in_copy.get()+idx_howmany + idx_section*in_plan.size_real_section*in_plan.howmany;
+
+                    for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){
+                        dest[idx_copy] = src[idx_copy*in_plan.howmany];
+                    }
+                }
+            }
+            else{
+                for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){
+                    complex* dest = ((complex*)in_plan.buffer.get()) + idx_section*in_plan.size_complex_section;
+                    const complex* src = ((const complex*)in_copy.get()) + idx_howmany + idx_section*in_plan.size_complex_section*in_plan.howmany;
+                    for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){
+                        dest[idx_copy][0] = src[idx_copy*in_plan.howmany][0];
+                        dest[idx_copy][1] = src[idx_copy*in_plan.howmany][1];
+                    }
+                }
+            }
+
+            execute(in_plan.plan_to_use);
+            // Copy result from buffer
+            if(in_plan.is_r2c){
+                for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){
+                    complex* dest = ((complex*)in_plan.out) + idx_howmany + idx_section*in_plan.size_complex_section*in_plan.howmany;
+                    const complex* src = ((const complex*)in_plan.buffer.get()) + idx_section*in_plan.size_complex_section;
+                    for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){
+                        dest[idx_copy*in_plan.howmany][0] = src[idx_copy][0];
+                        dest[idx_copy*in_plan.howmany][1] = src[idx_copy][1];
+                    }
+                }
+            }
+            else{
+                for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){
+                    real* dest = ((real*)in_plan.out)+idx_howmany + idx_section*in_plan.size_real_section*in_plan.howmany;
+                    const real* src = in_plan.buffer.get() + idx_section*in_plan.size_real_section;
+
+                    for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){
+                        dest[idx_copy*in_plan.howmany] = src[idx_copy];
+                    }
+                }
+            }
+        }
+    }
+
+    static void destroy_plan(many_plan& in_plan){
+        destroy_plan(in_plan.plan_to_use);
+    }
+#else
+
+    template <class ... Params>
+    static ptrdiff_t mpi_local_size_many_transposed(Params ... params){
+        return fftwf_mpi_local_size_many_transposed(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_many_dft_c2r(Params ... params){
+        return fftwf_mpi_plan_many_dft_c2r(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_many_dft_r2c(Params ... params){
+        return fftwf_mpi_plan_many_dft_r2c(params...);
+    }
+#endif
+
+    template <class ... Params>
+    static plan mpi_plan_dft_c2r(Params ... params){
+        return fftwf_mpi_plan_dft_c2r(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_dft_r2c(Params ... params){
+        return fftwf_mpi_plan_dft_r2c(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_dft_c2r_3d(Params ... params){
+        return fftwf_mpi_plan_dft_c2r_3d(params...);
+    }
+};
+
+template <>
+class fftw_interface<double>
+{
+public:
+    using real = double;
+    using complex = fftw_complex;
+    using plan = fftw_plan;
+    using iodim = fftw_iodim;
+#ifdef SPLIT_FFTW_MANY
+    struct many_plan_container{
+        int rnk;
+        std::vector<ptrdiff_t> n;
+        int howmany;
+        ptrdiff_t iblock;
+        ptrdiff_t oblock;
+        std::unique_ptr<real[], fftw_free_deleter> buffer;
+        plan plan_to_use;
+
+        ptrdiff_t local_n0, local_0_start;
+        ptrdiff_t local_n1, local_1_start;
+
+        bool is_r2c;
+        void* in;
+        void* out;
+
+        ptrdiff_t nb_sections_real;
+        ptrdiff_t size_real_section;
+        ptrdiff_t nb_sections_complex;
+        ptrdiff_t size_complex_section;
+
+        ptrdiff_t sizeBuffer;
+    };
+
+    using many_plan = many_plan_container;
+#else
+    using many_plan = fftw_plan;
+#endif
+
+    static complex* alloc_complex(const size_t in_size){
+        return fftw_alloc_complex(in_size);
+    }
+
+    static real* alloc_real(const size_t in_size){
+        return fftw_alloc_real(in_size);
+    }
+
+    static void free(void* ptr){
+        fftw_free(ptr);
+    }
+
+    static void execute(plan in_plan){
+        fftw_execute(in_plan);
+    }
+
+    static void destroy_plan(plan in_plan){
+        fftw_destroy_plan(in_plan);
+    }
+
+    template <class ... Params>
+    static ptrdiff_t mpi_local_size_many(Params ... params){
+        return fftw_mpi_local_size_many(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_transpose(Params ... params){
+        return fftw_mpi_plan_transpose(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_many_transpose(Params ... params){
+        return fftw_mpi_plan_many_transpose(params...);
+    }
+
+    template <class ... Params>
+    static plan plan_guru_r2r(Params ... params){
+        return fftw_plan_guru_r2r(params...);
+    }
+
+    template <class ... Params>
+    static plan plan_guru_dft(Params ... params){
+        return fftw_plan_guru_dft(params...);
+    }
+
+    template <class ... Params>
+    static ptrdiff_t mpi_local_size_transposed(Params ... params){
+        return fftw_mpi_local_size_transposed(params...);
+    }
+
+#ifdef SPLIT_FFTW_MANY
+    static ptrdiff_t mpi_local_size_many_transposed(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
+                                                    ptrdiff_t block0, ptrdiff_t block1, MPI_Comm comm,
+                                                    ptrdiff_t *local_n0, ptrdiff_t *local_0_start,
+                                                    ptrdiff_t *local_n1, ptrdiff_t *local_1_start){
+        assert(block0 == FFTW_MPI_DEFAULT_BLOCK);
+        assert(block1 == FFTW_MPI_DEFAULT_BLOCK);
+        return howmany*mpi_local_size_transposed(rnk, n, comm,
+                                                           local_n0, local_0_start,
+                                                           local_n1, local_1_start);
+    }
+
+    static many_plan mpi_plan_many_dft_c2r(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
+                                                         ptrdiff_t iblock, ptrdiff_t oblock,
+                                                         complex *in, real *out,
+                                                         MPI_Comm comm, unsigned flags){
+        assert(iblock == FFTW_MPI_DEFAULT_BLOCK);
+        assert(oblock == FFTW_MPI_DEFAULT_BLOCK);
+
+        many_plan c2r_plan;
+        c2r_plan.rnk = rnk;
+        c2r_plan.n.insert(c2r_plan.n.end(), n, n+rnk);
+        c2r_plan.howmany = howmany;
+        c2r_plan.iblock = iblock;
+        c2r_plan.oblock = oblock;
+        c2r_plan.is_r2c = false;
+        c2r_plan.in = in;
+        c2r_plan.out = out;
+        c2r_plan.sizeBuffer = 0;
+
+        // If 1 then use default without copy
+        if(howmany == 1){
+            c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n,
+                                           (complex*)in,
+                                           out,
+                                           comm, flags);
+            return c2r_plan;
+        }
+
+        // We need to find out the size of the buffer to allocate
+        mpi_local_size_transposed(
+                rnk, n, comm,
+                &c2r_plan.local_n0, &c2r_plan.local_0_start,
+                &c2r_plan.local_n1, &c2r_plan.local_1_start);
+
+        ptrdiff_t sizeBuffer = c2r_plan.local_n0;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            sizeBuffer *= n[idxrnk];
+        }
+        sizeBuffer *= n[rnk-1]+2;
+
+        c2r_plan.buffer.reset(alloc_real(sizeBuffer));
+        memset(c2r_plan.buffer.get(), 0, sizeof(real)*sizeBuffer);
+        c2r_plan.sizeBuffer = sizeBuffer;
+        // Init the plan
+        c2r_plan.plan_to_use = mpi_plan_dft_c2r(rnk, n,
+                                         (complex*)c2r_plan.buffer.get(),
+                                         c2r_plan.buffer.get(),
+                                         comm, flags);
+
+        c2r_plan.nb_sections_real = c2r_plan.local_n0;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            c2r_plan.nb_sections_real *= n[idxrnk];
+            c2r_plan.nb_sections_complex *= n[idxrnk];
+        }
+        c2r_plan.size_real_section = (n[rnk-1] + 2);
+
+        c2r_plan.nb_sections_complex = c2r_plan.local_n1;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            if(idxrnk == 1){
+                c2r_plan.nb_sections_complex *= n[0];
+            }
+            else{
+                c2r_plan.nb_sections_complex *= n[idxrnk];
+            }
+        }
+        c2r_plan.size_complex_section = (n[rnk-1]/2 + 1);
+
+        return c2r_plan;
+    }
+
+    static many_plan mpi_plan_many_dft_r2c(int rnk, const ptrdiff_t *n, ptrdiff_t howmany,
+                                                         ptrdiff_t iblock, ptrdiff_t oblock,
+                                                         real *in, complex *out,
+                                                         MPI_Comm comm, unsigned flags){
+        assert(iblock == FFTW_MPI_DEFAULT_BLOCK);
+        assert(oblock == FFTW_MPI_DEFAULT_BLOCK);
+
+        many_plan r2c_plan;
+        r2c_plan.rnk = rnk;
+        r2c_plan.n.insert(r2c_plan.n.end(), n, n+rnk);
+        r2c_plan.howmany = howmany;
+        r2c_plan.iblock = iblock;
+        r2c_plan.oblock = oblock;
+        r2c_plan.is_r2c = true;
+        r2c_plan.in = in;
+        r2c_plan.out = out;
+        r2c_plan.sizeBuffer = 0;
+
+        // If 1 then use default without copy
+        if(howmany == 1){
+            r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n,
+                                           in,
+                                           (complex*)out,
+                                           comm, flags);
+            return r2c_plan;
+        }
+
+        // We need to find out the size of the buffer to allocate
+        mpi_local_size_transposed(
+                rnk, n, comm,
+                &r2c_plan.local_n0, &r2c_plan.local_0_start,
+                &r2c_plan.local_n1, &r2c_plan.local_1_start);
+
+        ptrdiff_t sizeBuffer = r2c_plan.local_n0;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            sizeBuffer *= n[idxrnk];
+        }
+        sizeBuffer *= n[rnk-1]+2;
+
+        r2c_plan.buffer.reset(alloc_real(sizeBuffer));
+        memset(r2c_plan.buffer.get(), 0, sizeof(real)*sizeBuffer);
+        r2c_plan.sizeBuffer = sizeBuffer;
+        // Init the plan
+        r2c_plan.plan_to_use = mpi_plan_dft_r2c(rnk, n,
+                                         r2c_plan.buffer.get(),
+                                         (complex*)r2c_plan.buffer.get(),
+                                         comm, flags);
+
+        r2c_plan.nb_sections_real = r2c_plan.local_n0;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            r2c_plan.nb_sections_real *= n[idxrnk];
+            r2c_plan.nb_sections_complex *= n[idxrnk];
+        }
+        r2c_plan.size_real_section = (n[rnk-1] + 2);
+
+        r2c_plan.nb_sections_complex = r2c_plan.local_n1;
+        for(int idxrnk = 1 ; idxrnk < rnk-1 ; ++idxrnk){
+            if(idxrnk == 1){
+                r2c_plan.nb_sections_complex *= n[0];
+            }
+            else{
+                r2c_plan.nb_sections_complex *= n[idxrnk];
+            }
+        }
+        r2c_plan.size_complex_section = (n[rnk-1]/2 + 1);
+
+        return r2c_plan;
+    }
+
+    static void execute(many_plan& in_plan){
+        if(in_plan.howmany == 1){
+            execute(in_plan.plan_to_use);
+            return;
+        }
+
+        std::unique_ptr<real[]> in_copy;
+        if(in_plan.is_r2c){
+            in_copy.reset(new real[in_plan.nb_sections_real * in_plan.size_real_section * in_plan.howmany]);
+
+            for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){
+                    for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){
+                        in_copy[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_real_section*in_plan.howmany] =
+                                ((const real*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_real_section*in_plan.howmany];
+                    }
+                }
+            }
+        }
+        else{
+            in_copy.reset((real*)new complex[in_plan.nb_sections_complex * in_plan.size_complex_section * in_plan.howmany]);
+
+            for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){
+                for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){
+                    for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){
+                        ((complex*)in_copy.get())[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][0] =
+                                ((const complex*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][0];
+                        ((complex*)in_copy.get())[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][1] =
+                                ((const complex*)in_plan.in)[idx_howmany + idx_copy*in_plan.howmany + idx_section*in_plan.size_complex_section*in_plan.howmany][1];
+                    }
+                }
+            }
+        }
+
+        for(int idx_howmany = 0 ; idx_howmany < in_plan.howmany ; ++idx_howmany){
+            // Copy to buffer
+            if(in_plan.is_r2c){
+                for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){
+                    real* dest = in_plan.buffer.get() + idx_section*in_plan.size_real_section;
+                    const real* src = in_copy.get()+idx_howmany + idx_section*in_plan.size_real_section*in_plan.howmany;
+
+                    for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){
+                        dest[idx_copy] = src[idx_copy*in_plan.howmany];
+                    }
+                }
+            }
+            else{
+                for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){
+                    complex* dest = ((complex*)in_plan.buffer.get()) + idx_section*in_plan.size_complex_section;
+                    const complex* src = ((const complex*)in_copy.get()) + idx_howmany + idx_section*in_plan.size_complex_section*in_plan.howmany;
+                    for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){
+                        dest[idx_copy][0] = src[idx_copy*in_plan.howmany][0];
+                        dest[idx_copy][1] = src[idx_copy*in_plan.howmany][1];
+                    }
+                }
+            }
+
+            execute(in_plan.plan_to_use);
+            // Copy result from buffer
+            if(in_plan.is_r2c){
+                for(int idx_section = 0 ; idx_section < in_plan.nb_sections_complex ; ++idx_section){
+                    complex* dest = ((complex*)in_plan.out) + idx_howmany + idx_section*in_plan.size_complex_section*in_plan.howmany;
+                    const complex* src = ((const complex*)in_plan.buffer.get()) + idx_section*in_plan.size_complex_section;
+                    for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1]/2+1 ; ++idx_copy){
+                        dest[idx_copy*in_plan.howmany][0] = src[idx_copy][0];
+                        dest[idx_copy*in_plan.howmany][1] = src[idx_copy][1];
+                    }
+                }
+            }
+            else{
+                for(int idx_section = 0 ; idx_section < in_plan.nb_sections_real ; ++idx_section){
+                    real* dest = ((real*)in_plan.out)+idx_howmany + idx_section*in_plan.size_real_section*in_plan.howmany;
+                    const real* src = in_plan.buffer.get() + idx_section*in_plan.size_real_section;
+
+                    for(ptrdiff_t idx_copy = 0 ; idx_copy < in_plan.n[in_plan.rnk-1] ; ++idx_copy){
+                        dest[idx_copy*in_plan.howmany] = src[idx_copy];
+                    }
+                }
+            }
+        }
+    }
+
+    static void destroy_plan(many_plan& in_plan){
+        destroy_plan(in_plan.plan_to_use);
+    }
+#else    
+    template <class ... Params>
+    static ptrdiff_t mpi_local_size_many_transposed(Params ... params){
+        return fftw_mpi_local_size_many_transposed(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_many_dft_c2r(Params ... params){
+        return fftw_mpi_plan_many_dft_c2r(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_many_dft_r2c(Params ... params){
+        return fftw_mpi_plan_many_dft_r2c(params...);
+    }
+#endif
+
+    template <class ... Params>
+    static plan mpi_plan_dft_c2r(Params ... params){
+        return fftw_mpi_plan_dft_c2r(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_dft_r2c(Params ... params){
+        return fftw_mpi_plan_dft_r2c(params...);
+    }
+
+    template <class ... Params>
+    static plan mpi_plan_dft_c2r_3d(Params ... params){
+        return fftw_mpi_plan_dft_c2r_3d(params...);
+    }
+};
+
+
+
+#endif // FFTW_INTERFACE_HPP
+
diff --git a/bfps/cpp/fftw_tools.hpp b/cpp/fftw_tools.cpp
similarity index 58%
rename from bfps/cpp/fftw_tools.hpp
rename to cpp/fftw_tools.cpp
index d0f3dbf30df3ee95f3d7934f0dd7fca633858b44..55794b41ebf8ebfa03977d5a79704aa38b39af52 100644
--- a/bfps/cpp/fftw_tools.hpp
+++ b/cpp/fftw_tools.cpp
@@ -22,49 +22,19 @@
 *                                                                     *
 **********************************************************************/
 
-
-
-#include <mpi.h>
-#include <fftw3-mpi.h>
-#include "field_descriptor.hpp"
-
-#ifndef FFTW_TOOLS
-
-#define FFTW_TOOLS
-
-extern int myrank, nprocs;
-
-/* given two arrays of the same dimension, we do a simple resize in
- * Fourier space: either chop off high modes, or pad with zeros.
- * the arrays are assumed to use 3D mpi fftw layout.
- * */
-template <class rnumber>
-int copy_complex_array(
-        field_descriptor<rnumber> *fi,
-        rnumber (*ai)[2],
-        field_descriptor<rnumber> *fo,
-        rnumber (*ao)[2],
-        int howmany=1);
-
-template <class rnumber>
-int clip_zero_padding(
-        field_descriptor<rnumber> *f,
-        rnumber *a,
-        int howmany=1);
-
-/* function to get pair of descriptors for real and Fourier space
- * arrays used with fftw.
- * the n0, n1, n2 correspond to the real space data WITHOUT the zero
- * padding that FFTW needs.
- * IMPORTANT: the real space array must be allocated with
- * 2*fc->local_size, and then the zeros cleaned up before trying
- * to write data.
- * */
-template <class rnumber>
-int get_descriptors_3D(
-        int n0, int n1, int n2,
-        field_descriptor<rnumber> **fr,
-        field_descriptor<rnumber> **fc);
-
-#endif//FFTW_TOOLS
+#include <stdlib.h>
+#include <algorithm>
+#include <iostream>
+#include "base.hpp"
+#include "fftw_tools.hpp"
+#include "fftw_interface.hpp"
+
+#define NDEBUG
+
+std::map<std::string, unsigned> fftw_planner_string_to_flag = {
+    {"FFTW_ESTIMATE", FFTW_ESTIMATE},
+    {"FFTW_MEASURE", FFTW_MEASURE},
+    {"FFTW_PATIENT", FFTW_PATIENT},
+    {"parameter does not exist", DEFAULT_FFTW_FLAG},
+};
 
diff --git a/bfps/cpp/tracers.hpp b/cpp/fftw_tools.hpp
similarity index 66%
rename from bfps/cpp/tracers.hpp
rename to cpp/fftw_tools.hpp
index 1a063e026578dd71b9a223ee46b55d2c86d4399f..b41cd2a453c2c0aa34f56febb17f2a650a2a9685 100644
--- a/bfps/cpp/tracers.hpp
+++ b/cpp/fftw_tools.hpp
@@ -24,40 +24,17 @@
 
 
 
-#include "slab_field_particles.hpp"
+#include <mpi.h>
+#include <fftw3-mpi.h>
+#include <map>
 
-#ifndef TRACERS
+#ifndef FFTW_TOOLS
 
-#define TRACERS
+#define FFTW_TOOLS
 
 extern int myrank, nprocs;
 
-template <class rnumber>
-class tracers final:public slab_field_particles<rnumber>
-{
-    public:
-        rnumber *source_data;
-        rnumber *data;
-
-        /* methods */
-        tracers(
-                const char *NAME,
-                fluid_solver_base<rnumber> *FSOLVER,
-                const int NPARTICLES,
-                base_polynomial_values BETA_POLYS,
-                const int NEIGHBOURS,
-                const int TRAJ_SKIP,
-                const int INTEGRATION_STEPS,
-                rnumber *SOURCE_DATA);
-        ~tracers();
-
-        void update_field(bool clip_on = true);
-        virtual void get_rhs(double *x, double *rhs);
-        virtual void jump_estimate(double *jump_length);
-
-        void sample_vec_field(rnumber *vec_field, double *vec_values);
-};
-
-
-#endif//TRACERS
+extern std::map<std::string, unsigned> fftw_planner_string_to_flag;
+
+#endif//FFTW_TOOLS
 
diff --git a/bfps/cpp/field.cpp b/cpp/field.cpp
similarity index 63%
rename from bfps/cpp/field.cpp
rename to cpp/field.cpp
index 197ccb5da26dabf9f35d84bdc627a31f20ee49ad..04eaa008e0c37b37b382335b6069425e1ce5d731 100644
--- a/bfps/cpp/field.cpp
+++ b/cpp/field.cpp
@@ -23,6 +23,9 @@
 **********************************************************************/
 
 
+
+#define NDEBUG
+
 #include <sys/stat.h>
 #include <cmath>
 #include <cstdlib>
@@ -73,11 +76,11 @@ field<rnumber, be, fc>::field(
             nfftw[0] = nz;
             nfftw[1] = ny;
             nfftw[2] = nx;
-            //ptrdiff_t tmp_local_size;
+            hsize_t tmp_local_size;
             ptrdiff_t local_n0, local_0_start;
             ptrdiff_t local_n1, local_1_start;
-            //tmp_local_size = fftw_mpi_local_size_many_transposed(
-            fftw_mpi_local_size_many_transposed(
+            variable_used_only_in_assert(tmp_local_size);
+            tmp_local_size = fftw_interface<rnumber>::mpi_local_size_many_transposed(
                     3, nfftw, ncomp(fc),
                     FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, this->comm,
                     &local_n0, &local_0_start,
@@ -88,6 +91,7 @@ field<rnumber, be, fc>::field(
             starts[0] = local_0_start; starts[1] = 0; starts[2] = 0;
             this->rlayout = new field_layout<fc>(
                     sizes, subsizes, starts, this->comm);
+            assert(tmp_local_size == this->rlayout->local_size);
             this->npoints = this->rlayout->full_size / ncomp(fc);
             sizes[0] = nz; sizes[1] = ny; sizes[2] = nx+2;
             subsizes[0] = local_n0; subsizes[1] = ny; subsizes[2] = nx+2;
@@ -224,6 +228,7 @@ int field<rnumber, be, fc>::io(
                 H5Tequal(dset_type, H5T_IEEE_F64LE) ||
                 H5Tequal(dset_type, H5T_INTEL_F64) ||
                 H5Tequal(dset_type, H5T_NATIVE_DOUBLE));
+        variable_used_only_in_assert(io_for_real);
         H5Tclose(dset_type);
         assert(this->real_space_representation == io_for_real);
     }
@@ -304,6 +309,7 @@ int field<rnumber, be, fc>::io(
 
     /* check file space */
     int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL);
+    variable_used_only_in_assert(ndims_fspace);
     assert(((unsigned int)(ndims_fspace)) == ndim(fc));
     if (this->real_space_representation)
     {
@@ -414,6 +420,7 @@ int field<rnumber, be, fc>::io_database(
                 H5Tequal(dset_type, H5T_IEEE_F64LE) ||
                 H5Tequal(dset_type, H5T_INTEL_F64) ||
                 H5Tequal(dset_type, H5T_NATIVE_DOUBLE));
+        variable_used_only_in_assert(io_for_real);
         H5Tclose(dset_type);
         assert(this->real_space_representation == io_for_real);
     }
@@ -490,6 +497,7 @@ int field<rnumber, be, fc>::io_database(
 
     /* check file space */
     int ndims_fspace = H5Sget_simple_extent_dims(fspace, dims, NULL);
+    variable_used_only_in_assert(ndims_fspace);
     assert(ndims_fspace == int(ndim(fc) + 1));
     offset[0] = toffset;
     if (this->real_space_representation)
@@ -576,7 +584,7 @@ int field<rnumber, be, fc>::write_0slice(
         count[1] = this->rmemlayout->sizes[1];
         count[2] = this->rmemlayout->sizes[2];
         count[3] = 3;
-        count[3] = 3;
+        count[4] = 3;
         mspace = H5Screate_simple(ndims, count, NULL);
         // array in file should not have the extra 2 points
         count[1] = this->rlayout->sizes[1];
@@ -612,6 +620,198 @@ int field<rnumber, be, fc>::write_0slice(
     return EXIT_SUCCESS;
 }
 
+template <typename rnumber,
+          field_backend be,
+          field_components fc>
+int field<rnumber, be, fc>::write_filtered(
+        const std::string fname,
+        const std::string field_name,
+        const int iteration,
+        int nx,
+        int ny,
+        int nz)
+{
+    /* file dataset has same dimensions as field */
+    TIMEZONE("field::write_filtered");
+    // only works in Fourier representation
+    assert(!this->real_space_representation);
+    assert(hsize_t(nx) <= this->rlayout->sizes[2]);
+    assert(hsize_t(ny) <= this->rlayout->sizes[1]);
+    assert(hsize_t(nz) <= this->rlayout->sizes[0]);
+    // current algorithm only works for more than one process
+    assert(this->nprocs >= 2);
+    hid_t file_id, dset_id, plist_id;
+    dset_id = H5I_BADID;
+    std::string dset_name = (
+            "/" + field_name +
+            "/complex" +
+            "/" + std::to_string(iteration));
+
+    /* open/create file */
+    plist_id = H5Pcreate(H5P_FILE_ACCESS);
+    H5Pset_fapl_mpio(plist_id, this->comm, MPI_INFO_NULL);
+    bool file_exists = false;
+    struct stat file_buffer;
+    file_exists = (stat(fname.c_str(), &file_buffer) == 0);
+    if (file_exists)
+        file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id);
+    else
+        file_id = H5Fcreate(fname.c_str(), H5F_ACC_EXCL, H5P_DEFAULT, plist_id);
+    assert(file_id >= 0);
+    H5Pclose(plist_id);
+
+    /* generic space initialization */
+    hid_t fspace, mspace;
+    hsize_t count[ndim(fc)], offset[ndim(fc)], dims[ndim(fc)], fdims[ndim(fc)];
+    hsize_t memoffset[ndim(fc)], memshape[ndim(fc)];
+
+    // set up dimensions
+    for (unsigned int i=3; i<ndim(fc); i++)
+    {
+        count [i] = this->clayout->subsizes[i];
+        offset[i] = this->clayout->starts[i];
+        dims  [i] = this->clayout->sizes[i];
+        memshape [i] = count[i];
+        memoffset[i] = 0;
+    }
+    // these are dimensions of dataset, needed
+    // to create dataset
+    //dims[0] = nz;
+    dims[0] = ny;
+    dims[1] = nz;
+    dims[2] = nx/2+1;
+
+    /* open/create data set */
+    if (!H5Lexists(file_id, field_name.c_str(), H5P_DEFAULT))
+    {
+        hid_t gid_tmp = H5Gcreate(
+                file_id, field_name.c_str(),
+                H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        H5Gclose(gid_tmp);
+    }
+    if (!H5Lexists(file_id, (field_name + "/complex").c_str(), H5P_DEFAULT))
+    {
+        hid_t gid_tmp = H5Gcreate(
+                file_id, ("/" + field_name + "/complex").c_str(),
+                H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        H5Gclose(gid_tmp);
+    }
+    if (H5Lexists(file_id, dset_name.c_str(), H5P_DEFAULT))
+    {
+        dset_id = H5Dopen(file_id, dset_name.c_str(), H5P_DEFAULT);
+        fspace = H5Dget_space(dset_id);
+    }
+    else
+    {
+        fspace = H5Screate_simple(
+                ndim(fc),
+                dims,
+                NULL);
+        /* chunking needs to go in here */
+        dset_id = H5Dcreate(
+                file_id,
+                dset_name.c_str(),
+                this->cnumber_H5T,
+                fspace,
+                H5P_DEFAULT,
+                H5P_DEFAULT,
+                H5P_DEFAULT);
+    }
+    /* check file space */
+    int ndims_fspace = H5Sget_simple_extent_dims(fspace, fdims, NULL);
+    variable_used_only_in_assert(ndims_fspace);
+    assert(((unsigned int)(ndims_fspace)) == ndim(fc));
+    for (unsigned int i=0; i<ndim(fc); i++)
+    {
+        assert(dims[i] == fdims[i]);
+    }
+    /* both dset_id and fspace now have sane values */
+
+    /// set up counts and offsets
+    /// x is easy, since only positive modes are present
+    count [2] = nx/2+1;
+    offset[2] = 0;
+    memshape [2] = this->clayout->subsizes[2];
+    memoffset[2] = 0;
+
+    /// three options for y:
+    /// this->starts[0] <= ny/2
+    /// ny / 2 < this->starts[0] +this->clayout->subsizes[0] < this->sizes[0] - ny/2
+    /// this->starts[0] >= this->sizes[0] - ny/2
+    /// we don't care about saving the ny/2 mode, because of symmetry
+    hsize_t y0 = this->clayout->starts[0];
+    hsize_t y1 = this->clayout->starts[0] + this->clayout->subsizes[0];
+    memshape[0] = this->clayout->subsizes[0];
+    if (y1 <= hsize_t(ny/2))
+    {
+        count[0] = this->clayout->subsizes[0];
+        offset[0] = y0;
+        memoffset[0] = 0;
+    }
+    else
+    {
+        if (y0 < hsize_t(ny)/2)
+        {
+            count[0] = ny/2 - y0;
+            offset[0] = y0;
+            memoffset[0] = 0;
+        }
+        else
+        {
+            if (y1 <= hsize_t(this->clayout->sizes[0] - ny/2 + 1))
+            { // y0 < y1 therefore y0 <= this->clayout->sizes[0] - ny/2
+                count[0] = 0;
+                offset[0] = ny/2;
+                memoffset[0] = 0;
+            }
+            else
+            {
+                if (y0 <= hsize_t(this->clayout->sizes[0] - ny/2))
+                {
+                    count[0] = y1 - (this->clayout->sizes[0] - ny/2);
+                    offset[0] = ny - (this->clayout->sizes[0] - y0);
+                    memoffset[0] = this->clayout->subsizes[0] - count[0];
+                }
+                else
+                {
+                    count[0] = this->clayout->subsizes[0];
+                    offset[0] = ny - (this->clayout->sizes[0] - y0);
+                    memoffset[0] = 0;
+                }
+            }
+        }
+    }
+    DEBUG_MSG("count[0] = %ld, offset[0] = %ld\n",
+            count[0], offset[0]);
+    /// for z, we need to take into account that there are
+    /// both positive and negative modes
+    for (int cz = 0; cz < 2; cz++)
+    {
+        count [1] = nz/2;
+        offset[1] = cz*nz/2;
+        memshape [1] = this->clayout->sizes[1];
+        memoffset[1] = cz*(this->clayout->sizes[1] - nz/2);
+        DEBUG_MSG("cz = %d, count[1] + offset[1] = %ld\n",
+                cz, count[1] + offset[1]);
+
+        //now write data
+        mspace = H5Screate_simple(ndim(fc), memshape, NULL);
+        H5Sselect_hyperslab(mspace, H5S_SELECT_SET, memoffset, NULL, count, NULL);
+        H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        H5Dwrite(dset_id, this->cnumber_H5T, mspace, fspace, H5P_DEFAULT, this->data);
+        H5Sclose(mspace);
+    }
+
+
+    /* close file data space */
+    H5Sclose(fspace);
+    /* close data set */
+    H5Dclose(dset_id);
+    /* close file */
+    H5Fclose(file_id);
+    return EXIT_SUCCESS;
+}
+
 
 template <typename rnumber,
           field_backend be,
@@ -869,25 +1069,138 @@ void field<rnumber, be, fc>::compute_rspace_stats(
         H5Sclose(wspace);
         H5Sclose(mspace);
         H5Dclose(dset);
-        if (H5Lexists(
-                    group,
-                    "0slices",
-                    H5P_DEFAULT))
-        {
-            if (H5Lexists(
-                        group,
-                        (std::string("0slices/") + dset_name).c_str(),
-                        H5P_DEFAULT))
-            this->write_0slice(
-                    group,
-                    dset_name,
-                    toffset);
-        }
     }
     delete[] moments;
     delete[] hist;
 }
 
+
+
+template <typename rnumber,
+          field_backend be,
+          field_components fc>
+void field<rnumber, be, fc>::compute_rspace_zaverage(
+                const hid_t group,
+                const std::string dset_name,
+                const hsize_t toffset)
+{
+    TIMEZONE("field::compute_rspace_zaverage");
+    assert(this->real_space_representation);
+    const hsize_t slice_size = this->rlayout->local_size / this->rlayout->subsizes[0];
+
+    // initial arrays MUST be 0, because I'm just adding to them afterwards.
+    shared_array<double> local_zaverage_threaded(
+            slice_size, [&](double* local_zaverage){
+        std::fill_n(local_zaverage, slice_size, 0);
+    });
+
+    // sum along z direction
+    {
+        TIMEZONE("field::RLOOP");
+        this->RLOOP(
+                [&](ptrdiff_t rindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+
+            double *local_zaverage = local_zaverage_threaded.getMine();
+            ptrdiff_t zaverage_index = (yindex*this->rlayout->subsizes[2]+xindex)*ncomp(fc);
+
+            switch(fc)
+            {
+                case ONE:
+                    local_zaverage[zaverage_index] += this->rval(rindex);
+                    break;
+                case THREE:
+                    local_zaverage[zaverage_index+0] += this->rval(rindex, 0);
+                    local_zaverage[zaverage_index+1] += this->rval(rindex, 1);
+                    local_zaverage[zaverage_index+2] += this->rval(rindex, 2);
+                    break;
+                case THREExTHREE:
+                    local_zaverage[zaverage_index+0 + 0] += this->rval(rindex, 0, 0);
+                    local_zaverage[zaverage_index+0 + 1] += this->rval(rindex, 0, 1);
+                    local_zaverage[zaverage_index+0 + 2] += this->rval(rindex, 0, 2);
+                    local_zaverage[zaverage_index+3 + 0] += this->rval(rindex, 1, 0);
+                    local_zaverage[zaverage_index+3 + 1] += this->rval(rindex, 1, 1);
+                    local_zaverage[zaverage_index+3 + 2] += this->rval(rindex, 1, 2);
+                    local_zaverage[zaverage_index+6 + 0] += this->rval(rindex, 2, 0);
+                    local_zaverage[zaverage_index+6 + 1] += this->rval(rindex, 2, 1);
+                    local_zaverage[zaverage_index+6 + 2] += this->rval(rindex, 2, 2);
+                    break;
+            }
+                });
+
+        TIMEZONE("FIELD_RLOOP::Merge");
+        local_zaverage_threaded.mergeParallel();
+    }
+    // sum along MPI processes
+    double *zaverage = new double[slice_size];
+    {
+        TIMEZONE("MPI_Allreduce");
+        MPI_Allreduce(
+                (void*)local_zaverage_threaded.getMasterData(),
+                (void*)zaverage,
+                slice_size,
+                MPI_DOUBLE, MPI_SUM, this->comm);
+    }
+    // divide by total number of slices
+    for (ptrdiff_t n=0; n < ptrdiff_t(slice_size); n++)
+            zaverage[n] /= this->rlayout->sizes[0];
+
+    if (this->myrank == 0)
+    {
+        TIMEZONE("root-work");
+        hid_t dset, wspace, mspace;
+        int ndims;
+        hsize_t count[5], offset[5], dims[5];
+        offset[0] = toffset;
+        offset[1] = 0;
+        offset[2] = 0;
+        offset[3] = 0;
+        offset[4] = 0;
+        dset = H5Dopen(
+                group,
+                ("zaverage/" + dset_name).c_str(),
+                H5P_DEFAULT);
+        wspace = H5Dget_space(dset);
+        ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);
+        count[0] = 1;
+        count[1] = this->rlayout->sizes[1];
+        count[2] = this->rlayout->sizes[2];
+        count[3] = 3;
+        count[4] = 3;
+        // select right slice in file
+        H5Sselect_hyperslab(
+            wspace,
+            H5S_SELECT_SET,
+            offset,
+            NULL,
+            count,
+            NULL);
+        offset[0] = 0;
+        // select proper regions of memory
+        mspace = H5Screate_simple(ndims-1, count+1, NULL);
+        H5Sselect_hyperslab(
+            mspace,
+            H5S_SELECT_SET,
+            offset+1,
+            NULL,
+            count+1,
+            NULL);
+        H5Dwrite(
+            dset,
+            H5T_NATIVE_DOUBLE,
+            mspace,
+            wspace,
+            H5P_DEFAULT,
+            zaverage);
+        H5Dclose(dset);
+        H5Sclose(mspace);
+        H5Sclose(wspace);
+    }
+    delete[] zaverage;
+}
+
 template <typename rnumber,
           field_backend be,
           field_components fc>
@@ -904,77 +1217,129 @@ void field<rnumber, be, fc>::symmetrize()
 {
     TIMEZONE("field::symmetrize");
     assert(!this->real_space_representation);
-    ptrdiff_t ii, cc;
-    typename fftw_interface<rnumber>::complex *data = this->get_cdata();
+    // for debugging, just use FFTW
+    //this->ift();
+    //this->dft();
+    //this->normalize();
+    //return;
+    typename fftw_interface<rnumber>::complex *cdata = this->get_cdata();
+    // symmetrize kx = 0 plane, line by line, for ky != 0
     MPI_Status *mpistatus = new MPI_Status;
-    if (this->myrank == this->clayout->rank[0][0])
-    {
-        for (cc = 0; cc < ncomp(fc); cc++)
-            data[cc][1] = 0.0;
-        for (ii = 1; ii < ptrdiff_t(this->clayout->sizes[1]/2); ii++)
-            for (cc = 0; cc < ncomp(fc); cc++) {
-                ( *(data + cc + ncomp(fc)*(this->clayout->sizes[1] - ii)*this->clayout->sizes[2]))[0] =
-                 (*(data + cc + ncomp(fc)*(                          ii)*this->clayout->sizes[2]))[0];
-                ( *(data + cc + ncomp(fc)*(this->clayout->sizes[1] - ii)*this->clayout->sizes[2]))[1] =
-                -(*(data + cc + ncomp(fc)*(                          ii)*this->clayout->sizes[2]))[1];
-            }
-    }
-    typename fftw_interface<rnumber>::complex *buffer;
-    buffer = fftw_interface<rnumber>::alloc_complex(ncomp(fc)*this->clayout->sizes[1]);
-    ptrdiff_t yy;
+    typename fftw_interface<rnumber>::complex *buffer = new typename fftw_interface<rnumber>::complex[ncomp(fc)*this->clayout->sizes[1]];
+    //typename fftw_interface<rnumber>::complex *buffer;
+    //buffer = fftw_interface<rnumber>::alloc_complex(ncomp(fc)*this->clayout->sizes[1]);
     /*ptrdiff_t tindex;*/
     int ranksrc, rankdst;
-    for (yy = 1; yy < ptrdiff_t(this->clayout->sizes[0]/2); yy++) {
-        ranksrc = this->clayout->rank[0][yy];
-        rankdst = this->clayout->rank[0][this->clayout->sizes[0] - yy];
+    for (ptrdiff_t iy = 1; iy < ptrdiff_t(this->clayout->sizes[0]/2); iy++)
+    {
+        ranksrc = this->clayout->rank[0][iy];
+        rankdst = this->clayout->rank[0][this->clayout->sizes[0] - iy];
         if (this->clayout->myrank == ranksrc)
-            for (ii = 0; ii < ptrdiff_t(this->clayout->sizes[1]); ii++)
-                for (cc = 0; cc < ncomp(fc); cc++)
+        {
+            ptrdiff_t iyy = iy - this->clayout->starts[0];
+            for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->sizes[1]); iz++)
+            {
+                ptrdiff_t cindex = this->get_cindex(0, iyy, iz);
+                for (int cc = 0; cc < int(ncomp(fc)); cc++)
                     for (int imag_comp=0; imag_comp<2; imag_comp++)
-                        (*(buffer + ncomp(fc)*ii+cc))[imag_comp] =
-                            (*(data + ncomp(fc)*((yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[imag_comp];
+                        (*(buffer + ncomp(fc)*iz+cc))[imag_comp] =
+                            (*(cdata + ncomp(fc)*cindex + cc))[imag_comp];
+            }
+        }
         if (ranksrc != rankdst)
         {
             if (this->clayout->myrank == ranksrc)
                 MPI_Send((void*)buffer,
-                         ncomp(fc)*this->clayout->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy,
-                        this->clayout->comm);
+                         ncomp(fc)*this->clayout->sizes[1],
+                         mpi_real_type<rnumber>::complex(),
+                         rankdst, iy,
+                         this->clayout->comm);
             if (this->clayout->myrank == rankdst)
                 MPI_Recv((void*)buffer,
-                         ncomp(fc)*this->clayout->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy,
-                        this->clayout->comm, mpistatus);
+                         ncomp(fc)*this->clayout->sizes[1],
+                         mpi_real_type<rnumber>::complex(),
+                         ranksrc, iy,
+                         this->clayout->comm,
+                         mpistatus);
         }
         if (this->clayout->myrank == rankdst)
         {
-            for (ii = 1; ii < ptrdiff_t(this->clayout->sizes[1]); ii++)
-                for (cc = 0; cc < ncomp(fc); cc++)
+            ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0];
+            for (ptrdiff_t iz = 1; iz < ptrdiff_t(this->clayout->sizes[1]); iz++)
+            {
+                ptrdiff_t izz = (this->clayout->sizes[1] - iz);
+                ptrdiff_t cindex = this->get_cindex(0, iyy, izz);
+                //DEBUG_MSG("iy = %ld, iz = %ld\n", iy, iz);
+                for (int cc = 0; cc < int(ncomp(fc)); cc++)
                 {
-                    (*(data + ncomp(fc)*((this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[0] =
-                            (*(buffer + ncomp(fc)*(this->clayout->sizes[1]-ii)+cc))[0];
-                    (*(data + ncomp(fc)*((this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1] + ii)*this->clayout->sizes[2] + cc))[1] =
-                            -(*(buffer + ncomp(fc)*(this->clayout->sizes[1]-ii)+cc))[1];
+                    (*(cdata + ncomp(fc)*cindex + cc))[0] =  (*(buffer + ncomp(fc)*iz+cc))[0];
+                    (*(cdata + ncomp(fc)*cindex + cc))[1] = -(*(buffer + ncomp(fc)*iz+cc))[1];
                 }
-            for (cc = 0; cc < ncomp(fc); cc++)
+            }
+            ptrdiff_t cindex = this->get_cindex(0, iyy, 0);
+            for (int cc = 0; cc < int(ncomp(fc)); cc++)
             {
-                (*((data + cc + ncomp(fc)*(this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2])))[0] =  (*(buffer + cc))[0];
-                (*((data + cc + ncomp(fc)*(this->clayout->sizes[0] - yy - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2])))[1] = -(*(buffer + cc))[1];
+                (*(cdata + cc + ncomp(fc)*cindex))[0] =  (*(buffer + cc))[0];
+                (*(cdata + cc + ncomp(fc)*cindex))[1] = -(*(buffer + cc))[1];
             }
         }
     }
-    fftw_interface<rnumber>::free(buffer);
+    //fftw_interface<rnumber>::free(buffer);
+    delete[] buffer;
     delete mpistatus;
-    /* put asymmetric data to 0 */
-    /*if (this->clayout->myrank == this->clayout->rank[0][this->clayout->sizes[0]/2])
+    // symmetrize kx = 0, ky = 0 line
+    if (this->clayout->myrank == this->clayout->rank[0][0])
+    {
+        for (ptrdiff_t iz = 1; iz < ptrdiff_t(this->clayout->sizes[1]/2); iz++)
+        {
+            ptrdiff_t cindex0 = this->get_cindex(0, 0, iz);
+            ptrdiff_t cindex1 = this->get_cindex(0, 0, this->clayout->sizes[1] - iz);
+            for (int cc = 0; cc < int(ncomp(fc)); cc++)
+            {
+                (*(cdata + cc + ncomp(fc)*cindex1))[0] =  (*(cdata + cc + ncomp(fc)*cindex0))[0];
+                (*(cdata + cc + ncomp(fc)*cindex1))[1] = -(*(cdata + cc + ncomp(fc)*cindex0))[1];
+            }
+        }
+    }
+    // make 0 mode real
+    if (this->myrank == this->clayout->rank[0][0])
+    {
+        for (ptrdiff_t cc = 0; cc < ncomp(fc); cc++)
+            cdata[cc][1] = 0.0;
+    }
+    // put kx = nx/2 modes to 0
+    for (ptrdiff_t iy = 0; iy < ptrdiff_t(this->clayout->subsizes[0]); iy++)
+    for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->subsizes[1]); iz++)
     {
-        tindex = ncomp(fc)*(this->clayout->sizes[0]/2 - this->clayout->starts[0])*this->clayout->sizes[1]*this->clayout->sizes[2];
-        for (ii = 0; ii < this->clayout->sizes[1]; ii++)
+        ptrdiff_t cindex = this->get_cindex(this->clayout->sizes[2]-1, iy, iz);
+        for (int cc = 0; cc < int(ncomp(fc)); cc++) {
+            (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0;
+            (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0;
+        }
+    }
+    // put ky = ny/2 modes to 0
+    if (this->clayout->myrank == this->clayout->rank[0][this->clayout->sizes[0]/2])
+    {
+        for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->subsizes[1]); iz++)
+        for (ptrdiff_t ix = 0; ix < ptrdiff_t(this->clayout->subsizes[2]); ix++)
         {
-            std::fill_n((rnumber*)(data + tindex), ncomp(fc)*2*this->clayout->sizes[2], 0.0);
-            tindex += ncomp(fc)*this->clayout->sizes[2];
+            ptrdiff_t cindex = this->get_cindex(ix, this->clayout->sizes[0]/2-this->clayout->starts[0], iz);
+            for (int cc = 0; cc < int(ncomp(fc)); cc++) {
+                (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0;
+                (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0;
+            }
+        }
+    }
+    // put kz = nz/2 modes to 0
+    for (ptrdiff_t iy = 0; iy < ptrdiff_t(this->clayout->subsizes[0]); iy++)
+    for (ptrdiff_t ix = 0; ix < ptrdiff_t(this->clayout->subsizes[2]); ix++)
+    {
+        ptrdiff_t cindex = this->get_cindex(ix, iy, this->clayout->sizes[1]/2);
+        for (int cc = 0; cc < int(ncomp(fc)); cc++) {
+            (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0;
+            (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0;
         }
     }
-    tindex = ncomp(fc)*();
-    std::fill_n((rnumber*)(data + tindex), ncomp(fc)*2, 0.0);*/
 }
 
 template <typename rnumber,
@@ -1022,7 +1387,6 @@ void field<rnumber, be, fc>::compute_stats(
     // what follows gave me a headache until I found this link:
     // http://stackoverflow.com/questions/8256636/expected-primary-expression-error-on-template-method-using
     kk->template cospectrum<rnumber, fc>(
-            (typename fftw_interface<rnumber>::complex*)this->data,
             (typename fftw_interface<rnumber>::complex*)this->data,
             group,
             dset_name + "_" + dset_name,
@@ -1039,6 +1403,42 @@ void field<rnumber, be, fc>::compute_stats(
     }
 }
 
+template <typename rnumber,
+          field_backend be,
+          field_components fc>
+template <kspace_dealias_type dt>
+double field<rnumber, be, fc>::L2norm(
+        kspace<be, dt> *kk)
+{
+    TIMEZONE("field::L2norm");
+    if (!this->real_space_representation)
+        return kk->template L2norm<rnumber, fc>(this->get_cdata());
+    else
+    {
+        shared_array<double> local_m2_threaded(1, [&](double* local_moment){
+            std::fill_n(local_moment, 1, 0);});
+
+        this->RLOOP(
+                [&](ptrdiff_t rindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+                double *local_m2 = local_m2_threaded.getMine();
+                for (unsigned int i=0; i<ncomp(fc); i++)
+                    local_m2[0] += this->data[rindex*ncomp(fc)+i]*this->data[rindex*ncomp(fc)+i];
+            });
+
+        local_m2_threaded.mergeParallel();
+        double m2;
+        MPI_Allreduce(
+                (void*)local_m2_threaded.getMasterData(),
+                &m2,
+                1,
+                MPI_DOUBLE, MPI_SUM, this->comm);
+        return sqrt(m2 / this->npoints);
+    }
+}
+
 template <typename rnumber,
           field_backend be,
           field_components fc1,
@@ -1172,6 +1572,7 @@ int joint_rspace_PDF(
         hid_t dset, wspace;
         hsize_t dims[5];
         int ndims;
+        variable_used_only_in_assert(ndims);
         if (fc == THREE)
         {
             dset = H5Dopen(
@@ -1180,7 +1581,6 @@ int joint_rspace_PDF(
                     H5P_DEFAULT);
             wspace = H5Dget_space(dset);
             ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);
-            DEBUG_MSG("number of dimensions is %d\n", ndims);
             assert(ndims == 5);
             assert(dims[3] == 3);
             assert(dims[4] == 3);
@@ -1235,8 +1635,8 @@ int joint_rspace_PDF(
     {
         for (unsigned int i=0; i<4; i++)
         {
-            bin1size[i] = max_f1_estimate[0] / nbins;
-            bin2size[i] = max_f2_estimate[0] / nbins;
+            bin1size[i] = 2*max_f1_estimate[0] / nbins;
+            bin2size[i] = 2*max_f2_estimate[0] / nbins;
         }
     }
 
@@ -1279,8 +1679,8 @@ int joint_rspace_PDF(
             }
             else if (fc == ONE)
             {
-                bin1 = int(floor(f1->rval(rindex)/bin1size[3]));
-                bin2 = int(floor(f2->rval(rindex)/bin2size[3]));
+                bin1 = int(floor((f1->rval(rindex) + max_f1_estimate[0])/bin1size[3]));
+                bin2 = int(floor((f2->rval(rindex) + max_f2_estimate[0])/bin2size[3]));
             }
             if ((bin1 >= 0 && bin1 < nbins) &&
                 (bin2 >= 0 && bin2 < nbins))
@@ -1360,6 +1760,274 @@ int joint_rspace_PDF(
     return EXIT_SUCCESS;
 }
 
+// Debarghya edit for 3 scale PDFs //
+
+template <typename rnumber,
+          field_backend be>
+int joint_rspace_3PDF(
+        field<rnumber, be, ONE> *f1,
+        field<rnumber, be, ONE> *f2,
+        field<rnumber, be, ONE> *f3,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset,
+        const std::vector<double> max_f1_estimate,
+        const std::vector<double> max_f2_estimate,
+        const std::vector<double> max_f3_estimate)
+{
+    TIMEZONE("joint_rspace_3PDF");
+    assert(f1->real_space_representation);
+    assert(f2->real_space_representation);
+    assert(f3->real_space_representation);
+
+    assert(max_f1_estimate.size() == 1);
+    assert(max_f2_estimate.size() == 1);
+    assert(max_f3_estimate.size() == 1);
+    
+    int nbins;
+    std::string dsetc, dsetm;
+    dsetc = "histograms/" + dset_name + "_components";
+    dsetm = "histograms/" + dset_name;
+    if (f1->myrank == 0)
+    {
+        hid_t dset, wspace;
+        hsize_t dims[5];
+        int ndims;
+        dset = H5Dopen(
+                group,
+                dsetm.c_str(),
+                H5P_DEFAULT);
+        wspace = H5Dget_space(dset);
+        ndims = H5Sget_simple_extent_dims(wspace, dims, NULL);
+        assert(ndims == 4);
+        H5Sclose(wspace);
+        H5Dclose(dset);
+        nbins = dims[1];
+    }
+    {
+        TIMEZONE("MPI_Bcast");
+        MPI_Bcast(&nbins, 1, MPI_INT, 0, f1->comm);
+    }
+
+    
+    /// histogram magnitudes
+    shared_array<ptrdiff_t> local_histm_threaded(
+            nbins*nbins*nbins,
+            [&](ptrdiff_t* local_hist){
+                std::fill_n(local_hist, nbins*nbins*nbins, 0);
+                });
+
+    /// set up bin sizes
+    std::vector<double> bin1size, bin2size, bin3size;
+    bin1size.resize(1);
+    bin2size.resize(1);
+    bin3size.resize(1);
+    
+    bin1size[0] = 2*max_f1_estimate[0] / nbins;
+    bin2size[0] = 2*max_f2_estimate[0] / nbins;
+    bin3size[0] = 2*max_f3_estimate[0] / nbins;
+
+
+    {
+        TIMEZONE("field::RLOOP");
+        f1->RLOOP(
+                [&](ptrdiff_t rindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+            ptrdiff_t *local_histm = local_histm_threaded.getMine();
+            int bin1 = 0;
+            int bin2 = 0;
+            int bin3 = 0;
+
+            bin1 = int(floor((f1->rval(rindex) + max_f1_estimate[0])/bin1size[0]));
+            bin2 = int(floor((f2->rval(rindex) + max_f2_estimate[0])/bin2size[0]));
+            bin3 = int(floor((f3->rval(rindex) + max_f3_estimate[0])/bin3size[0]));
+            if ((bin1 >= 0 && bin1 < nbins) &&
+                (bin2 >= 0 && bin2 < nbins) &&
+                (bin3 >= 0 && bin3 < nbins))
+                local_histm[bin1*nbins*nbins + bin2*nbins + bin3]++;
+            });
+    }
+    local_histm_threaded.mergeParallel();
+    ptrdiff_t *histm = new ptrdiff_t[nbins*nbins*nbins];
+    ptrdiff_t *histc = NULL;
+    {
+        MPI_Allreduce(
+                (void*)local_histm_threaded.getMasterData(),
+                (void*)histm,
+                nbins*nbins*nbins,
+                MPI_INT64_T, MPI_SUM, f1->comm);
+    }
+
+    if (f1->myrank == 0)
+    {
+        TIMEZONE("root-work");
+        hid_t dset, wspace, mspace;
+        hsize_t count[5], offset[5];
+        
+        dset = H5Dopen(group, dsetm.c_str(), H5P_DEFAULT);
+        assert(dset > 0);
+        offset[0] = toffset;
+        offset[1] = 0;
+        offset[2] = 0;
+        offset[3] = 0;
+        count[0] = 1;
+        count[1] = nbins;
+        count[2] = nbins;
+        count[3] = nbins;
+        mspace = H5Screate_simple(4, count, NULL);
+        wspace = H5Dget_space(dset);
+        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        H5Dwrite(dset, H5T_NATIVE_INT64, mspace, wspace, H5P_DEFAULT, histm);
+        H5Sclose(wspace);
+        H5Sclose(mspace);
+        H5Dclose(dset);
+    }
+
+    delete[] histm;
+
+    return EXIT_SUCCESS;
+}
+
+
+
+template <typename rnumber,
+          field_backend be,
+          field_components fc>
+field<rnumber, be, fc> &field<rnumber, be, fc>::operator=(
+        const field<rnumber, be, fc> &src)
+{
+    TIMEZONE("field::operator=");
+    if (src.real_space_representation)
+    {
+        assert(this->get_nx() == src.get_nx());
+        assert(this->get_ny() == src.get_ny());
+        assert(this->get_nz() == src.get_nz());
+        this->real_space_representation = true;
+        std::copy(src.data,
+                  src.data + this->rmemlayout->local_size,
+                  this->data);
+    }
+    else
+    {
+        this->real_space_representation = false;
+        // simple copy
+        if (this->get_nx() == src.get_nx() &&
+            this->get_ny() == src.get_ny() &&
+            this->get_nz() == src.get_nz())
+        {
+            std::copy(src.data,
+                      src.data + this->rmemlayout->local_size,
+                      this->data);
+        }
+        // complicated resize
+        else
+        {
+            int64_t slice_size = src.clayout->local_size / src.clayout->subsizes[0];
+            // clean up
+            std::fill_n(this->data,
+                        this->rmemlayout->local_size,
+                        0.0);
+            typename fftw_interface<rnumber>::complex *buffer;
+            buffer = fftw_interface<rnumber>::alloc_complex(slice_size*ncomp(fc));
+
+            int min_fast_dim =
+                    (src.clayout->sizes[2] > this->clayout->sizes[2]) ?
+                        this->clayout->sizes[2] : src.clayout->sizes[2];
+
+            int64_t ii0, ii1;
+            int64_t oi0, oi1;
+            int64_t delta1, delta0;
+            int irank, orank;
+            delta0 = (this->clayout->sizes[0] - src.clayout->sizes[0]);
+            delta1 = (this->clayout->sizes[1] - src.clayout->sizes[1]);
+            for (ii0=0; ii0 < int64_t(src.clayout->sizes[0]); ii0++)
+            {
+                if (ii0 <= int64_t(src.clayout->sizes[0]/2))
+                {
+                    oi0 = ii0;
+                    if (oi0 > int64_t(this->clayout->sizes[0]/2))
+                        continue;
+                }
+                else
+                {
+                    oi0 = ii0 + delta0;
+                    if ((oi0 < 0) || ((int64_t(this->clayout->sizes[0]) - oi0) >= int64_t(this->clayout->sizes[0]/2)))
+                        continue;
+                }
+                if (be == FFTW)
+                {
+                    irank = src.clayout->rank[0][ii0];
+                    orank = this->clayout->rank[0][oi0];
+                }
+                else
+                {// TODO: handle 2D layout here
+                }
+                if ((irank == orank) &&
+                        (irank == src.clayout->myrank))
+                {
+                    std::copy(
+                            (rnumber*)(src.get_cdata() + (ii0 - src.clayout->starts[0]    )*slice_size),
+                            (rnumber*)(src.get_cdata() + (ii0 - src.clayout->starts[0] + 1)*slice_size),
+                            (rnumber*)buffer);
+                }
+                else
+                {
+                    if (src.clayout->myrank == irank)
+                    {
+                        MPI_Send(
+                                (void*)(src.get_cdata() + (ii0-src.clayout->starts[0])*slice_size),
+                                slice_size,
+                                mpi_real_type<rnumber>::complex(),
+                                orank,
+                                ii0,
+                                src.clayout->comm);
+                    }
+                    if (src.clayout->myrank == orank)
+                    {
+                        MPI_Recv(
+                                    (void*)(buffer),
+                                    slice_size,
+                                    mpi_real_type<rnumber>::complex(),
+                                    irank,
+                                    ii0,
+                                    src.clayout->comm,
+                                    MPI_STATUS_IGNORE);
+                    }
+                }
+                if (src.clayout->myrank == orank)
+                {
+                    for (ii1 = 0; ii1 < int64_t(src.clayout->sizes[1]); ii1++)
+                    {
+                        if (ii1 <= int64_t(src.clayout->sizes[1]/2))
+                        {
+                            oi1 = ii1;
+                            if (oi1 > int64_t(this->clayout->sizes[1]/2))
+                                continue;
+                        }
+                        else
+                        {
+                            oi1 = ii1 + delta1;
+                            if ((oi1 < 0) || ((int64_t(this->clayout->sizes[1]) - oi1) >= int64_t(this->clayout->sizes[1]/2)))
+                                continue;
+                        }
+                        std::copy(
+                                    (rnumber*)(buffer + (ii1*src.clayout->sizes[2]*ncomp(fc))),
+                                (rnumber*)(buffer + (ii1*src.clayout->sizes[2] + min_fast_dim)*ncomp(fc)),
+                                (rnumber*)(this->get_cdata() +
+                                           ((oi0 - this->clayout->starts[0])*this->clayout->sizes[1] +
+                                oi1)*this->clayout->sizes[2]*ncomp(fc)));
+                    }
+                }
+            }
+            fftw_interface<rnumber>::free(buffer);
+            MPI_Barrier(src.clayout->comm);
+        }
+    }
+    return *this;
+}
+
 template class field<float, FFTW, ONE>;
 template class field<float, FFTW, THREE>;
 template class field<float, FFTW, THREExTHREE>;
@@ -1407,6 +2075,34 @@ template void field<double, FFTW, THREExTHREE>::compute_stats<SMOOTH>(
         kspace<FFTW, SMOOTH> *,
         const hid_t, const std::string, const hsize_t, const double);
 
+template double field<float, FFTW, ONE>::L2norm<TWO_THIRDS>(
+        kspace<FFTW, TWO_THIRDS> *);
+template double field<float, FFTW, THREE>::L2norm<TWO_THIRDS>(
+        kspace<FFTW, TWO_THIRDS> *);
+template double field<float, FFTW, THREExTHREE>::L2norm<TWO_THIRDS>(
+        kspace<FFTW, TWO_THIRDS> *);
+
+template double field<double, FFTW, ONE>::L2norm<TWO_THIRDS>(
+        kspace<FFTW, TWO_THIRDS> *);
+template double field<double, FFTW, THREE>::L2norm<TWO_THIRDS>(
+        kspace<FFTW, TWO_THIRDS> *);
+template double field<double, FFTW, THREExTHREE>::L2norm<TWO_THIRDS>(
+        kspace<FFTW, TWO_THIRDS> *);
+
+template double field<float, FFTW, ONE>::L2norm<SMOOTH>(
+        kspace<FFTW, SMOOTH> *);
+template double field<float, FFTW, THREE>::L2norm<SMOOTH>(
+        kspace<FFTW, SMOOTH> *);
+template double field<float, FFTW, THREExTHREE>::L2norm<SMOOTH>(
+        kspace<FFTW, SMOOTH> *);
+
+template double field<double, FFTW, ONE>::L2norm<SMOOTH>(
+        kspace<FFTW, SMOOTH> *);
+template double field<double, FFTW, THREE>::L2norm<SMOOTH>(
+        kspace<FFTW, SMOOTH> *);
+template double field<double, FFTW, THREExTHREE>::L2norm<SMOOTH>(
+        kspace<FFTW, SMOOTH> *);
+
 template int compute_gradient<float, FFTW, THREE, THREExTHREE, SMOOTH>(
         kspace<FFTW, SMOOTH> *,
         field<float, FFTW, THREE> *,
@@ -1468,3 +2164,24 @@ template int joint_rspace_PDF<double, FFTW, ONE>(
         const std::vector<double>,
         const std::vector<double>);
 
+template int joint_rspace_3PDF<float, FFTW>(
+        field<float, FFTW, ONE> *,
+        field<float, FFTW, ONE> *,
+        field<float, FFTW, ONE> *,
+        const hid_t,
+        const std::string,
+        const hsize_t,
+        const std::vector<double>,
+        const std::vector<double>,
+        const std::vector<double>);
+template int joint_rspace_3PDF<double, FFTW>(
+        field<double, FFTW, ONE> *,
+        field<double, FFTW, ONE> *,
+        field<double, FFTW, ONE> *,
+        const hid_t,
+        const std::string,
+        const hsize_t,
+        const std::vector<double>,
+        const std::vector<double>,
+        const std::vector<double>);
+
diff --git a/bfps/cpp/field.hpp b/cpp/field.hpp
similarity index 87%
rename from bfps/cpp/field.hpp
rename to cpp/field.hpp
index 52a936320974a9076a419d4b081d0ee9ab5d4ae5..c6a62b5c1739d5bafdf7c823aea7bc8b24059147 100644
--- a/bfps/cpp/field.hpp
+++ b/cpp/field.hpp
@@ -72,8 +72,8 @@ class field
         field_layout<fc> *clayout, *rlayout, *rmemlayout;
 
         /* FFT plans */
-        typename fftw_interface<rnumber>::plan c2r_plan;
-        typename fftw_interface<rnumber>::plan r2c_plan;
+        typename fftw_interface<rnumber>::many_plan c2r_plan;
+        typename fftw_interface<rnumber>::many_plan r2c_plan;
         unsigned fftw_plan_rigor;
 
         /* HDF5 data types for arrays */
@@ -103,6 +103,13 @@ class field
                 const hid_t group,
                 const std::string field_name,
                 const int iteration);
+        int write_filtered(
+                const std::string fname,
+                const std::string field_name,
+                const int iteration,
+                const int nx,
+                const int ny,
+                const int nz);
 
         int io_binary(
                 const std::string fname,
@@ -129,6 +136,25 @@ class field
                 const hsize_t toffset,
                 const std::vector<double> max_estimate);
 
+        void compute_rspace_zaverage(
+                const hid_t group,
+                const std::string dset_name,
+                const hsize_t toffset);
+
+        /* access sizes */
+        inline int get_nx() const
+        {
+            return this->rlayout->sizes[2];
+        }
+        inline int get_ny() const
+        {
+            return this->rlayout->sizes[1];
+        }
+        inline int get_nz() const
+        {
+            return this->rlayout->sizes[0];
+        }
+
         /* acess data */
         inline rnumber *__restrict__ get_rdata()
         {
@@ -145,6 +171,11 @@ class field
             return (typename fftw_interface<rnumber>::complex*__restrict__)this->data;
         }
 
+        inline typename fftw_interface<rnumber>::complex *__restrict__ get_cdata() const
+        {
+            return (typename fftw_interface<rnumber>::complex*__restrict__)this->data;
+        }
+
         inline rnumber &rval(ptrdiff_t rindex, unsigned int component = 0)
         {
             assert(fc == ONE || fc == THREE);
@@ -154,7 +185,7 @@ class field
 
         inline const rnumber& rval(ptrdiff_t rindex, unsigned int component = 0) const
         {
-            assert(fc == ONE || fc == THREE);
+            //assert(fc == ONE || fc == THREE);
             assert(component >= 0 && component < ncomp(fc));
             return *(this->data + rindex*ncomp(fc) + component);
         }
@@ -216,6 +247,8 @@ class field
             return *this;
         }
 
+        field<rnumber, be, fc>& operator=(const field<rnumber, be, fc> &src);
+
         template <kspace_dealias_type dt>
         void compute_stats(
                 kspace<be, dt> *kk,
@@ -223,6 +256,9 @@ class field
                 const std::string dset_name,
                 const hsize_t toffset,
                 const double max_estimate);
+        template <kspace_dealias_type dt>
+        double L2norm(
+                kspace<be, dt> *kk);
         inline void impose_zero_mode()
         {
             if (this->clayout->myrank == this->clayout->rank[0][0] &&
@@ -318,5 +354,18 @@ int joint_rspace_PDF(
         const std::vector<double> max_f1_estimate,
         const std::vector<double> max_f2_estimate);
 
+template <typename rnumber,
+          field_backend be>
+int joint_rspace_3PDF(
+        field<rnumber, be, ONE> *f1,
+        field<rnumber, be, ONE> *f2,
+        field<rnumber, be, ONE> *f3,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset,
+        const std::vector<double> max_f1_estimate,
+        const std::vector<double> max_f2_estimate,
+        const std::vector<double> max_f3_estimate);
+
 #endif//FIELD_HPP
 
diff --git a/bfps/cpp/field_binary_IO.cpp b/cpp/field_binary_IO.cpp
similarity index 100%
rename from bfps/cpp/field_binary_IO.cpp
rename to cpp/field_binary_IO.cpp
diff --git a/bfps/cpp/field_binary_IO.hpp b/cpp/field_binary_IO.hpp
similarity index 100%
rename from bfps/cpp/field_binary_IO.hpp
rename to cpp/field_binary_IO.hpp
diff --git a/bfps/cpp/field_layout.cpp b/cpp/field_layout.cpp
similarity index 99%
rename from bfps/cpp/field_layout.cpp
rename to cpp/field_layout.cpp
index 908904991d5d95b0c89ba679b402d8d5727b8c85..61dd3f2ac1094e5f93a375fa295cffab669b34f9 100644
--- a/bfps/cpp/field_layout.cpp
+++ b/cpp/field_layout.cpp
@@ -23,10 +23,15 @@
 **********************************************************************/
 
 
+
+#define NDEBUG
+
 #include <cassert>
 #include "field_layout.hpp"
 #include "scope_timer.hpp"
 
+
+
 template <field_components fc>
 field_layout<fc>::field_layout(
         const hsize_t *SIZES,
diff --git a/bfps/cpp/field_layout.hpp b/cpp/field_layout.hpp
similarity index 100%
rename from bfps/cpp/field_layout.hpp
rename to cpp/field_layout.hpp
diff --git a/cpp/full_code/NSVE.cpp b/cpp/full_code/NSVE.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7b1b2d9550c45f9166c37e1b8132427fed046597
--- /dev/null
+++ b/cpp/full_code/NSVE.cpp
@@ -0,0 +1,201 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#define NDEBUG
+
+#include <string>
+#include <cmath>
+#include "NSVE.hpp"
+#include "scope_timer.hpp"
+#include "fftw_tools.hpp"
+
+
+template <typename rnumber>
+int NSVE<rnumber>::initialize(void)
+{
+    TIMEZONE("NSVE::initialize");
+    this->read_iteration();
+    this->read_parameters();
+    if (this->myrank == 0)
+    {
+        // set caching parameters
+        hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
+        herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0);
+        variable_used_only_in_assert(cache_err);
+        DEBUG_MSG("when setting stat_file cache I got %d\n", cache_err);
+        this->stat_file = H5Fopen(
+                (this->simname + ".h5").c_str(),
+                H5F_ACC_RDWR,
+                fapl);
+    }
+    int data_file_problem;
+    if (this->myrank == 0)
+        data_file_problem = this->grow_file_datasets();
+    MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, this->comm);
+    if (data_file_problem > 0)
+    {
+        std::cerr <<
+            data_file_problem <<
+            " problems growing file datasets.\ntrying to exit now." <<
+            std::endl;
+        return EXIT_FAILURE;
+    }
+    this->fs = new vorticity_equation<rnumber, FFTW>(
+            simname.c_str(),
+            nx, ny, nz,
+            dkx, dky, dkz,
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
+    this->tmp_vec_field = new field<rnumber, FFTW, THREE>(
+            nx, ny, nz,
+            this->comm,
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
+
+
+    this->fs->checkpoints_per_file = checkpoints_per_file;
+    this->fs->nu = nu;
+    this->fs->fmode = fmode;
+    this->fs->famplitude = famplitude;
+    this->fs->friction_coefficient = friction_coefficient;
+    this->fs->energy = energy;
+    this->fs->injection_rate = injection_rate;
+    this->fs->fk0 = fk0;
+    this->fs->fk1 = fk1;
+    strncpy(this->fs->forcing_type, forcing_type, 128);
+    this->fs->iteration = this->iteration;
+    this->fs->checkpoint = this->checkpoint;
+
+    this->fs->cvorticity->real_space_representation = false;
+    this->fs->io_checkpoint();
+
+    if (this->myrank == 0 && this->iteration == 0)
+        this->fs->kk->store(stat_file);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVE<rnumber>::step(void)
+{
+    TIMEZONE("NSVE::step");
+    this->fs->step(this->dt);
+    this->iteration = this->fs->iteration;
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVE<rnumber>::write_checkpoint(void)
+{
+    TIMEZONE("NSVE::write_checkpoint");
+    this->fs->io_checkpoint(false);
+    this->checkpoint = this->fs->checkpoint;
+    this->write_iteration();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVE<rnumber>::finalize(void)
+{
+    TIMEZONE("NSVE::finalize");
+    if (this->myrank == 0)
+        H5Fclose(this->stat_file);
+    delete this->fs;
+    delete this->tmp_vec_field;
+    return EXIT_SUCCESS;
+}
+
+/** \brief Compute standard statistics for velocity and vorticity fields.
+ *
+ *  IMPORTANT: at the end of this subroutine, `this->fs->cvelocity` contains
+ *  the Fourier space representation of the velocity field, and
+ *  `this->tmp_vec_field` contains the real space representation of the
+ *  velocity field.
+ *  This behavior is relied upon in the `NSVEparticles` class, so please
+ *  don't break it.
+ */
+
+template <typename rnumber>
+int NSVE<rnumber>::do_stats()
+{
+    TIMEZONE("NSVE::do_stats");
+    if (!(this->iteration % this->niter_stat == 0))
+        return EXIT_SUCCESS;
+    hid_t stat_group;
+    if (this->myrank == 0)
+        stat_group = H5Gopen(
+                this->stat_file,
+                "statistics",
+                H5P_DEFAULT);
+    else
+        stat_group = 0;
+
+    *tmp_vec_field = fs->cvorticity->get_cdata();
+    tmp_vec_field->compute_stats(
+            fs->kk,
+            stat_group,
+            "vorticity",
+            fs->iteration / niter_stat,
+            max_vorticity_estimate/sqrt(3));
+
+    fs->compute_velocity(fs->cvorticity);
+    *tmp_vec_field = fs->cvelocity->get_cdata();
+    tmp_vec_field->compute_stats(
+            fs->kk,
+            stat_group,
+            "velocity",
+            fs->iteration / niter_stat,
+            max_velocity_estimate/sqrt(3));
+
+    if (this->myrank == 0)
+        H5Gclose(stat_group);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVE<rnumber>::read_parameters(void)
+{
+    TIMEZONE("NSVE::read_parameters");
+    this->direct_numerical_simulation::read_parameters();
+    hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+    this->nu = hdf5_tools::read_value<double>(parameter_file, "parameters/nu");
+    this->dt = hdf5_tools::read_value<double>(parameter_file, "parameters/dt");
+    this->fmode = hdf5_tools::read_value<int>(parameter_file, "parameters/fmode");
+    this->famplitude = hdf5_tools::read_value<double>(parameter_file, "parameters/famplitude");
+    this->friction_coefficient = hdf5_tools::read_value<double>(parameter_file, "parameters/friction_coefficient");
+    this->injection_rate = hdf5_tools::read_value<double>(parameter_file, "parameters/injection_rate");
+    this->fk0 = hdf5_tools::read_value<double>(parameter_file, "parameters/fk0");
+    this->fk1 = hdf5_tools::read_value<double>(parameter_file, "parameters/fk1");
+    this->energy = hdf5_tools::read_value<double>(parameter_file, "parameters/energy");
+    this->histogram_bins = hdf5_tools::read_value<int>(parameter_file, "parameters/histogram_bins");
+    this->max_velocity_estimate = hdf5_tools::read_value<double>(parameter_file, "parameters/max_velocity_estimate");
+    this->max_vorticity_estimate = hdf5_tools::read_value<double>(parameter_file, "parameters/max_vorticity_estimate");
+    std::string tmp = hdf5_tools::read_string(parameter_file, "parameters/forcing_type");
+    snprintf(this->forcing_type, 511, "%s", tmp.c_str());
+    this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+template class NSVE<float>;
+template class NSVE<double>;
+
diff --git a/bfps/cpp/full_code/NSVE.hpp b/cpp/full_code/NSVE.hpp
similarity index 95%
rename from bfps/cpp/full_code/NSVE.hpp
rename to cpp/full_code/NSVE.hpp
index d444b71ceb48ea19dc292a57cc91ac81157e15ed..83c63d35790d3616cf143da1ac43bec133e91675 100644
--- a/bfps/cpp/full_code/NSVE.hpp
+++ b/cpp/full_code/NSVE.hpp
@@ -42,14 +42,18 @@ class NSVE: public direct_numerical_simulation
         /* parameters that are read in read_parameters */
         double dt;
         double famplitude;
+        double friction_coefficient;
         double fk0;
         double fk1;
+        double energy;
+        double injection_rate;
         int fmode;
         char forcing_type[512];
         int histogram_bins;
         double max_velocity_estimate;
         double max_vorticity_estimate;
         double nu;
+        std::string fftw_plan_rigor;
 
         /* other stuff */
         vorticity_equation<rnumber, FFTW> *fs;
diff --git a/bfps/cpp/full_code/NSVE_field_stats.cpp b/cpp/full_code/NSVE_field_stats.cpp
similarity index 57%
rename from bfps/cpp/full_code/NSVE_field_stats.cpp
rename to cpp/full_code/NSVE_field_stats.cpp
index 7e33acf93644208d292c5d8df66653f4bb7b806f..0969175cc75530e2dad2c3c5dd9e6a0449416ed0 100644
--- a/bfps/cpp/full_code/NSVE_field_stats.cpp
+++ b/cpp/full_code/NSVE_field_stats.cpp
@@ -1,17 +1,44 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #include <string>
 #include <cmath>
 #include "NSVE_field_stats.hpp"
+#include "fftw_tools.hpp"
 #include "scope_timer.hpp"
 
 
 template <typename rnumber>
 int NSVE_field_stats<rnumber>::initialize(void)
 {
+    TIMEZONE("NSVE_field_stats::initialize");
     this->postprocess::read_parameters();
     this->vorticity = new field<rnumber, FFTW, THREE>(
             nx, ny, nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
     this->vorticity->real_space_representation = false;
     hid_t parameter_file = H5Fopen(
             (this->simname + std::string(".h5")).c_str(),
@@ -42,6 +69,7 @@ int NSVE_field_stats<rnumber>::initialize(void)
                 this->vorticity->clayout->starts,
                 this->vorticity->clayout->comm);
     }
+    this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor");
     H5Fclose(parameter_file);
     return EXIT_SUCCESS;
 }
@@ -49,6 +77,7 @@ int NSVE_field_stats<rnumber>::initialize(void)
 template <typename rnumber>
 int NSVE_field_stats<rnumber>::read_current_cvorticity(void)
 {
+    TIMEZONE("NSVE_field_stats::read_current_cvorticity");
     this->vorticity->real_space_representation = false;
     if (this->bin_IO != NULL)
     {
@@ -76,6 +105,7 @@ int NSVE_field_stats<rnumber>::read_current_cvorticity(void)
 template <typename rnumber>
 int NSVE_field_stats<rnumber>::finalize(void)
 {
+    TIMEZONE("NSVE_field_stats::finalize");
     if (this->bin_IO != NULL)
         delete this->bin_IO;
     delete this->vorticity;
@@ -85,6 +115,7 @@ int NSVE_field_stats<rnumber>::finalize(void)
 template <typename rnumber>
 int NSVE_field_stats<rnumber>::work_on_current_iteration(void)
 {
+    TIMEZONE("NSVE_field_stats::work_on_current_iteration");
     return EXIT_SUCCESS;
 }
 
diff --git a/bfps/cpp/full_code/NSVE_field_stats.hpp b/cpp/full_code/NSVE_field_stats.hpp
similarity index 98%
rename from bfps/cpp/full_code/NSVE_field_stats.hpp
rename to cpp/full_code/NSVE_field_stats.hpp
index d544c0c7d5f4c75559e63ea3e59bf9457d4730c5..28a2376f17ac2ac837cbacac828cd91572bb3a17 100644
--- a/bfps/cpp/full_code/NSVE_field_stats.hpp
+++ b/cpp/full_code/NSVE_field_stats.hpp
@@ -42,6 +42,8 @@ class NSVE_field_stats: public postprocess
     private:
         field_binary_IO<rnumber, COMPLEX, THREE> *bin_IO;
     public:
+        std::string fftw_plan_rigor;
+
         field<rnumber, FFTW, THREE> *vorticity;
 
         NSVE_field_stats(
diff --git a/cpp/full_code/NSVE_no_output.hpp b/cpp/full_code/NSVE_no_output.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..045db08ec74b74206973e0dfbcb30716d62be0de
--- /dev/null
+++ b/cpp/full_code/NSVE_no_output.hpp
@@ -0,0 +1,51 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2017 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#ifndef NSVE_NO_OUTPUT_HPP
+#define NSVE_NO_OUTPUT_HPP
+
+#include "full_code/NSVE.hpp"
+
+template <typename rnumber>
+class NSVE_no_output: public NSVE<rnumber>
+{
+    public:
+    NSVE_no_output(
+            const MPI_Comm COMMUNICATOR,
+            const std::string &simulation_name):
+        NSVE<rnumber>(
+                COMMUNICATOR,
+                simulation_name){}
+    ~NSVE_no_output(){}
+    int write_checkpoint(void)
+    {
+        TIMEZONE("NSVE_no_output::write_checkpoint");
+        return EXIT_SUCCESS;
+    }
+};
+
+#endif//NSVE_NO_OUTPUT_HPP
+
diff --git a/cpp/full_code/NSVEcomplex_particles.cpp b/cpp/full_code/NSVEcomplex_particles.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3bd27102d7495b39dfa92bb5b7975b3f64d6cca5
--- /dev/null
+++ b/cpp/full_code/NSVEcomplex_particles.cpp
@@ -0,0 +1,265 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2017 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#define NDEBUG
+
+#include <string>
+#include <cmath>
+#include "NSVEcomplex_particles.hpp"
+#include "scope_timer.hpp"
+#include "particles/particles_sampling.hpp"
+#include "particles/p2p_computer.hpp"
+#include "particles/particles_inner_computer.hpp"
+
+template <typename rnumber>
+int NSVEcomplex_particles<rnumber>::initialize(void)
+{
+    TIMEZONE("NSVEcomplex_particles::initialize");
+    this->NSVE<rnumber>::initialize();
+
+    p2p_computer<double, long long int> current_p2p_computer;
+    current_p2p_computer.setEnable(this->enable_p2p);
+
+    particles_inner_computer<double, long long int> current_particles_inner_computer(inner_v0, this->lambda);
+    current_particles_inner_computer.setEnable(enable_inner);
+
+
+    this->ps = particles_system_builder_with_p2p(
+                this->fs->cvelocity,                                                    // (field object)
+                this->fs->kk,                                                           // (kspace object, contains dkx, dky, dkz)
+                tracers0_integration_steps,                                             // to check coherency between parameters and hdf input file (nb rhs)
+                (long long int)nparticles,                                              // to check coherency between parameters and hdf input file
+                this->fs->get_current_fname(),                                          // particles input filename
+                std::string("/tracers0/state/") + std::to_string(this->fs->iteration),  // dataset name for initial input
+                std::string("/tracers0/rhs/")  + std::to_string(this->fs->iteration),   // dataset name for initial input
+                tracers0_neighbours,                                                    // parameter (interpolation no neighbours)
+                tracers0_smoothness,                                                    // parameter (how many continuous derivatives)
+                this->comm,
+                this->fs->iteration+1,
+                std::move(current_p2p_computer),
+                std::move(current_particles_inner_computer),
+                cutoff);
+
+    this->particles_output_writer_mpi = new particles_output_hdf5<
+        long long int, double, 6>(
+                MPI_COMM_WORLD,
+                "tracers0",
+                nparticles,
+                tracers0_integration_steps);
+    this->particles_sample_writer_mpi = new particles_output_sampling_hdf5<
+        long long int, double, 3>(
+                MPI_COMM_WORLD,
+                this->ps->getGlobalNbParticles(),
+                (this->simname + "_particles.h5"),
+                "tracers0",
+                "position/0");
+
+
+    /// allocate grad vel field
+    this->nabla_u = new field<rnumber, FFTW, THREExTHREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            this->fs->cvorticity->fftw_plan_rigor);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVEcomplex_particles<rnumber>::step(void)
+{
+    TIMEZONE("NSVEcomplex_particles::step");
+    this->fs->compute_velocity(this->fs->cvorticity);
+    if(this->enable_vorticity_omega){
+        compute_gradient(
+            this->fs->kk,
+            this->fs->cvelocity,
+            this->nabla_u);
+        this->nabla_u->ift();
+        this->fs->cvelocity->ift(); // needed before completeloop
+        //std::unique_ptr<double[]> sampled_vorticity(new double[9*this->ps->getLocalNbParticles()]);
+        //std::fill_n(sampled_vorticity.get(), 9*this->ps->getLocalNbParticles(), 0);
+        //this->ps->sample_compute_field(*this->nabla_u, sampled_vorticity.get());
+        //*this->tmp_vec_field = this->fs->cvorticity->get_cdata();
+        //this->tmp_vec_field->ift();
+        this->ps->completeLoopWithExtraField(this->dt, *this->nabla_u);
+    }
+    else{
+        this->fs->cvelocity->ift();
+        this->ps->completeLoop(this->dt);
+    }
+    this->NSVE<rnumber>::step();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVEcomplex_particles<rnumber>::write_checkpoint(void)
+{
+    TIMEZONE("NSVEcomplex_particles::write_checkpoint");
+    this->NSVE<rnumber>::write_checkpoint();
+    this->particles_output_writer_mpi->open_file(this->fs->get_current_fname());
+    // TODO P2P write particle data too
+    this->particles_output_writer_mpi->template save<6>(
+            this->ps->getParticlesState(),
+            this->ps->getParticlesRhs(),
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->fs->iteration);
+    this->particles_output_writer_mpi->close_file();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVEcomplex_particles<rnumber>::finalize(void)
+{
+    TIMEZONE("NSVEcomplex_particles::finalize");
+    delete this->nabla_u;
+    delete this->particles_output_writer_mpi;
+    delete this->particles_sample_writer_mpi;
+    this->NSVE<rnumber>::finalize();
+    return EXIT_SUCCESS;
+}
+
+/** \brief Compute fluid stats and sample particle data.
+ */
+
+template <typename rnumber>
+int NSVEcomplex_particles<rnumber>::do_stats()
+{
+    TIMEZONE("NSVEcomplex_particles::do_stats");
+    /// perform fluid stats
+    this->NSVE<rnumber>::do_stats();
+
+    /// check if particle stats should be performed now;
+    /// if not, exit method.
+    if (!(this->iteration % this->niter_part == 0))
+        return EXIT_SUCCESS;
+
+    /// allocate temporary data array
+    /// initialize pdata0 with the positions, and pdata1 with the orientations
+    std::unique_ptr<double[]> pdata0 = this->ps->extractParticlesState(0, 3);
+    std::unique_ptr<double[]> pdata1 = this->ps->extractParticlesState(3, 6);
+    std::unique_ptr<double[]> pdata2(new double[9*this->ps->getLocalNbParticles()]);
+
+    /// sample position
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "position",
+            pdata0.get(),
+            &pdata0,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    /// sample orientation
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "orientation",
+            pdata0.get(),
+            &pdata1,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    /// sample velocity
+    /// from now on, we need to clean up data arrays before interpolation
+    std::fill_n(pdata1.get(), 3*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->tmp_vec_field, pdata1.get());
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "velocity",
+            pdata0.get(),
+            &pdata1,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    /// sample velocity gradient
+    /// fs->cvelocity should contain the velocity in Fourier space
+    this->fs->compute_velocity(this->fs->cvorticity);
+    compute_gradient(
+            this->fs->kk,
+            this->fs->cvelocity,
+            this->nabla_u);
+    this->nabla_u->ift();
+    std::fill_n(pdata2.get(), 9*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->nabla_u, pdata2.get());
+    this->particles_sample_writer_mpi->template save_dataset<9>(
+            "tracers0",
+            "velocity_gradient",
+            pdata0.get(),
+            &pdata2,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    /// compute acceleration and sample it
+    this->fs->compute_Lagrangian_acceleration(this->tmp_vec_field);
+    this->tmp_vec_field->ift();
+    std::fill_n(pdata1.get(), 3*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->tmp_vec_field, pdata1.get());
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "acceleration",
+            pdata0.get(),
+            &pdata1,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    // deallocate temporary data array
+    delete[] pdata0.release();
+    delete[] pdata1.release();
+    delete[] pdata2.release();
+
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVEcomplex_particles<rnumber>::read_parameters(void)
+{
+    TIMEZONE("NSVEcomplex_particles::read_parameters");
+    this->NSVE<rnumber>::read_parameters();
+    hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+    this->niter_part = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part");
+    this->nparticles = hdf5_tools::read_value<int>(parameter_file, "parameters/nparticles");
+    this->tracers0_integration_steps = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_integration_steps");
+    this->tracers0_neighbours = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_neighbours");
+    this->tracers0_smoothness = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_smoothness");
+    this->enable_p2p = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_p2p");
+    this->enable_inner = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_inner");
+    int tval = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_vorticity_omega");
+    this->enable_vorticity_omega = tval;
+    DEBUG_MSG("tracers0_enable_vorticity_omega = %d, this->enable_vorticity_omega = %d\n",
+              tval, this->enable_vorticity_omega);
+    this->cutoff = hdf5_tools::read_value<double>(parameter_file, "parameters/tracers0_cutoff");
+    this->inner_v0 = hdf5_tools::read_value<double>(parameter_file, "parameters/tracers0_inner_v0");
+    this->lambda = hdf5_tools::read_value<double>(parameter_file, "parameters/tracers0_lambda");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+template class NSVEcomplex_particles<float>;
+template class NSVEcomplex_particles<double>;
+
diff --git a/cpp/full_code/NSVEcomplex_particles.hpp b/cpp/full_code/NSVEcomplex_particles.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..20a84a6592f9b1158738610674836dd30362b6dc
--- /dev/null
+++ b/cpp/full_code/NSVEcomplex_particles.hpp
@@ -0,0 +1,97 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2017 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#ifndef NSVECOMPLEX_PARTICLES_HPP
+#define NSVECOMPLEX_PARTICLES_HPP
+
+
+
+#include <cstdlib>
+#include "base.hpp"
+#include "vorticity_equation.hpp"
+#include "full_code/NSVE.hpp"
+#include "particles/particles_system_builder.hpp"
+#include "particles/particles_output_hdf5.hpp"
+#include "particles/particles_sampling.hpp"
+
+/** \brief Navier-Stokes solver that includes complex particles.
+ *
+ *  Child of Navier Stokes vorticity equation solver, this class calls all the
+ *  methods from `NSVE`, and in addition integrates `complex particles`
+ *  in the resulting velocity field.
+ *  By `complex particles` we mean neutrally buoyant, very small particles,
+ *  which have an orientation and actively swim in that direction, and they may
+ *  also interact with each other, trying to reorient to a common orientation.
+ */
+
+template <typename rnumber>
+class NSVEcomplex_particles: public NSVE<rnumber>
+{
+    public:
+
+        /* parameters that are read in read_parameters */
+        int niter_part;
+        int nparticles;
+        int tracers0_integration_steps;
+        int tracers0_neighbours;
+        int tracers0_smoothness;
+
+        double cutoff;
+        double inner_v0;
+        double lambda;
+        bool enable_p2p;
+        bool enable_inner;
+        bool enable_vorticity_omega;
+
+        /* other stuff */
+        std::unique_ptr<abstract_particles_system<long long int, double>> ps;
+        // TODO P2P use a reader with particle data
+        particles_output_hdf5<long long int, double,6> *particles_output_writer_mpi;
+        particles_output_sampling_hdf5<long long int, double, 3> *particles_sample_writer_mpi;
+        // field for sampling velocity gradient
+        field<rnumber, FFTW, THREExTHREE> *nabla_u;
+
+
+        NSVEcomplex_particles(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            NSVE<rnumber>(
+                    COMMUNICATOR,
+                    simulation_name),
+            cutoff(10), inner_v0(1), lambda(1.0), enable_p2p(true), enable_inner(true), enable_vorticity_omega(true){}
+        ~NSVEcomplex_particles(){}
+
+        int initialize(void);
+        int step(void);
+        int finalize(void);
+
+        int read_parameters(void);
+        int write_checkpoint(void);
+        int do_stats(void);
+};
+
+#endif//NSVECOMPLEX_PARTICLES_HPP
+
diff --git a/cpp/full_code/NSVEp_extra_sampling.cpp b/cpp/full_code/NSVEp_extra_sampling.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7b3e5a76c6d47c990df9698ccb5f8ef22770a70d
--- /dev/null
+++ b/cpp/full_code/NSVEp_extra_sampling.cpp
@@ -0,0 +1,154 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include "full_code/NSVEp_extra_sampling.hpp"
+
+
+
+template <typename rnumber>
+int NSVEp_extra_sampling<rnumber>::initialize(void)
+{
+    TIMEZONE("NSVEp_extra_sampling::initialize");
+    this->NSVEparticles<rnumber>::initialize();
+
+    /// allocate grad vel field
+    this->nabla_u = new field<rnumber, FFTW, THREExTHREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            this->fs->cvorticity->fftw_plan_rigor);
+    this->pressure = new field<rnumber, FFTW, ONE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            this->fs->cvorticity->fftw_plan_rigor);
+    this->nabla_p = new field<rnumber, FFTW, THREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            this->fs->cvorticity->fftw_plan_rigor);
+    this->Hessian_p = new field<rnumber, FFTW, THREExTHREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            this->fs->cvorticity->fftw_plan_rigor);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVEp_extra_sampling<rnumber>::finalize(void)
+{
+    TIMEZONE("NSVEp_extra_sampling::finalize");
+    delete this->nabla_u;
+    delete this->pressure;
+    delete this->nabla_p;
+    delete this->Hessian_p;
+    this->NSVEparticles<rnumber>::finalize();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVEp_extra_sampling<rnumber>::do_stats()
+{
+    TIMEZONE("NSVEp_extra_sampling::do_stats");
+    this->NSVEparticles<rnumber>::do_stats();
+    if (!(this->iteration % this->niter_part == 0))
+        return EXIT_SUCCESS;
+
+    /// fs->cvelocity should contain the velocity in Fourier space
+    this->fs->compute_velocity(this->fs->cvorticity);
+    compute_gradient(
+            this->fs->kk,
+            this->fs->cvelocity,
+            this->nabla_u);
+    this->nabla_u->ift();
+
+    this->fs->compute_pressure(this->pressure);
+
+    compute_gradient(
+            this->fs->kk,
+            this->pressure,
+            this->nabla_p);
+
+    compute_gradient(
+            this->fs->kk,
+            this->nabla_p,
+            this->Hessian_p);
+
+    this->pressure->ift();
+    this->nabla_p->ift();
+    this->Hessian_p->ift();
+
+    // sample velocity gradient
+    std::unique_ptr<double[]> pdata(new double[9*this->ps->getLocalNbParticles()]);
+    std::fill_n(pdata.get(), 9*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->nabla_u, pdata.get());
+
+    this->particles_sample_writer_mpi->template save_dataset<9>(
+            "tracers0",
+            "velocity_gradient",
+            this->ps->getParticlesState(),
+            &pdata,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    // sample pressure
+    std::fill_n(pdata.get(), this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->pressure, pdata.get());
+    this->particles_sample_writer_mpi->template save_dataset<1>(
+            "tracers0",
+            "pressure",
+            this->ps->getParticlesState(),
+            &pdata,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    // sample pressure gradient
+    std::fill_n(pdata.get(), 3*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->nabla_p, pdata.get());
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "pressure_gradient",
+            this->ps->getParticlesState(),
+            &pdata,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    // sample pressure gradient
+    std::fill_n(pdata.get(), 9*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->Hessian_p, pdata.get());
+    this->particles_sample_writer_mpi->template save_dataset<9>(
+            "tracers0",
+            "pressure_Hessian",
+            this->ps->getParticlesState(),
+            &pdata,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+    return EXIT_SUCCESS;
+}
+
+template class NSVEp_extra_sampling<float>;
+template class NSVEp_extra_sampling<double>;
+
diff --git a/cpp/full_code/NSVEp_extra_sampling.hpp b/cpp/full_code/NSVEp_extra_sampling.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d3d1c1863a315d14f774644f54eed4d6a606d176
--- /dev/null
+++ b/cpp/full_code/NSVEp_extra_sampling.hpp
@@ -0,0 +1,72 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef NSVEP_EXTRA_SAMPLING_HPP
+#define NSVEP_EXTRA_SAMPLING_HPP
+
+
+
+#include <cstdlib>
+#include "base.hpp"
+#include "vorticity_equation.hpp"
+#include "full_code/NSVEparticles.hpp"
+#include "particles/particles_system_builder.hpp"
+#include "particles/particles_output_hdf5.hpp"
+#include "particles/particles_sampling.hpp"
+
+/** \brief Navier-Stokes solver with tracers that sample velocity gradient
+ *  and pressure Hessian.
+ *
+ */
+
+template <typename rnumber>
+class NSVEp_extra_sampling: public NSVEparticles<rnumber>
+{
+    public:
+
+        /* other stuff */
+        field<rnumber, FFTW, ONE> *pressure;
+        field<rnumber, FFTW, THREE> *nabla_p;
+        field<rnumber, FFTW, THREExTHREE> *nabla_u;
+        field<rnumber, FFTW, THREExTHREE> *Hessian_p;
+
+        NSVEp_extra_sampling(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            NSVEparticles<rnumber>(
+                    COMMUNICATOR,
+                    simulation_name){}
+        ~NSVEp_extra_sampling(){}
+
+        int initialize(void);
+        int finalize(void);
+
+        int read_parameters(void);
+        int do_stats(void);
+};
+
+#endif//NSVEP_EXTRA_SAMPLING_HPP
+
+
diff --git a/cpp/full_code/NSVEparticles.cpp b/cpp/full_code/NSVEparticles.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9b8743cdb48a5f3575931dfcc200fe1f0362778d
--- /dev/null
+++ b/cpp/full_code/NSVEparticles.cpp
@@ -0,0 +1,210 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2019 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#define NDEBUG
+
+#include <string>
+#include <cmath>
+#include "NSVEparticles.hpp"
+#include "scope_timer.hpp"
+
+template <typename rnumber>
+int NSVEparticles<rnumber>::initialize(void)
+{
+    TIMEZONE("NSVEparticles::intialize");
+    this->NSVE<rnumber>::initialize();
+    this->pressure = new field<rnumber, FFTW, ONE>(
+            this->fs->cvelocity->rlayout->sizes[2],
+            this->fs->cvelocity->rlayout->sizes[1],
+            this->fs->cvelocity->rlayout->sizes[0],
+            this->fs->cvelocity->rlayout->comm,
+            this->fs->cvelocity->fftw_plan_rigor);
+
+    this->ps = particles_system_builder(
+                this->fs->cvelocity,              // (field object)
+                this->fs->kk,                     // (kspace object, contains dkx, dky, dkz)
+                tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs)
+                (long long int)nparticles,  // to check coherency between parameters and hdf input file
+                this->fs->get_current_fname(),    // particles input filename
+                std::string("/tracers0/state/") + std::to_string(this->fs->iteration), // dataset name for initial input
+                std::string("/tracers0/rhs/")  + std::to_string(this->fs->iteration),  // dataset name for initial input
+                tracers0_neighbours,        // parameter (interpolation no neighbours)
+                tracers0_smoothness,        // parameter
+                this->comm,
+                this->fs->iteration+1);
+    this->particles_output_writer_mpi = new particles_output_hdf5<
+        long long int, double, 3>(
+                MPI_COMM_WORLD,
+                "tracers0",
+                nparticles,
+                tracers0_integration_steps);
+    this->particles_output_writer_mpi->setParticleFileLayout(this->ps->getParticleFileLayout());
+    this->particles_sample_writer_mpi = new particles_output_sampling_hdf5<
+        long long int, double, 3>(
+                MPI_COMM_WORLD,
+                this->ps->getGlobalNbParticles(),
+                (this->simname + "_particles.h5"),
+                "tracers0",
+                "position/0");
+    this->particles_sample_writer_mpi->setParticleFileLayout(this->ps->getParticleFileLayout());
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVEparticles<rnumber>::step(void)
+{
+    TIMEZONE("NSVEparticles::step");
+    this->fs->compute_velocity(this->fs->cvorticity);
+    this->fs->cvelocity->ift();
+    this->ps->completeLoop(this->dt);
+    this->NSVE<rnumber>::step();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVEparticles<rnumber>::write_checkpoint(void)
+{
+    TIMEZONE("NSVEparticles::write_checkpoint");
+    this->NSVE<rnumber>::write_checkpoint();
+    this->particles_output_writer_mpi->open_file(this->fs->get_current_fname());
+    this->particles_output_writer_mpi->template save<3>(
+            this->ps->getParticlesState(),
+            this->ps->getParticlesRhs(),
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->fs->iteration);
+    this->particles_output_writer_mpi->close_file();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVEparticles<rnumber>::finalize(void)
+{
+    TIMEZONE("NSVEparticles::finalize");
+    delete this->pressure;
+    delete this->ps.release();
+    delete this->particles_output_writer_mpi;
+    delete this->particles_sample_writer_mpi;
+    this->NSVE<rnumber>::finalize();
+    return EXIT_SUCCESS;
+}
+
+/** \brief Compute fluid stats and sample fields at particle locations.
+ */
+
+template <typename rnumber>
+int NSVEparticles<rnumber>::do_stats()
+{
+    TIMEZONE("NSVEparticles::do_stats");
+    /// fluid stats go here
+    this->NSVE<rnumber>::do_stats();
+
+
+    /// either one of two conditions suffices to compute statistics:
+    /// 1) current iteration is a multiple of niter_part
+    /// 2) we are within niter_part_fine_duration/2 of a multiple of niter_part_fine_period
+    if (!(this->iteration % this->niter_part == 0 ||
+          ((this->iteration + this->niter_part_fine_duration/2) % this->niter_part_fine_period <=
+           this->niter_part_fine_duration)))
+        return EXIT_SUCCESS;
+
+    // allocate temporary data array
+    std::unique_ptr<double[]> pdata(new double[3*this->ps->getLocalNbParticles()]);
+
+    /// copy position data
+
+    /// sample position
+    std::copy(this->ps->getParticlesState(),
+              this->ps->getParticlesState()+3*this->ps->getLocalNbParticles(),
+              pdata.get());
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "position",
+            this->ps->getParticlesState(),
+            &pdata,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    /// sample velocity
+    std::fill_n(pdata.get(), 3*this->ps->getLocalNbParticles(), 0);
+    if (!(this->iteration % this->niter_stat == 0))
+    {
+        // we need to compute velocity field manually, because it didn't happen in NSVE::do_stats()
+        this->fs->compute_velocity(this->fs->cvorticity);
+        *this->tmp_vec_field = this->fs->cvelocity->get_cdata();
+        this->tmp_vec_field->ift();
+    }
+    this->ps->sample_compute_field(*this->tmp_vec_field, pdata.get());
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "velocity",
+            this->ps->getParticlesState(),
+            &pdata,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    /// compute acceleration and sample it
+    this->fs->compute_Lagrangian_acceleration(this->tmp_vec_field, this->pressure);
+    this->tmp_vec_field->ift();
+    std::fill_n(pdata.get(), 3*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->tmp_vec_field, pdata.get());
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "acceleration",
+            this->ps->getParticlesState(),
+            &pdata,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    // deallocate temporary data array
+    delete[] pdata.release();
+
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int NSVEparticles<rnumber>::read_parameters(void)
+{
+    TIMEZONE("NSVEparticles::read_parameters");
+    this->NSVE<rnumber>::read_parameters();
+    hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+    this->niter_part = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part");
+    this->niter_part_fine_period = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part_fine_period");
+    this->niter_part_fine_duration = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part_fine_duration");
+    this->nparticles = hdf5_tools::read_value<int>(parameter_file, "parameters/nparticles");
+    this->tracers0_integration_steps = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_integration_steps");
+    this->tracers0_neighbours = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_neighbours");
+    this->tracers0_smoothness = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_smoothness");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+template class NSVEparticles<float>;
+template class NSVEparticles<double>;
+
diff --git a/bfps/cpp/full_code/NSVEparticles.hpp b/cpp/full_code/NSVEparticles.hpp
similarity index 89%
rename from bfps/cpp/full_code/NSVEparticles.hpp
rename to cpp/full_code/NSVEparticles.hpp
index ccafe6eeb09d27a6b211cfd75ecfba4fc5abe92b..8b70ead9b084aa4f7693c243b2f04e5b06c2d572 100644
--- a/bfps/cpp/full_code/NSVEparticles.hpp
+++ b/cpp/full_code/NSVEparticles.hpp
@@ -35,6 +35,7 @@
 #include "full_code/NSVE.hpp"
 #include "particles/particles_system_builder.hpp"
 #include "particles/particles_output_hdf5.hpp"
+#include "particles/particles_sampling.hpp"
 
 /** \brief Navier-Stokes solver that includes simple Lagrangian tracers.
  *
@@ -50,6 +51,8 @@ class NSVEparticles: public NSVE<rnumber>
 
         /* parameters that are read in read_parameters */
         int niter_part;
+        int niter_part_fine_period;
+        int niter_part_fine_duration;
         int nparticles;
         int tracers0_integration_steps;
         int tracers0_neighbours;
@@ -57,7 +60,10 @@ class NSVEparticles: public NSVE<rnumber>
 
         /* other stuff */
         std::unique_ptr<abstract_particles_system<long long int, double>> ps;
-        particles_output_hdf5<long long int, double,3,3> *particles_output_writer_mpi;
+        field<rnumber, FFTW, ONE> *pressure;
+
+        particles_output_hdf5<long long int, double,3> *particles_output_writer_mpi;
+        particles_output_sampling_hdf5<long long int, double, 3> *particles_sample_writer_mpi;
 
 
         NSVEparticles(
diff --git a/cpp/full_code/NSVEparticles_no_output.hpp b/cpp/full_code/NSVEparticles_no_output.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4e6de379b06c593e7b9cfbacb50a81c7bdcefcfd
--- /dev/null
+++ b/cpp/full_code/NSVEparticles_no_output.hpp
@@ -0,0 +1,50 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef NSVEPARTICLES_NO_OUTPUT_HPP
+#define NSVEPARTICLES_NO_OUTPUT_HPP
+
+#include "full_code/NSVEparticles.hpp"
+
+template <typename rnumber>
+class NSVEparticles_no_output: public NSVEparticles<rnumber>
+{
+    public:
+    NSVEparticles_no_output(
+            const MPI_Comm COMMUNICATOR,
+            const std::string &simulation_name):
+        NSVEparticles<rnumber>(
+                COMMUNICATOR,
+                simulation_name){}
+    ~NSVEparticles_no_output(){}
+    int write_checkpoint(void)
+    {
+        TIMEZONE("NSVEparticles_no_output::write_checkpoint");
+        return EXIT_SUCCESS;
+    }
+};
+
+#endif//NSVEPARTICLES_NO_OUTPUT_HPP
+
diff --git a/cpp/full_code/code_base.cpp b/cpp/full_code/code_base.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a6487c726de44b018392128f955ccebf7e7100a1
--- /dev/null
+++ b/cpp/full_code/code_base.cpp
@@ -0,0 +1,82 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2017 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#define NDEBUG
+
+#include "code_base.hpp"
+#include "scope_timer.hpp"
+
+
+code_base::code_base(
+        const MPI_Comm COMMUNICATOR,
+        const std::string &simulation_name):
+    comm(COMMUNICATOR),
+    simname(simulation_name)
+{
+    TIMEZONE("code_base::code_base");
+    MPI_Comm_rank(this->comm, &this->myrank);
+    MPI_Comm_size(this->comm, &this->nprocs);
+    this->stop_code_now = false;
+}
+
+int code_base::check_stopping_condition(void)
+{
+    TIMEZONE("code_base::check_stopping_condition");
+    if (myrank == 0)
+    {
+        std::string fname = (
+                std::string("stop_") +
+                std::string(this->simname));
+        {
+            struct stat file_buffer;
+            this->stop_code_now = (
+                    stat(fname.c_str(), &file_buffer) == 0);
+        }
+    }
+    MPI_Bcast(
+            &this->stop_code_now,
+            1,
+            MPI_C_BOOL,
+            0,
+            MPI_COMM_WORLD);
+    return EXIT_SUCCESS;
+}
+
+int code_base::read_parameters(void)
+{
+    TIMEZONE("code_base::read_parameters");
+    hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+    this->dkx = hdf5_tools::read_value<double>(parameter_file, "parameters/dkx");
+    this->dky = hdf5_tools::read_value<double>(parameter_file, "parameters/dky");
+    this->dkz = hdf5_tools::read_value<double>(parameter_file, "parameters/dkz");
+    this->nx = hdf5_tools::read_value<int>(parameter_file, "parameters/nx");
+    this->ny = hdf5_tools::read_value<int>(parameter_file, "parameters/ny");
+    this->nz = hdf5_tools::read_value<int>(parameter_file, "parameters/nz");
+    this->dealias_type = hdf5_tools::read_value<int>(parameter_file, "parameters/dealias_type");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
diff --git a/bfps/cpp/full_code/code_base.hpp b/cpp/full_code/code_base.hpp
similarity index 99%
rename from bfps/cpp/full_code/code_base.hpp
rename to cpp/full_code/code_base.hpp
index cf0521e2b7383edf925e1129d4fa4a931a55efe4..5ec4260dbfbaaa8ea4e123d8a38b680c0df121eb 100644
--- a/bfps/cpp/full_code/code_base.hpp
+++ b/cpp/full_code/code_base.hpp
@@ -108,6 +108,7 @@ class code_base
             return EXIT_SUCCESS;
         }
 
+        virtual int read_parameters(void);
         virtual int initialize(void) = 0;
         virtual int main_loop(void) = 0;
         virtual int finalize(void) = 0;
diff --git a/cpp/full_code/codes_with_no_output.hpp b/cpp/full_code/codes_with_no_output.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..175bed22d2f6a7b9fa2fc469d6a6bb6f03c9a3a0
--- /dev/null
+++ b/cpp/full_code/codes_with_no_output.hpp
@@ -0,0 +1,34 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef CODES_WITH_NO_OUTPUT_HPP
+#define CODES_WITH_NO_OUTPUT_HPP
+
+#include "full_code/NSVE_no_output.hpp"
+#include "full_code/NSVEparticles_no_output.hpp"
+
+
+#endif//CODES_WITH_NO_OUTPUT_HPP
+
diff --git a/bfps/cpp/full_code/direct_numerical_simulation.cpp b/cpp/full_code/direct_numerical_simulation.cpp
similarity index 53%
rename from bfps/cpp/full_code/direct_numerical_simulation.cpp
rename to cpp/full_code/direct_numerical_simulation.cpp
index edc2f99497a21368c63348167190dc6c64b44712..5329e7034e082b32cbdad7b4aae3d81665156215 100644
--- a/bfps/cpp/full_code/direct_numerical_simulation.cpp
+++ b/cpp/full_code/direct_numerical_simulation.cpp
@@ -1,3 +1,30 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#define NDEBUG
+
 #include <cstdlib>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -8,6 +35,7 @@
 
 int direct_numerical_simulation::grow_file_datasets()
 {
+    TIMEZONE("direct_numerical_simulation::grow_file_datasets");
     return hdf5_tools::grow_file_datasets(
             this->stat_file,
             "statistics",
@@ -16,6 +44,7 @@ int direct_numerical_simulation::grow_file_datasets()
 
 int direct_numerical_simulation::read_iteration(void)
 {
+    TIMEZONE("direct_numerical_simulation::read_iteration");
     /* read iteration */
     hid_t dset;
     hid_t iteration_file = H5Fopen(
@@ -56,6 +85,7 @@ int direct_numerical_simulation::read_iteration(void)
 
 int direct_numerical_simulation::write_iteration(void)
 {
+    TIMEZONE("direct_numerical_simulation::write_iteration");
     if (this->myrank == 0)
     {
         hid_t dset = H5Dopen(
@@ -88,6 +118,7 @@ int direct_numerical_simulation::write_iteration(void)
 
 int direct_numerical_simulation::main_loop(void)
 {
+    TIMEZONE("direct_numerical_simulation::main_loop");
     this->start_simple_timer();
     int max_iter = (this->iteration + this->niter_todo -
                     (this->iteration % this->niter_todo));
@@ -117,3 +148,15 @@ int direct_numerical_simulation::main_loop(void)
     return EXIT_SUCCESS;
 }
 
+int direct_numerical_simulation::read_parameters(void)
+{
+    TIMEZONE("direct_numerical_simulation::read_parameters");
+    this->code_base::read_parameters();
+    hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+    this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "parameters/checkpoints_per_file");
+    this->niter_out = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_out");
+    this->niter_stat = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_stat");
+    this->niter_todo = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_todo");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
diff --git a/bfps/cpp/full_code/direct_numerical_simulation.hpp b/cpp/full_code/direct_numerical_simulation.hpp
similarity index 98%
rename from bfps/cpp/full_code/direct_numerical_simulation.hpp
rename to cpp/full_code/direct_numerical_simulation.hpp
index 8050bb045b29acf29d655273f7dff310dd10d0fa..15ab698a1128fd836b74b100b9a79d5c6d67d12f 100644
--- a/bfps/cpp/full_code/direct_numerical_simulation.hpp
+++ b/cpp/full_code/direct_numerical_simulation.hpp
@@ -51,6 +51,7 @@ class direct_numerical_simulation: public code_base
                     simulation_name){}
         virtual ~direct_numerical_simulation(){}
 
+        virtual int read_parameters(void);
         virtual int write_checkpoint(void) = 0;
         virtual int initialize(void) = 0;
         virtual int step(void) = 0;
diff --git a/cpp/full_code/field_output_test.cpp b/cpp/full_code/field_output_test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..649d8dbb5f3a26d30f147ff228c90326cce9ee6d
--- /dev/null
+++ b/cpp/full_code/field_output_test.cpp
@@ -0,0 +1,92 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include <string>
+#include <cmath>
+#include <random>
+#include "field_output_test.hpp"
+#include "scope_timer.hpp"
+
+
+template <typename rnumber>
+int field_output_test<rnumber>::initialize(void)
+{
+    TIMEZONE("field_output_test::initialize");
+    this->read_parameters();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int field_output_test<rnumber>::finalize(void)
+{
+    TIMEZONE("field_output_test::finalize");
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int field_output_test<rnumber>::read_parameters()
+{
+    TIMEZONE("field_output_test::read_parameters");
+    this->test::read_parameters();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int field_output_test<rnumber>::do_work(void)
+{
+    TIMEZONE("field_output_test::do_work");
+    // allocate
+    field<rnumber, FFTW, ONE> *scal_field = new field<rnumber, FFTW, ONE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            FFTW_ESTIMATE);
+    std::default_random_engine rgen;
+    std::normal_distribution<rnumber> rdist;
+    rgen.seed(1);
+
+    // fill up scal_field
+    scal_field->real_space_representation = true;
+    scal_field->RLOOP(
+            [&](ptrdiff_t rindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex){
+            scal_field->rval(rindex) = rdist(rgen);
+            });
+
+    scal_field->io(
+            this->simname + std::string("_fields.h5"),
+            "scal_field",
+            0,
+            false);
+
+    // deallocate
+    delete scal_field;
+    return EXIT_SUCCESS;
+}
+
+template class field_output_test<float>;
+template class field_output_test<double>;
+
diff --git a/cpp/full_code/field_output_test.hpp b/cpp/full_code/field_output_test.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3662e4b1ab610831e53be3edd2d1612eb2b45a46
--- /dev/null
+++ b/cpp/full_code/field_output_test.hpp
@@ -0,0 +1,60 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2017 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#ifndef FILTER_OUTPUT_TEST_HPP
+#define FILTER_OUTPUT_TEST_HPP
+
+
+
+#include <cstdlib>
+#include "base.hpp"
+#include "kspace.hpp"
+#include "field.hpp"
+#include "full_code/test.hpp"
+
+/** \brief A class for testing basic field class functionality.
+ */
+
+template <typename rnumber>
+class field_output_test: public test
+{
+    public:
+        field_output_test(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            test(
+                    COMMUNICATOR,
+                    simulation_name){}
+        ~field_output_test(){}
+
+        int initialize(void);
+        int do_work(void);
+        int finalize(void);
+        int read_parameters(void);
+};
+
+#endif//FILTER_OUTPUT_TEST_HPP
+
diff --git a/cpp/full_code/field_single_to_double.cpp b/cpp/full_code/field_single_to_double.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..93a03aed5a494138ba8279792788a7bb19105325
--- /dev/null
+++ b/cpp/full_code/field_single_to_double.cpp
@@ -0,0 +1,119 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include <string>
+#include <cmath>
+#include "field_single_to_double.hpp"
+#include "scope_timer.hpp"
+
+
+template <typename rnumber>
+int field_single_to_double<rnumber>::initialize(void)
+{
+    TIMEZONE("field_single_to_double::intialize");
+    this->NSVE_field_stats<rnumber>::initialize();
+    DEBUG_MSG("after NSVE_field_stats::initialize\n");
+    this->kk = new kspace<FFTW, SMOOTH>(
+            this->vorticity->clayout, this->dkx, this->dky, this->dkz);
+    this->vec_field_double = new field<double, FFTW, THREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            this->vorticity->fftw_plan_rigor);
+    this->vec_field_double->real_space_representation = false;
+    hid_t parameter_file = H5Fopen(
+            (this->simname + std::string(".h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    hid_t dset = H5Dopen(parameter_file, "/parameters/niter_out", H5P_DEFAULT);
+    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->niter_out);
+    H5Dclose(dset);
+    if (H5Lexists(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT))
+    {
+        dset = H5Dopen(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT);
+        H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->checkpoints_per_file);
+        H5Dclose(dset);
+    }
+    else
+        this->checkpoints_per_file = 1;
+    H5Fclose(parameter_file);
+    parameter_file = H5Fopen(
+            (this->simname + std::string("_post.h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    DEBUG_MSG("before read_vector\n");
+    this->iteration_list = hdf5_tools::read_vector<int>(
+            parameter_file,
+            "/field_single_to_double/parameters/iteration_list");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int field_single_to_double<rnumber>::work_on_current_iteration(void)
+{
+    TIMEZONE("field_single_to_double::work_on_current_iteration");
+    this->read_current_cvorticity();
+
+    // using CLOOP as opposed to a global std::copy because CLOOP
+    // is openmp parallelized.
+    this->kk->CLOOP(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+        {
+            std::copy(
+                    (rnumber*)(this->vorticity->get_cdata() + cindex*3),
+                    (rnumber*)(this->vorticity->get_cdata() + cindex*3) + 6,
+                    (double*)(this->vec_field_double->get_cdata() + cindex*3));
+        }
+    }
+    );
+
+    std::string fname = (
+            this->simname +
+            std::string("_checkpoint_double_") +
+            std::to_string(this->iteration / (this->niter_out*this->checkpoints_per_file)) +
+            std::string(".h5"));
+    this->vec_field_double->io(
+            fname,
+            "vorticity",
+            this->iteration,
+            false);
+
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int field_single_to_double<rnumber>::finalize(void)
+{
+    TIMEZONE("field_single_to_double::finalize");
+    delete this->vec_field_double;
+    delete this->kk;
+    return EXIT_SUCCESS;
+}
+
+template class field_single_to_double<float>;
+
diff --git a/cpp/full_code/field_single_to_double.hpp b/cpp/full_code/field_single_to_double.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0a7550e40ccfbf40958b74b7a2cd63f54b2bd84a
--- /dev/null
+++ b/cpp/full_code/field_single_to_double.hpp
@@ -0,0 +1,63 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2017 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#ifndef FIELD_SINGLE_TO_DOUBLE_HPP
+#define FIELD_SINGLE_TO_DOUBLE_HPP
+
+#include <cstdlib>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <vector>
+#include "base.hpp"
+#include "field.hpp"
+#include "field_binary_IO.hpp"
+#include "full_code/NSVE_field_stats.hpp"
+
+template <typename rnumber>
+class field_single_to_double: public NSVE_field_stats<rnumber>
+{
+    public:
+        int checkpoints_per_file;
+        int niter_out;
+        kspace<FFTW, SMOOTH> *kk;
+
+        field<double, FFTW, THREE> *vec_field_double;
+
+        field_single_to_double(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            NSVE_field_stats<rnumber>(
+                    COMMUNICATOR,
+                    simulation_name){}
+        virtual ~field_single_to_double(){}
+
+        int initialize(void);
+        int work_on_current_iteration(void);
+        int finalize(void);
+};
+
+#endif//FIELD_SINGLE_TO_DOUBLE_HPP
+
diff --git a/cpp/full_code/field_test.cpp b/cpp/full_code/field_test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..aa055a6e162ae15fc3652d9d0533e1eb07d5f528
--- /dev/null
+++ b/cpp/full_code/field_test.cpp
@@ -0,0 +1,145 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include <string>
+#include <cmath>
+#include <random>
+#include "field_test.hpp"
+#include "scope_timer.hpp"
+
+
+template <typename rnumber>
+int field_test<rnumber>::initialize(void)
+{
+    TIMEZONE("field_test::initialize");
+    this->read_parameters();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int field_test<rnumber>::finalize(void)
+{
+    TIMEZONE("field_test::finalize");
+    this->read_parameters();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int field_test<rnumber>::read_parameters()
+{
+    TIMEZONE("field_test::read_parameters");
+    this->test::read_parameters();
+    // in case any parameters are needed, this is where they should be read
+    hid_t parameter_file = H5Fopen(
+            (this->simname + std::string(".h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    this->filter_length = hdf5_tools::read_value<double>(parameter_file, "/parameters/filter_length");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int field_test<rnumber>::do_work(void)
+{
+    TIMEZONE("field_test::do_work");
+    // allocate
+    field<rnumber, FFTW, ONE> *scal_field = new field<rnumber, FFTW, ONE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            FFTW_ESTIMATE);
+    field<rnumber, FFTW, ONE> *scal_field_alt = new field<rnumber, FFTW, ONE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            FFTW_ESTIMATE);
+    std::default_random_engine rgen;
+    std::normal_distribution<rnumber> rdist;
+    rgen.seed(2);
+    //auto gaussian = std::bind(rgen, rdist);
+    kspace<FFTW,SMOOTH> *kk = new kspace<FFTW, SMOOTH>(
+            scal_field->clayout, this->dkx, this->dky, this->dkz);
+
+    if (this->myrank == 0)
+    {
+        hid_t stat_file = H5Fopen(
+                (this->simname + std::string(".h5")).c_str(),
+                H5F_ACC_RDWR,
+                H5P_DEFAULT);
+        kk->store(stat_file);
+        H5Fclose(stat_file);
+    }
+
+    // fill up scal_field
+    scal_field->real_space_representation = true;
+    scal_field->RLOOP(
+            [&](ptrdiff_t rindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex){
+            scal_field->rval(rindex) = rdist(rgen);
+            });
+
+    *scal_field_alt = scal_field->get_rdata();
+    double L2r = scal_field->L2norm(kk);
+    scal_field->dft();
+    double L2c = scal_field->L2norm(kk);
+    scal_field->ift();
+    scal_field->normalize();
+    DEBUG_MSG("L2r = %g, L2c = %g\n",
+            L2r, L2c / scal_field->npoints);
+
+    double max_error = 0;
+    scal_field->RLOOP(
+            [&](ptrdiff_t rindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex){
+            double tval = fabs(scal_field->rval(rindex) - scal_field_alt->rval(rindex));
+            if (max_error < tval)
+                max_error = tval;
+            });
+
+    DEBUG_MSG("maximum error is %g\n", max_error);
+
+    scal_field->dft();
+    kk->template dealias<rnumber, ONE>(scal_field->get_cdata());
+    scal_field->symmetrize();
+    scal_field->normalize();
+    L2c = scal_field->L2norm(kk);
+    scal_field->ift();
+    L2r = scal_field->L2norm(kk);
+    DEBUG_MSG("L2r = %g, L2c = %g\n",
+            L2r, L2c);
+
+    // deallocate
+    delete kk;
+    delete scal_field;
+    delete scal_field_alt;
+    return EXIT_SUCCESS;
+}
+
+template class field_test<float>;
+template class field_test<double>;
+
diff --git a/bfps/cpp/interpolator.hpp b/cpp/full_code/field_test.hpp
similarity index 51%
rename from bfps/cpp/interpolator.hpp
rename to cpp/full_code/field_test.hpp
index 7e56ebe159fd24ed7cf623f0a869e1d262d4aadb..5339feb80ae690170f52935e97cd700e958a48a4 100644
--- a/bfps/cpp/interpolator.hpp
+++ b/cpp/full_code/field_test.hpp
@@ -1,6 +1,6 @@
 /**********************************************************************
 *                                                                     *
-*  Copyright 2015 Max Planck Institute                                *
+*  Copyright 2017 Max Planck Institute                                *
 *                 for Dynamics and Self-Organization                  *
 *                                                                     *
 *  This file is part of bfps.                                         *
@@ -24,56 +24,40 @@
 
 
 
-#include <cmath>
-#include "field_descriptor.hpp"
-#include "fftw_tools.hpp"
-#include "fluid_solver_base.hpp"
-#include "interpolator_base.hpp"
+#ifndef FILTER_TEST_HPP
+#define FILTER_TEST_HPP
 
-#ifndef INTERPOLATOR
 
-#define INTERPOLATOR
 
-template <class rnumber, int interp_neighbours>
-class interpolator:public interpolator_base<rnumber, interp_neighbours>
-{
-    private:
-        /* pointer to buffered field */
-        rnumber *field;
-
-    public:
-        using interpolator_base<rnumber, interp_neighbours>::operator();
-        ptrdiff_t buffer_size;
-
-        /* descriptor for buffered field */
-        field_descriptor<rnumber> *buffered_descriptor;
+#include <cstdlib>
+#include "base.hpp"
+#include "kspace.hpp"
+#include "field.hpp"
+#include "full_code/test.hpp"
 
-        interpolator(
-                fluid_solver_base<rnumber> *FSOLVER,
-                base_polynomial_values BETA_POLYS,
-                ...);
-        ~interpolator();
+/** \brief A class for testing basic field class functionality.
+ */
 
-        int read_rFFTW(const void *src);
-
-        inline int get_rank(double z)
-        {
-            return this->descriptor->rank[MOD(int(floor(z/this->dz)), this->descriptor->sizes[0])];
-        }
-
-        /* interpolate field at an array of locations */
-        void sample(
-                const int nparticles,
-                const int pdimension,
-                const double *__restrict__ x,
-                double *__restrict__ y,
-                const int *deriv = NULL);
-        void operator()(
-                const int *__restrict__ xg,
-                const double *__restrict__ xx,
-                double *__restrict__ dest,
-                const int *deriv = NULL);
+template <typename rnumber>
+class field_test: public test
+{
+    public:
+        double filter_length;
+        // kspace, in case we want to compute spectra or smth
+
+        field_test(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            test(
+                    COMMUNICATOR,
+                    simulation_name){}
+        ~field_test(){}
+
+        int initialize(void);
+        int do_work(void);
+        int finalize(void);
+        int read_parameters(void);
 };
 
-#endif//INTERPOLATOR
+#endif//FILTER_TEST_HPP
 
diff --git a/bfps/cpp/full_code/filter_test.cpp b/cpp/full_code/filter_test.cpp
similarity index 66%
rename from bfps/cpp/full_code/filter_test.cpp
rename to cpp/full_code/filter_test.cpp
index aeedfbe74806adcff53a97d6c227b8fdcd30195f..5df45a7941d5ce4989cee72cbb84731a7cec487f 100644
--- a/bfps/cpp/full_code/filter_test.cpp
+++ b/cpp/full_code/filter_test.cpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #include <string>
 #include <cmath>
 #include "filter_test.hpp"
@@ -7,11 +32,12 @@
 template <typename rnumber>
 int filter_test<rnumber>::initialize(void)
 {
+    TIMEZONE("filter_test::initialize");
     this->read_parameters();
     this->scal_field = new field<rnumber, FFTW, ONE>(
             nx, ny, nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            FFTW_ESTIMATE);
     this->kk = new kspace<FFTW, SMOOTH>(
             this->scal_field->clayout, this->dkx, this->dky, this->dkz);
 
@@ -30,6 +56,7 @@ int filter_test<rnumber>::initialize(void)
 template <typename rnumber>
 int filter_test<rnumber>::finalize(void)
 {
+    TIMEZONE("filter_test::finalize");
     delete this->scal_field;
     delete this->kk;
     return EXIT_SUCCESS;
@@ -38,16 +65,13 @@ int filter_test<rnumber>::finalize(void)
 template <typename rnumber>
 int filter_test<rnumber>::read_parameters()
 {
+    TIMEZONE("filter_test::read_parameters");
     this->test::read_parameters();
-    hid_t parameter_file;
-    hid_t dset, memtype, space;
-    parameter_file = H5Fopen(
+    hid_t parameter_file = H5Fopen(
             (this->simname + std::string(".h5")).c_str(),
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
-    dset = H5Dopen(parameter_file, "/parameters/filter_length", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->filter_length);
-    H5Dclose(dset);
+    this->filter_length = hdf5_tools::read_value<double>(parameter_file, "/parameters/filter_length");
     H5Fclose(parameter_file);
     return EXIT_SUCCESS;
 }
@@ -56,6 +80,7 @@ template <typename rnumber>
 int filter_test<rnumber>::reset_field(
         int dimension)
 {
+    TIMEZONE("filter_test::reset_field");
     this->scal_field->real_space_representation = true;
     *this->scal_field = 0.0;
     if (this->scal_field->rlayout->starts[0] == 0)
@@ -95,6 +120,7 @@ int filter_test<rnumber>::reset_field(
 template <typename rnumber>
 int filter_test<rnumber>::do_work(void)
 {
+    TIMEZONE("filter_test::do_work");
     std::string filename = this->simname + std::string("_fields.h5");
     for (int dimension = 0; dimension < 3; dimension++)
     {
diff --git a/bfps/cpp/full_code/filter_test.hpp b/cpp/full_code/filter_test.hpp
similarity index 100%
rename from bfps/cpp/full_code/filter_test.hpp
rename to cpp/full_code/filter_test.hpp
diff --git a/bfps/cpp/full_code/get_rfields.cpp b/cpp/full_code/get_rfields.cpp
similarity index 53%
rename from bfps/cpp/full_code/get_rfields.cpp
rename to cpp/full_code/get_rfields.cpp
index 0df8b564a61fba11118ef3f551b0a2db6cbfec1d..45f6b5dce95b5d4fbb9edc2ce353fdde51f0fba8 100644
--- a/bfps/cpp/full_code/get_rfields.cpp
+++ b/cpp/full_code/get_rfields.cpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #include <string>
 #include <cmath>
 #include "get_rfields.hpp"
@@ -7,27 +32,27 @@
 template <typename rnumber>
 int get_rfields<rnumber>::initialize(void)
 {
+    TIMEZONE("get_rfields::initialize");
     this->NSVE_field_stats<rnumber>::initialize();
+    DEBUG_MSG("after NSVE_field_stats::initialize\n");
     this->kk = new kspace<FFTW, SMOOTH>(
             this->vorticity->clayout, this->dkx, this->dky, this->dkz);
     hid_t parameter_file = H5Fopen(
             (this->simname + std::string(".h5")).c_str(),
             H5F_ACC_RDONLY,
             H5P_DEFAULT);
-    hid_t dset = H5Dopen(parameter_file, "/parameters/niter_out", H5P_DEFAULT);
-    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->niter_out);
-    H5Dclose(dset);
-    if (H5Lexists(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT))
-    {
-        dset = H5Dopen(parameter_file, "/parameters/checkpoints_per_file", H5P_DEFAULT);
-        H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &this->checkpoints_per_file);
-        H5Dclose(dset);
-    }
-    else
+    this->niter_out = hdf5_tools::read_value<int>(parameter_file, "/parameters/niter_out");
+    this->checkpoints_per_file = hdf5_tools::read_value<int>(parameter_file, "/parameters/checkpoints_per_file");
+    if (this->checkpoints_per_file == INT_MAX) // value returned if dataset does not exist
         this->checkpoints_per_file = 1;
+    H5Fclose(parameter_file);
+    parameter_file = H5Fopen(
+            (this->simname + std::string("_post.h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
     this->iteration_list = hdf5_tools::read_vector<int>(
             parameter_file,
-            "/get_rfields/iteration_list");
+            "/get_rfields/parameters/iteration_list");
     H5Fclose(parameter_file);
     return EXIT_SUCCESS;
 }
@@ -35,7 +60,7 @@ int get_rfields<rnumber>::initialize(void)
 template <typename rnumber>
 int get_rfields<rnumber>::work_on_current_iteration(void)
 {
-    DEBUG_MSG("entered get_rfields::work_on_current_iteration\n");
+    TIMEZONE("get_rfields::work_on_current_iteration");
     this->read_current_cvorticity();
     field<rnumber, FFTW, THREE> *vel = new field<rnumber, FFTW, THREE>(
             this->nx, this->ny, this->nz,
@@ -77,12 +102,20 @@ int get_rfields<rnumber>::work_on_current_iteration(void)
             false);
 
     delete vel;
+
+    this->vorticity->ift();
+    this->vorticity->io(
+            fname,
+            "vorticity",
+            this->iteration,
+            false);
     return EXIT_SUCCESS;
 }
 
 template <typename rnumber>
 int get_rfields<rnumber>::finalize(void)
 {
+    TIMEZONE("get_rfields::finalize");
     delete this->kk;
     this->NSVE_field_stats<rnumber>::finalize();
     return EXIT_SUCCESS;
diff --git a/bfps/cpp/full_code/get_rfields.hpp b/cpp/full_code/get_rfields.hpp
similarity index 100%
rename from bfps/cpp/full_code/get_rfields.hpp
rename to cpp/full_code/get_rfields.hpp
diff --git a/bfps/cpp/full_code/joint_acc_vel_stats.cpp b/cpp/full_code/joint_acc_vel_stats.cpp
similarity index 73%
rename from bfps/cpp/full_code/joint_acc_vel_stats.cpp
rename to cpp/full_code/joint_acc_vel_stats.cpp
index e4f4d5d40772292f44c7e776dcd4d1b82c4ce222..be2cd9fe5a38dfb28df12d2c221b37c4d152212e 100644
--- a/bfps/cpp/full_code/joint_acc_vel_stats.cpp
+++ b/cpp/full_code/joint_acc_vel_stats.cpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #include <string>
 #include <cmath>
 #include "joint_acc_vel_stats.hpp"
@@ -7,6 +32,7 @@
 template <typename rnumber>
 int joint_acc_vel_stats<rnumber>::initialize(void)
 {
+    TIMEZONE("joint_acc_vel_stats::initialize");
     this->NSVE_field_stats<rnumber>::initialize();
     this->kk = new kspace<FFTW, SMOOTH>(
             this->vorticity->clayout, this->dkx, this->dky, this->dkz);
@@ -85,7 +111,7 @@ int joint_acc_vel_stats<rnumber>::initialize(void)
 template <typename rnumber>
 int joint_acc_vel_stats<rnumber>::work_on_current_iteration(void)
 {
-    DEBUG_MSG("entered joint_acc_vel_stats::work_on_current_iteration\n");
+    TIMEZONE("joint_acc_vel_stats::work_on_current_iteration");
     /// read current vorticity, place it in this->ve->cvorticity
     this->read_current_cvorticity();
     *this->ve->cvorticity = this->vorticity->get_cdata();
@@ -109,7 +135,7 @@ int joint_acc_vel_stats<rnumber>::work_on_current_iteration(void)
     vel = new field<rnumber, FFTW, THREE>(
             this->nx, this->ny, this->nz,
             this->comm,
-            DEFAULT_FFTW_FLAG);
+            this->vorticity->fftw_plan_rigor);
     invert_curl(kk, this->ve->cvorticity, vel);
     vel->ift();
 
@@ -156,6 +182,7 @@ int joint_acc_vel_stats<rnumber>::work_on_current_iteration(void)
 template <typename rnumber>
 int joint_acc_vel_stats<rnumber>::finalize(void)
 {
+    DEBUG_MSG("entered joint_acc_vel_stats::finalize\n");
     delete this->ve;
     delete this->kk;
     if (this->myrank == 0)
diff --git a/bfps/cpp/full_code/joint_acc_vel_stats.hpp b/cpp/full_code/joint_acc_vel_stats.hpp
similarity index 100%
rename from bfps/cpp/full_code/joint_acc_vel_stats.hpp
rename to cpp/full_code/joint_acc_vel_stats.hpp
diff --git a/bfps/cpp/full_code/main_code.hpp b/cpp/full_code/main_code.hpp
similarity index 100%
rename from bfps/cpp/full_code/main_code.hpp
rename to cpp/full_code/main_code.hpp
diff --git a/cpp/full_code/native_binary_to_hdf5.cpp b/cpp/full_code/native_binary_to_hdf5.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0c2d738493aea060838c4acfed05251066b99bae
--- /dev/null
+++ b/cpp/full_code/native_binary_to_hdf5.cpp
@@ -0,0 +1,99 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include <string>
+#include <cmath>
+#include "native_binary_to_hdf5.hpp"
+#include "scope_timer.hpp"
+
+
+template <typename rnumber>
+int native_binary_to_hdf5<rnumber>::initialize(void)
+{
+    TIMEZONE("native_binary_to_hdf5::initialize");
+    this->read_parameters();
+    this->vec_field = new field<rnumber, FFTW, THREE>(
+            nx, ny, nz,
+            this->comm,
+            FFTW_ESTIMATE);
+    this->vec_field->real_space_representation = false;
+    this->bin_IO = new field_binary_IO<rnumber, COMPLEX, THREE>(
+            this->vec_field->clayout->sizes,
+            this->vec_field->clayout->subsizes,
+            this->vec_field->clayout->starts,
+            this->vec_field->clayout->comm);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int native_binary_to_hdf5<rnumber>::work_on_current_iteration(void)
+{
+    TIMEZONE("native_binary_to_hdf5::work_on_current_iteration");
+    char itername[16];
+    sprintf(itername, "i%.5x", this->iteration);
+    std::string native_binary_fname = (
+            this->simname +
+            std::string("_cvorticity_") +
+            std::string(itername));
+    this->bin_IO->read(
+            native_binary_fname,
+            this->vec_field->get_cdata());
+    this->vec_field->io(
+            (native_binary_fname +
+             std::string(".h5")),
+            "vorticity",
+            this->iteration,
+            false);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int native_binary_to_hdf5<rnumber>::finalize(void)
+{
+    TIMEZONE("native_binary_to_hdf5::finalize");
+    delete this->bin_IO;
+    delete this->vec_field;
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int native_binary_to_hdf5<rnumber>::read_parameters(void)
+{
+    TIMEZONE("native_binary_to_hdf5::read_parameters");
+    this->postprocess::read_parameters();
+    hid_t parameter_file = H5Fopen(
+            (this->simname + std::string(".h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    this->iteration_list = hdf5_tools::read_vector<int>(
+            parameter_file,
+            "/native_binary_to_hdf5/iteration_list");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+template class native_binary_to_hdf5<float>;
+template class native_binary_to_hdf5<double>;
+
diff --git a/bfps/cpp/full_code/native_binary_to_hdf5.hpp b/cpp/full_code/native_binary_to_hdf5.hpp
similarity index 100%
rename from bfps/cpp/full_code/native_binary_to_hdf5.hpp
rename to cpp/full_code/native_binary_to_hdf5.hpp
diff --git a/cpp/full_code/postprocess.cpp b/cpp/full_code/postprocess.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e8c7fb279821fff0e3fd82c85b1c490a0b6a68e7
--- /dev/null
+++ b/cpp/full_code/postprocess.cpp
@@ -0,0 +1,78 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include <cstdlib>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "scope_timer.hpp"
+#include "hdf5_tools.hpp"
+#include "full_code/postprocess.hpp"
+
+
+int postprocess::main_loop(void)
+{
+    TIMEZONE("postprocess::main_loop");
+    this->start_simple_timer();
+    for (unsigned int iteration_counter = 0;
+         iteration_counter < iteration_list.size();
+         iteration_counter++)
+    {
+        this->iteration = iteration_list[iteration_counter];
+    #ifdef USE_TIMINGOUTPUT
+        const std::string loopLabel = ("postprocess::main_loop-" +
+                                       std::to_string(this->iteration));
+        TIMEZONE(loopLabel.c_str());
+    #endif
+        this->work_on_current_iteration();
+        this->print_simple_timer(
+                "iteration " + std::to_string(this->iteration));
+
+        this->check_stopping_condition();
+        if (this->stop_code_now)
+            break;
+    }
+    return EXIT_SUCCESS;
+}
+
+
+int postprocess::read_parameters()
+{
+    TIMEZONE("postprocess::read_parameters");
+    this->code_base::read_parameters();
+    hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+    this->nu = hdf5_tools::read_value<double>(parameter_file, "parameters/nu");
+    this->dt = hdf5_tools::read_value<double>(parameter_file, "parameters/dt");
+    this->fmode = hdf5_tools::read_value<int>(parameter_file, "parameters/fmode");
+    this->famplitude = hdf5_tools::read_value<double>(parameter_file, "parameters/famplitude");
+    this->friction_coefficient = hdf5_tools::read_value<double>(parameter_file, "parameters/friction_coefficient");
+    this->fk0 = hdf5_tools::read_value<double>(parameter_file, "parameters/fk0");
+    this->fk1 = hdf5_tools::read_value<double>(parameter_file, "parameters/fk1");
+    this->energy = hdf5_tools::read_value<double>(parameter_file, "parameters/energy");
+    std::string tmp = hdf5_tools::read_string(parameter_file, "parameters/forcing_type");
+    snprintf(this->forcing_type, 511, "%s", tmp.c_str());
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
diff --git a/bfps/cpp/full_code/postprocess.hpp b/cpp/full_code/postprocess.hpp
similarity index 97%
rename from bfps/cpp/full_code/postprocess.hpp
rename to cpp/full_code/postprocess.hpp
index c80fc3f2dfdc35691d9e69442fa3ad7b6e592891..65e6eadd1fd05615eb69cb7d8ca1754abd1b7e42 100644
--- a/bfps/cpp/full_code/postprocess.hpp
+++ b/cpp/full_code/postprocess.hpp
@@ -43,8 +43,10 @@ class postprocess: public code_base
         /* parameters that are read in read_parameters */
         double dt;
         double famplitude;
+        double friction_coefficient;
         double fk0;
         double fk1;
+        double energy;
         int fmode;
         char forcing_type[512];
         double nu;
diff --git a/cpp/full_code/resize.cpp b/cpp/full_code/resize.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6d372dc462df8fc45729afe961488979f18ef818
--- /dev/null
+++ b/cpp/full_code/resize.cpp
@@ -0,0 +1,101 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include <string>
+#include <cmath>
+#include "resize.hpp"
+#include "scope_timer.hpp"
+
+
+template <typename rnumber>
+int resize<rnumber>::initialize(void)
+{
+    TIMEZONE("resize::initialize");
+    this->NSVE_field_stats<rnumber>::initialize();
+    DEBUG_MSG("after NSVE_field_stats::initialize\n");
+    hid_t parameter_file = H5Fopen(
+            (this->simname + std::string(".h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+
+    this->niter_out = hdf5_tools::read_value<int>(
+            parameter_file, "/parameters/niter_out");
+    H5Fclose(parameter_file);
+    parameter_file = H5Fopen(
+            (this->simname + std::string("_post.h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    DEBUG_MSG("before read_vector\n");
+    this->iteration_list = hdf5_tools::read_vector<int>(
+            parameter_file,
+            "/resize/parameters/iteration_list");
+
+    this->new_nx = hdf5_tools::read_value<int>(
+            parameter_file, "/resize/parameters/new_nx");
+    this->new_ny = hdf5_tools::read_value<int>(
+            parameter_file, "/resize/parameters/new_ny");
+    this->new_nz = hdf5_tools::read_value<int>(
+            parameter_file, "/resize/parameters/new_nz");
+    this->new_simname = hdf5_tools::read_string(
+            parameter_file, "/resize/parameters/new_simname");
+    H5Fclose(parameter_file);
+
+    this->new_field = new field<rnumber, FFTW, THREE>(
+            this->new_nx, this->new_ny, this->new_nz,
+            this->comm,
+            this->vorticity->fftw_plan_rigor);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int resize<rnumber>::work_on_current_iteration(void)
+{
+    TIMEZONE("resize::work_on_current_iteration");
+    this->read_current_cvorticity();
+
+    std::string fname = (
+            this->new_simname +
+            std::string("_fields.h5"));
+    *this->new_field = *this->vorticity;
+    this->new_field->io(
+            fname,
+            "vorticity",
+            this->iteration,
+            false);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int resize<rnumber>::finalize(void)
+{
+    TIMEZONE("resize::finalize");
+    delete this->new_field;
+    this->NSVE_field_stats<rnumber>::finalize();
+    return EXIT_SUCCESS;
+}
+
+template class resize<float>;
+template class resize<double>;
+
diff --git a/cpp/full_code/resize.hpp b/cpp/full_code/resize.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..de227c886615ad48c8d7872f6533a0ad93b65307
--- /dev/null
+++ b/cpp/full_code/resize.hpp
@@ -0,0 +1,67 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2017 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#ifndef RESIZE_HPP
+#define RESIZE_HPP
+
+#include <cstdlib>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <vector>
+#include "base.hpp"
+#include "field.hpp"
+#include "field_binary_IO.hpp"
+#include "full_code/NSVE_field_stats.hpp"
+
+template <typename rnumber>
+class resize: public NSVE_field_stats<rnumber>
+{
+    public:
+        std::string new_simname;
+
+        int new_nx;
+        int new_ny;
+        int new_nz;
+
+        int niter_out;
+
+        field<rnumber, FFTW, THREE> *new_field;
+
+        resize(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            NSVE_field_stats<rnumber>(
+                    COMMUNICATOR,
+                    simulation_name){}
+        virtual ~resize(){}
+
+        int initialize(void);
+        int work_on_current_iteration(void);
+        int finalize(void);
+};
+
+#endif//RESIZE_HPP
+
diff --git a/cpp/full_code/symmetrize_test.cpp b/cpp/full_code/symmetrize_test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..111d3a833815b3f9abf539db19b81f2d18d33a99
--- /dev/null
+++ b/cpp/full_code/symmetrize_test.cpp
@@ -0,0 +1,220 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include <string>
+#include <cmath>
+#include <random>
+#include "symmetrize_test.hpp"
+#include "fftw_tools.hpp"
+#include "scope_timer.hpp"
+
+
+template <typename rnumber>
+int symmetrize_test<rnumber>::initialize(void)
+{
+    TIMEZONE("symmetrize_test::initialize");
+    this->read_parameters();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int symmetrize_test<rnumber>::finalize(void)
+{
+    TIMEZONE("symmetrize_test::finalize");
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int symmetrize_test<rnumber>::read_parameters()
+{
+    TIMEZONE("symmetrize_test::read_parameters");
+    this->test::read_parameters();
+    hid_t parameter_file = H5Fopen(
+            (this->simname + std::string(".h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    this->random_seed = hdf5_tools::read_value<int>(
+            parameter_file, "/parameters/random_seed");
+    this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int symmetrize_test<rnumber>::do_work(void)
+{
+    TIMEZONE("symmetrize_test::do_work");
+    // allocate
+    DEBUG_MSG("about to allocate field0\n");
+    field<rnumber, FFTW, THREE> *test_field0 = new field<rnumber, FFTW, THREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
+    DEBUG_MSG("finished allocating field0\n");
+    DEBUG_MSG("about to allocate field1\n");
+    field<rnumber, FFTW, THREE> *test_field1 = new field<rnumber, FFTW, THREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
+    DEBUG_MSG("finished allocating field1\n");
+    std::default_random_engine rgen;
+    std::normal_distribution<rnumber> rdist;
+    rgen.seed(1);
+    kspace<FFTW,SMOOTH> *kk = new kspace<FFTW, SMOOTH>(
+            test_field0->clayout, this->dkx, this->dky, this->dkz);
+
+    if (this->myrank == 0)
+    {
+        hid_t stat_file = H5Fopen(
+                (this->simname + std::string(".h5")).c_str(),
+                H5F_ACC_RDWR,
+                H5P_DEFAULT);
+        kk->store(stat_file);
+        H5Fclose(stat_file);
+    }
+
+    // fill up test_field0
+    *test_field0 = 0.0;
+    test_field0->real_space_representation = false;
+    kk->CLOOP_K2(
+            [&](ptrdiff_t cindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex,
+                double k2){
+            test_field0->cval(cindex, 0, 0) = rdist(rgen);
+            test_field0->cval(cindex, 0, 1) = rdist(rgen);
+            test_field0->cval(cindex, 1, 0) = rdist(rgen);
+            test_field0->cval(cindex, 1, 1) = rdist(rgen);
+            test_field0->cval(cindex, 2, 0) = rdist(rgen);
+            test_field0->cval(cindex, 2, 1) = rdist(rgen);
+            if (k2 > 0)
+            {
+                test_field0->cval(cindex, 0, 0) /= sqrt(k2);
+                test_field0->cval(cindex, 0, 1) /= sqrt(k2);
+                test_field0->cval(cindex, 1, 0) /= sqrt(k2);
+                test_field0->cval(cindex, 1, 1) /= sqrt(k2);
+                test_field0->cval(cindex, 2, 0) /= sqrt(k2);
+                test_field0->cval(cindex, 2, 1) /= sqrt(k2);
+            }
+            else
+            {
+                test_field0->cval(cindex, 0, 0) = 0;
+                test_field0->cval(cindex, 0, 1) = 0;
+                test_field0->cval(cindex, 1, 0) = 0;
+                test_field0->cval(cindex, 1, 1) = 0;
+                test_field0->cval(cindex, 2, 0) = 0;
+                test_field0->cval(cindex, 2, 1) = 0;
+            }
+            });
+    // dealias (?!)
+    kk->template dealias<rnumber, THREE>(test_field0->get_cdata());
+    // make the field divergence free
+    kk->template force_divfree<rnumber>(test_field0->get_cdata());
+    // apply symmetrize to test_field0
+    test_field0->symmetrize();
+
+
+    // make copy in test_field1
+    // this MUST be made after symmetrizing test_field0
+    // (alternatively, we may symmetrize test_field1 as well before the ift-dft cycle
+    test_field1->real_space_representation = false;
+    *test_field1 = test_field0->get_cdata();
+
+    // go back and forth with test_field1, to enforce symmetry
+    test_field1->ift();
+    test_field1->dft();
+    test_field1->normalize();
+
+    // now compare the two fields
+    double max_diff = 0;
+    ptrdiff_t ix, iy, iz;
+    double k_at_max_diff = 0;
+    double a0, a1;
+
+    kk->CLOOP_K2(
+            [&](ptrdiff_t cindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex,
+                double k2){
+            double diff_re0 = test_field0->cval(cindex, 0, 0) - test_field1->cval(cindex, 0, 0);
+            double diff_re1 = test_field0->cval(cindex, 1, 0) - test_field1->cval(cindex, 1, 0);
+            double diff_re2 = test_field0->cval(cindex, 2, 0) - test_field1->cval(cindex, 2, 0);
+            double diff_im0 = test_field0->cval(cindex, 0, 1) - test_field1->cval(cindex, 0, 1);
+            double diff_im1 = test_field0->cval(cindex, 1, 1) - test_field1->cval(cindex, 1, 1);
+            double diff_im2 = test_field0->cval(cindex, 2, 1) - test_field1->cval(cindex, 2, 1);
+            double diff = sqrt(diff_re0*diff_re0 + diff_re1*diff_re1 + diff_re2*diff_re2 +
+                               diff_im0*diff_im0 + diff_im1*diff_im1 + diff_im2*diff_im2);
+            double amplitude0 = (test_field0->cval(cindex, 0, 0)*test_field0->cval(cindex, 0, 0) +
+                                 test_field0->cval(cindex, 1, 0)*test_field0->cval(cindex, 1, 0) +
+                                 test_field0->cval(cindex, 2, 0)*test_field0->cval(cindex, 2, 0) +
+                                 test_field0->cval(cindex, 0, 1)*test_field0->cval(cindex, 0, 1) +
+                                 test_field0->cval(cindex, 1, 1)*test_field0->cval(cindex, 1, 1) +
+                                 test_field0->cval(cindex, 2, 1)*test_field0->cval(cindex, 2, 1));
+            double amplitude1 = (test_field1->cval(cindex, 0, 0)*test_field1->cval(cindex, 0, 0) +
+                                 test_field1->cval(cindex, 1, 0)*test_field1->cval(cindex, 1, 0) +
+                                 test_field1->cval(cindex, 2, 0)*test_field1->cval(cindex, 2, 0) +
+                                 test_field1->cval(cindex, 0, 1)*test_field1->cval(cindex, 0, 1) +
+                                 test_field1->cval(cindex, 1, 1)*test_field1->cval(cindex, 1, 1) +
+                                 test_field1->cval(cindex, 2, 1)*test_field1->cval(cindex, 2, 1));
+            double amplitude = sqrt((amplitude0 + amplitude1)/2);
+            if (amplitude > 0)
+            if (diff/amplitude > max_diff)
+            {
+                max_diff = diff / amplitude;
+                ix = xindex;
+                iy = yindex + test_field0->clayout->starts[0];
+                iz = zindex;
+                k_at_max_diff = sqrt(k2);
+                a0 = sqrt(amplitude0);
+                a1 = sqrt(amplitude1);
+            }
+            });
+    DEBUG_MSG("found maximum relative difference %g at ix = %ld, iy = %ld, iz = %ld, wavenumber = %g, amplitudes %g %g\n",
+            max_diff, ix, iy, iz, k_at_max_diff, a0, a1);
+
+    test_field1->io(
+            this->simname + "_fields.h5",
+            "field1",
+            0,
+            false);
+    test_field1->ift();
+    test_field1->io(
+            this->simname + "_fields.h5",
+            "field1",
+            0,
+            false);
+
+    // deallocate
+    delete kk;
+    delete test_field1;
+    delete test_field0;
+    return EXIT_SUCCESS;
+}
+
+template class symmetrize_test<float>;
+template class symmetrize_test<double>;
+
diff --git a/cpp/full_code/symmetrize_test.hpp b/cpp/full_code/symmetrize_test.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..628aee6f5ba3fac23cfbe551418a6ff1213d7d5c
--- /dev/null
+++ b/cpp/full_code/symmetrize_test.hpp
@@ -0,0 +1,63 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2018 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#ifndef SYMMETRIZE_TEST_HPP
+#define SYMMETRIZE_TEST_HPP
+
+
+
+#include <cstdlib>
+#include "base.hpp"
+#include "kspace.hpp"
+#include "field.hpp"
+#include "full_code/test.hpp"
+
+/** \brief A class for testing basic field class functionality.
+ */
+
+template <typename rnumber>
+class symmetrize_test: public test
+{
+    public:
+        std::string fftw_plan_rigor;
+        int random_seed;
+
+        symmetrize_test(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            test(
+                    COMMUNICATOR,
+                    simulation_name){}
+        ~symmetrize_test(){}
+
+        int initialize(void);
+        int do_work(void);
+        int finalize(void);
+        int read_parameters(void);
+};
+
+#endif//SYMMETRIZE_TEST_HPP
+
diff --git a/cpp/full_code/test.cpp b/cpp/full_code/test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5fd265ec7ba661994f2e0664013770db0834fb7d
--- /dev/null
+++ b/cpp/full_code/test.cpp
@@ -0,0 +1,43 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include <cstdlib>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "scope_timer.hpp"
+#include "hdf5_tools.hpp"
+#include "full_code/test.hpp"
+
+
+int test::main_loop(void)
+{
+    TIMEZONE("test::main_loop");
+    this->start_simple_timer();
+    this->do_work();
+    this->print_simple_timer(
+            "do_work required ");
+    return EXIT_SUCCESS;
+}
+
diff --git a/bfps/cpp/full_code/test.hpp b/cpp/full_code/test.hpp
similarity index 98%
rename from bfps/cpp/full_code/test.hpp
rename to cpp/full_code/test.hpp
index 134a01512b3fd836a8ac4d40068b3954752c4844..96ddaf8104f1dd3d050b4acf16a68dbcd539b290 100644
--- a/bfps/cpp/full_code/test.hpp
+++ b/cpp/full_code/test.hpp
@@ -56,7 +56,6 @@ class test: public code_base
         virtual int finalize(void) = 0;
 
         int main_loop(void);
-        virtual int read_parameters(void);
 };
 
 #endif//TEST_HPP
diff --git a/cpp/full_code/test_interpolation.cpp b/cpp/full_code/test_interpolation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e5722fc0cdb3c851695c0c3eeb34a5da97a405eb
--- /dev/null
+++ b/cpp/full_code/test_interpolation.cpp
@@ -0,0 +1,234 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include "full_code/test_interpolation.hpp"
+
+
+template <typename rnumber>
+int test_interpolation<rnumber>::read_parameters(void)
+{
+    TIMEZONE("test_interpolation::read_parameters");
+    this->test::read_parameters();
+    hid_t parameter_file = H5Fopen(
+            (this->simname + std::string(".h5")).c_str(),
+            H5F_ACC_RDONLY,
+            H5P_DEFAULT);
+    this->nparticles = hdf5_tools::read_value<int>(
+            parameter_file, "/parameters/nparticles");
+    this->tracers0_integration_steps = hdf5_tools::read_value<int>(
+            parameter_file, "/parameters/tracers0_integration_steps");
+    this->tracers0_neighbours = hdf5_tools::read_value<int>(
+            parameter_file, "/parameters/tracers0_neighbours");
+    this->tracers0_smoothness = hdf5_tools::read_value<int>(
+            parameter_file, "/parameters/tracers0_smoothness");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int test_interpolation<rnumber>::initialize(void)
+{
+    TIMEZONE("test_interpolation::initialize");
+    this->read_parameters();
+    this->vorticity = new field<rnumber, FFTW, THREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            FFTW_ESTIMATE);
+    this->vorticity->real_space_representation = false;
+
+    this->velocity = new field<rnumber, FFTW, THREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            FFTW_ESTIMATE);
+
+    this->nabla_u = new field<rnumber, FFTW, THREExTHREE>(
+            this->nx, this->ny, this->nz,
+            this->comm,
+            FFTW_ESTIMATE);
+
+    this->kk = new kspace<FFTW, SMOOTH>(
+            this->vorticity->clayout, this->dkx, this->dky, this->dkz);
+
+    if (this->myrank == 0)
+    {
+        hid_t stat_file = H5Fopen(
+                (this->simname + std::string(".h5")).c_str(),
+                H5F_ACC_RDWR,
+                H5P_DEFAULT);
+        this->kk->store(stat_file);
+        H5Fclose(stat_file);
+    }
+
+    this->ps = particles_system_builder(
+                this->velocity,                // (field object)
+                this->kk,                      // (kspace object, contains dkx, dky, dkz)
+                this->tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs)
+                (long long int)nparticles,      // to check coherency between parameters and hdf input file
+                this->simname + "_input.h5",    // particles input filename
+                std::string("/tracers0/state/0"), // dataset name for initial input
+                std::string("/tracers0/rhs/0")  , // dataset name for initial input
+                this->tracers0_neighbours,        // parameter (interpolation no neighbours)
+                this->tracers0_smoothness,        // parameter
+                this->comm,
+                1);
+    this->particles_output_writer_mpi = new particles_output_hdf5<
+        long long int, double, 3>(
+                MPI_COMM_WORLD,
+                "tracers0",
+                nparticles,
+                this->tracers0_integration_steps);
+    this->particles_sample_writer_mpi = new particles_output_sampling_hdf5<
+        long long int, double, 3>(
+                MPI_COMM_WORLD,
+                this->ps->getGlobalNbParticles(),
+                (this->simname + "_output.h5"),
+                "tracers0",
+                "position/0");
+
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int test_interpolation<rnumber>::finalize(void)
+{
+    TIMEZONE("test_interpolation::finalize");
+    delete this->nabla_u;
+    delete this->velocity;
+    delete this->vorticity;
+    delete this->ps.release();
+    delete this->kk;
+    delete particles_output_writer_mpi;
+    delete particles_sample_writer_mpi;
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int test_interpolation<rnumber>::do_work()
+{
+    TIMEZONE("test_interpolation::do_work");
+    *this->nabla_u = 0.0;
+    this->velocity->real_space_representation = false;
+    this->vorticity->real_space_representation = false;
+    this->nabla_u->real_space_representation = false;
+    // read vorticity field
+    this->vorticity->io(
+            this->simname + std::string("_input.h5"),
+            "vorticity",
+            0, true);
+    this->kk->template force_divfree<rnumber>(this->vorticity->get_cdata());
+
+    // compute velocity
+    invert_curl(this->kk, this->vorticity, this->velocity);
+
+    // compute velocity gradient
+    compute_gradient(this->kk, this->velocity, this->nabla_u);
+
+    // go to real space
+    this->vorticity->ift();
+    this->velocity->ift();
+    this->nabla_u->ift();
+
+    DEBUG_MSG("some vorticity values: %g %g %g\n",
+            this->vorticity->rval(20, 1),
+            this->vorticity->rval(200, 2),
+            this->vorticity->rval(741, 0));
+    DEBUG_MSG("corresponding velocity gradient to vorticity values: %g %g %g\n",
+            this->nabla_u->rval( 20, 2, 0) - this->nabla_u->rval( 20, 0, 2),
+            this->nabla_u->rval(200, 1, 0) - this->nabla_u->rval(200, 0, 1),
+            this->nabla_u->rval(741, 1, 2) - this->nabla_u->rval(741, 2, 1));
+
+    // allocate interpolation arrays
+    std::unique_ptr<double[]> p3data;
+    std::unique_ptr<double[]> p9data;
+    if(this->ps->getLocalNbParticles()){
+        p3data.reset(new double[3*this->ps->getLocalNbParticles()]);
+        p9data.reset(new double[9*this->ps->getLocalNbParticles()]);
+    }
+
+    /// sample position
+    std::copy(this->ps->getParticlesState(),
+              this->ps->getParticlesState()+3*this->ps->getLocalNbParticles(),
+              p3data.get());
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "position",
+            this->ps->getParticlesState(),
+            &p3data,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    /// sample velocity at particles' position
+    std::fill_n(p3data.get(), 3*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->velocity, p3data.get());
+    if(p3data){
+        DEBUG_MSG("first vel value is %g\n", p3data.get()[0]);
+    }
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "velocity",
+            this->ps->getParticlesState(),
+            &p3data,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+    /// sample vorticity at particles' position
+    std::fill_n(p3data.get(), 3*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->vorticity, p3data.get());
+    if(p3data){
+        DEBUG_MSG("first vort value is %g\n", p3data.get()[0]);
+    }
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "vorticity",
+            this->ps->getParticlesState(),
+            &p3data,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+    /// sample velocity gradient at particles' position
+    std::fill_n(p9data.get(), 9*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->nabla_u, p9data.get());
+    if(p9data){
+        DEBUG_MSG("first vel gradient value is %g\n", p9data.get()[0]);
+    }
+    this->particles_sample_writer_mpi->template save_dataset<9>(
+            "tracers0",
+            "velocity_gradient",
+            this->ps->getParticlesState(),
+            &p9data,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    // deallocate temporary arrays
+    delete[] p3data.release();
+    delete[] p9data.release();
+    return EXIT_SUCCESS;
+}
+
+template class test_interpolation<float>;
+template class test_interpolation<double>;
+
diff --git a/cpp/full_code/test_interpolation.hpp b/cpp/full_code/test_interpolation.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4d65d4fa8105d8e5f78f3fc9f623929038f8ac0e
--- /dev/null
+++ b/cpp/full_code/test_interpolation.hpp
@@ -0,0 +1,84 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef TEST_INTERPOLATION_HPP
+#define TEST_INTERPOLATION_HPP
+
+
+
+#include <cstdlib>
+#include "base.hpp"
+#include "kspace.hpp"
+#include "full_code/test.hpp"
+#include "particles/particles_system_builder.hpp"
+#include "particles/particles_output_hdf5.hpp"
+#include "particles/particles_sampling.hpp"
+
+/** \brief Interpolation tester.
+ *
+ */
+
+template <typename rnumber>
+class test_interpolation: public test
+{
+    public:
+        int nparticles;
+        int tracers0_integration_steps;
+        int tracers0_neighbours;
+        int tracers0_smoothness;
+
+        std::unique_ptr<abstract_particles_system<long long int, double>> ps;
+
+        particles_output_hdf5<long long int, double,3> *particles_output_writer_mpi;
+        particles_output_sampling_hdf5<long long int, double, 3> *particles_sample_writer_mpi;
+
+        field<rnumber, FFTW, THREE> *velocity, *vorticity;
+        field<rnumber, FFTW, THREExTHREE> *nabla_u;
+
+        kspace<FFTW, SMOOTH> *kk;
+
+        test_interpolation(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            test(
+                    COMMUNICATOR,
+                    simulation_name),
+            particles_output_writer_mpi(nullptr),
+            particles_sample_writer_mpi(nullptr),
+            velocity(nullptr),
+            vorticity(nullptr),
+            nabla_u(nullptr),
+            kk(nullptr) {}
+        ~test_interpolation(){}
+
+        int initialize(void);
+        int do_work(void);
+        int finalize(void);
+
+        int read_parameters(void);
+};
+
+#endif//TEST_INTERPOLATION_HPP
+
diff --git a/bfps/cpp/hdf5_tools.cpp b/cpp/hdf5_tools.cpp
similarity index 71%
rename from bfps/cpp/hdf5_tools.cpp
rename to cpp/hdf5_tools.cpp
index 4328b28703ac60de7e82e4e3e729134ee3ff1520..5a3aef39caa2824f4d08e579d35734a1438ba5ec 100644
--- a/bfps/cpp/hdf5_tools.cpp
+++ b/cpp/hdf5_tools.cpp
@@ -1,4 +1,6 @@
 #include "hdf5_tools.hpp"
+#include <cfloat>
+#include <climits>
 
 int hdf5_tools::require_size_single_dataset(hid_t dset, int tsize)
 {
@@ -136,6 +138,37 @@ std::vector<number> hdf5_tools::read_vector(
     return result;
 }
 
+template <typename number>
+number hdf5_tools::read_value(
+        const hid_t group,
+        const std::string dset_name)
+{
+    number result;
+    hid_t dset;
+    hid_t mem_dtype;
+    if (typeid(number) == typeid(int))
+        mem_dtype = H5Tcopy(H5T_NATIVE_INT);
+    else if (typeid(number) == typeid(double))
+        mem_dtype = H5Tcopy(H5T_NATIVE_DOUBLE);
+    if (H5Lexists(group, dset_name.c_str(), H5P_DEFAULT))
+    {
+        dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT);
+        H5Dread(dset, mem_dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &result);
+        H5Dclose(dset);
+    }
+    else
+    {
+        DEBUG_MSG("attempted to read dataset %s which does not exist.\n",
+                dset_name.c_str());
+        if (typeid(number) == typeid(int))
+            result = INT_MAX;
+        else if (typeid(number) == typeid(double))
+            result = number(DBL_MAX);
+    }
+    H5Tclose(mem_dtype);
+    return result;
+}
+
 template <typename dtype>
 std::vector<dtype> hdf5_tools::read_vector_with_single_rank(
         const int myrank,
@@ -175,17 +208,29 @@ std::string hdf5_tools::read_string(
         const hid_t group,
         const std::string dset_name)
 {
-    hid_t dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT);
-    hid_t space = H5Dget_space(dset);
-    hid_t memtype = H5Dget_type(dset);
-    char *string_data = (char*)malloc(256);
-    H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);
-    std::string std_string_data = std::string(string_data);
-    free(string_data);
-    H5Sclose(space);
-    H5Tclose(memtype);
-    H5Dclose(dset);
-    return std_string_data;
+    if (H5Lexists(group, dset_name.c_str(), H5P_DEFAULT))
+    {
+        hid_t dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT);
+        hid_t space = H5Dget_space(dset);
+        hid_t memtype = H5Dget_type(dset);
+        // fsanitize complains unless I have a static array here
+        // but that doesn't actually work (data is read incorrectly).
+        // this is caught by bfps.test_NSVEparticles
+        char *string_data = (char*)malloc(256);
+        H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string_data);
+        std::string std_string_data = std::string(string_data);
+        free(string_data);
+        H5Sclose(space);
+        H5Tclose(memtype);
+        H5Dclose(dset);
+        return std_string_data;
+    }
+    else
+    {
+        DEBUG_MSG("attempted to read dataset %s which does not exist.\n",
+                dset_name.c_str());
+        return std::string("parameter does not exist");
+    }
 }
 
 template
@@ -214,3 +259,13 @@ std::vector<double> hdf5_tools::read_vector_with_single_rank<double>(
         const hid_t file_id,
         const std::string dset_name);
 
+template
+int hdf5_tools::read_value<int>(
+        const hid_t,
+        const std::string);
+
+template
+double hdf5_tools::read_value<double>(
+        const hid_t,
+        const std::string);
+
diff --git a/bfps/cpp/hdf5_tools.hpp b/cpp/hdf5_tools.hpp
similarity index 95%
rename from bfps/cpp/hdf5_tools.hpp
rename to cpp/hdf5_tools.hpp
index 456beefe362c5d0871f8014c7a1cc468614e6374..99ba45a1c25593e063e33c54521ed492822aca45 100644
--- a/bfps/cpp/hdf5_tools.hpp
+++ b/cpp/hdf5_tools.hpp
@@ -79,6 +79,11 @@ namespace hdf5_tools
     std::string read_string(
             const hid_t group,
             const std::string dset_name);
+
+    template <typename number>
+    number read_value(
+            const hid_t group,
+            const std::string dset_name);
 }
 
 #endif//HDF5_TOOLS_HPP
diff --git a/bfps/cpp/kspace.cpp b/cpp/kspace.cpp
similarity index 73%
rename from bfps/cpp/kspace.cpp
rename to cpp/kspace.cpp
index 01accf4dc1f24d7fe92ee622dd4faf9eaf12a485..5accb969cd93735b0cfb4d5deecfbb8ccd914910 100644
--- a/bfps/cpp/kspace.cpp
+++ b/cpp/kspace.cpp
@@ -23,6 +23,9 @@
 **********************************************************************/
 
 
+
+#define NDEBUG
+
 #include <cmath>
 #include <cstdlib>
 #include <algorithm>
@@ -31,6 +34,8 @@
 #include "scope_timer.hpp"
 #include "shared_array.hpp"
 
+
+
 template <field_backend be,
           kspace_dealias_type dt>
 template <field_components fc>
@@ -66,17 +71,17 @@ kspace<be, dt>::kspace(
             for (i = 0; i<int(this->layout->subsizes[0]); i++)
             {
                 ii = i + this->layout->starts[0];
-                if (ii <= int(this->layout->sizes[1]/2))
+                if (ii <= int(this->layout->sizes[0]/2))
                     this->ky[i] = this->dky*ii;
                 else
-                    this->ky[i] = this->dky*(ii - int(this->layout->sizes[1]));
+                    this->ky[i] = this->dky*(ii - int(this->layout->sizes[0]));
             }
             for (i = 0; i<int(this->layout->sizes[1]); i++)
             {
-                if (i <= int(this->layout->sizes[0]/2))
+                if (i <= int(this->layout->sizes[1]/2))
                     this->kz[i] = this->dkz*i;
                 else
-                    this->kz[i] = this->dkz*(i - int(this->layout->sizes[0]));
+                    this->kz[i] = this->dkz*(i - int(this->layout->sizes[1]));
             }
             switch(dt)
             {
@@ -116,8 +121,6 @@ kspace<be, dt>::kspace(
         std::fill_n(nshell_local, this->nshells, 0);
     });
 
-    std::vector<std::unordered_map<int, double>> dealias_filter_threaded(omp_get_max_threads());
-
     this->CLOOP_K2_NXMODES(
             [&](ptrdiff_t cindex,
                 ptrdiff_t xindex,
@@ -131,9 +134,6 @@ kspace<be, dt>::kspace(
                 kshell_local_thread.getMine()[int(knorm/this->dk)] += nxmodes*knorm;
                 nshell_local_thread.getMine()[int(knorm/this->dk)] += nxmodes;
             }
-            if (dt == SMOOTH){
-                dealias_filter_threaded[omp_get_thread_num()][int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));
-            }
     });
 
     // Merge results
@@ -141,14 +141,6 @@ kspace<be, dt>::kspace(
     kshell_local_thread.mergeParallel();
     nshell_local_thread.mergeParallel();
 
-    if (dt == SMOOTH){
-        for(int idxMerge = 0 ; idxMerge < int(dealias_filter_threaded.size()) ; ++idxMerge){
-            for(const auto kv : dealias_filter_threaded[idxMerge]){
-                this->dealias_filter[kv.first] = kv.second;
-            }
-        }
-    }
-
     MPI_Allreduce(
             nshell_local_thread.getMasterData(),
             &this->nshell.front(),
@@ -293,6 +285,42 @@ void kspace<be, dt>::ball_filter(
                 });
 }
 
+/** \brief Filter a field using a M-filter to reproduce dissipation range.
+ *
+ *  Filter's  Fourier space expression:
+ *  \f[
+ *       \hat{\phi^M_\ell}(k) =
+ *       \exp(-\frac{(3.54 k \ell)^(122*(\ell)^(0.0836))}{2}) 
+ *       \left( 1 + \frac{(k \eta/0.0636)^3.44}{1 + (k \eta/ 0.0621)^3.44} \right)^{1/2}
+ *  \f]
+ */
+template <field_backend be,
+          kspace_dealias_type dt>
+template <typename rnumber,
+          field_components fc>
+void kspace<be, dt>::general_M_filter(
+        typename fftw_interface<rnumber>::complex *__restrict__ a,
+        const double ell)
+{
+    const double prefactor0 = 1.0;
+    this->CLOOP_K2(
+            [&](ptrdiff_t cindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex,
+                double k2){
+                if (k2 > 0)
+                {
+                    double argument = sqrt(k2)*ell;
+                    double prefactor = prefactor0;
+                    for (unsigned int tcounter=0; tcounter<2*ncomp(fc); tcounter++)
+                        ((rnumber*)a)[2*ncomp(fc)*cindex + tcounter] *= (
+                            prefactor * (exp(-0.5*pow((2.9*argument),(68.0*(pow(ell,0.74))))) * sqrt(1.0 + (pow((argument/0.06),3.8))/(1.0 + (pow((argument/0.057),3.8))))));
+                }
+                });
+}
+
+
 /** \brief Filter a field using a Gaussian kernel.
  *
  *  Filter's mathematical expression in Fourier space is as follows:
@@ -386,6 +414,12 @@ int kspace<be, dt>::filter(
                 a,
                 2*acos(0.)/wavenumber);
     }
+    else if (filter_type == std::string("general_M"))
+    {
+        this->template general_M_filter<rnumber, fc>(
+                a,
+                2*acos(0.)/wavenumber);
+    }
     return EXIT_SUCCESS;
 }
 
@@ -437,6 +471,7 @@ int kspace<be, dt>::filter_calibrated_ell(
         const double ell,
         std::string filter_type)
 {
+    TIMEZONE("kspace::filter_calibrated_ell");
     if (filter_type == std::string("sharp_Fourier_sphere"))
     {
         this->template low_pass<rnumber, fc>(
@@ -455,6 +490,12 @@ int kspace<be, dt>::filter_calibrated_ell(
                 a,
                 ell);
     }
+    else if (filter_type == std::string("general_M"))
+    {
+        this->template general_M_filter<rnumber, fc>(
+                a,
+                ell);
+    }
     return EXIT_SUCCESS;
 }
 
@@ -464,19 +505,22 @@ template <typename rnumber,
           field_components fc>
 void kspace<be, dt>::dealias(typename fftw_interface<rnumber>::complex *__restrict__ a)
 {
+    TIMEZONE("kspace::dealias");
     switch(dt)
     {
         case TWO_THIRDS:
             this->low_pass<rnumber, fc>(a, this->kM);
             break;
         case SMOOTH:
-            this->CLOOP_K2(
+            this->CLOOP(
                 [&](ptrdiff_t cindex,
                     ptrdiff_t xindex,
                     ptrdiff_t yindex,
-                    ptrdiff_t zindex,
-                    double k2){
-                    double tval = this->dealias_filter[int(round(k2 / this->dk2))];
+                    ptrdiff_t zindex){
+                    double kk2 = (pow(this->kx[xindex]/this->kMx, 2) +
+                                  pow(this->ky[yindex]/this->kMy, 2) +
+                                  pow(this->kz[zindex]/this->kMz, 2));
+                    double tval = exp(-36.0 * (pow(kk2, 18)));
                     for (unsigned int tcounter=0; tcounter<2*ncomp(fc); tcounter++)
                         ((rnumber*)a)[2*ncomp(fc)*cindex + tcounter] *= tval;
                 });
@@ -529,7 +573,7 @@ void kspace<be, dt>::cospectrum(
         const std::string dset_name,
         const hsize_t toffset)
 {
-    TIMEZONE("field::cospectrum");
+    TIMEZONE("field::cospectrum2");
     shared_array<double> spec_local_thread(this->nshells*ncomp(fc)*ncomp(fc),[&](double* spec_local){
         std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0);
     });
@@ -575,13 +619,13 @@ void kspace<be, dt>::cospectrum(
             case THREExTHREE:
                 offset[4] = 0;
                 offset[5] = 0;
-                count[4] = ncomp(fc);
-                count[5] = ncomp(fc);
+                count[4] = 3;
+                count[5] = 3;
             case THREE:
                 offset[2] = 0;
                 offset[3] = 0;
-                count[2] = ncomp(fc);
-                count[3] = ncomp(fc);
+                count[2] = 3;
+                count[3] = 3;
             default:
                 offset[0] = toffset;
                 offset[1] = 0;
@@ -597,6 +641,124 @@ void kspace<be, dt>::cospectrum(
     }
 }
 
+template <field_backend be,
+          kspace_dealias_type dt>
+template <typename rnumber,
+          field_components fc>
+void kspace<be, dt>::cospectrum(
+        const rnumber(* __restrict a)[2],
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset)
+{
+    TIMEZONE("field::cospectrum1");
+    shared_array<double> spec_local_thread(this->nshells*ncomp(fc)*ncomp(fc),[&](double* spec_local){
+        std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0);
+    });
+
+    this->CLOOP_K2_NXMODES(
+            [&](ptrdiff_t cindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex,
+                double k2,
+                int nxmodes){
+            if (k2 <= this->kM2)
+            {
+                double* spec_local = spec_local_thread.getMine();
+                int tmp_int = int(sqrt(k2) / this->dk)*ncomp(fc)*ncomp(fc);
+                for (hsize_t i=0; i<ncomp(fc); i++)
+                for (hsize_t j=0; j<ncomp(fc); j++){
+                    spec_local[tmp_int + i*ncomp(fc)+j] += nxmodes * (
+                        (a[ncomp(fc)*cindex + i][0] * a[ncomp(fc)*cindex + j][0]) +
+                        (a[ncomp(fc)*cindex + i][1] * a[ncomp(fc)*cindex + j][1]));
+                }
+            }
+            });
+
+    spec_local_thread.mergeParallel();
+
+    std::vector<double> spec;
+    spec.resize(this->nshells*ncomp(fc)*ncomp(fc), 0);
+    MPI_Allreduce(
+            spec_local_thread.getMasterData(),
+            &spec.front(),
+            spec.size(),
+            MPI_DOUBLE, MPI_SUM, this->layout->comm);
+    if (this->layout->myrank == 0)
+    {
+        hid_t dset, wspace, mspace;
+        hsize_t count[(ndim(fc)-2)*2], offset[(ndim(fc)-2)*2], dims[(ndim(fc)-2)*2];
+        dset = H5Dopen(group, ("spectra/" + dset_name).c_str(), H5P_DEFAULT);
+        wspace = H5Dget_space(dset);
+        H5Sget_simple_extent_dims(wspace, dims, NULL);
+        switch (fc)
+        {
+            case THREExTHREE:
+                offset[4] = 0;
+                offset[5] = 0;
+                count[4] = 3;
+                count[5] = 3;
+            case THREE:
+                offset[2] = 0;
+                offset[3] = 0;
+                count[2] = 3;
+                count[3] = 3;
+            default:
+                offset[0] = toffset;
+                offset[1] = 0;
+                count[0] = 1;
+                count[1] = this->nshells;
+        }
+        mspace = H5Screate_simple((ndim(fc)-2)*2, count, NULL);
+        H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL);
+        H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, &spec.front());
+        H5Sclose(wspace);
+        H5Sclose(mspace);
+        H5Dclose(dset);
+    }
+}
+
+template <field_backend be,
+          kspace_dealias_type dt>
+template <typename rnumber,
+          field_components fc>
+double kspace<be, dt>::L2norm(
+        const rnumber(* __restrict a)[2])
+{
+    TIMEZONE("field::L2norm");
+    shared_array<double> L2_local_thread(1,[&](double* spec_local){
+        std::fill_n(spec_local, 1, 0);
+    });
+
+    this->CLOOP_K2_NXMODES(
+            [&](ptrdiff_t cindex,
+                ptrdiff_t xindex,
+                ptrdiff_t yindex,
+                ptrdiff_t zindex,
+                double k2,
+                int nxmodes){
+            {
+                double* L2_local = L2_local_thread.getMine();
+                for (hsize_t i=0; i<ncomp(fc); i++){
+                    L2_local[0] += nxmodes * (
+                        (a[ncomp(fc)*cindex + i][0] * a[ncomp(fc)*cindex + i][0]) +
+                        (a[ncomp(fc)*cindex + i][1] * a[ncomp(fc)*cindex + i][1]));
+                }
+            }
+            });
+
+    L2_local_thread.mergeParallel();
+
+    double L2;
+    MPI_Allreduce(
+            L2_local_thread.getMasterData(),
+            &L2,
+            1,
+            MPI_DOUBLE, MPI_SUM, this->layout->comm);
+    return sqrt(L2 * this->dkx * this->dky * this->dkz);
+}
+
 
 template class kspace<FFTW, TWO_THIRDS>;
 template class kspace<FFTW, SMOOTH>;
@@ -801,6 +963,94 @@ template void kspace<FFTW, SMOOTH>::cospectrum<double, THREExTHREE>(
         const std::string dset_name,
         const hsize_t toffset);
 
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, ONE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREExTHREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, ONE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREExTHREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+
+template void kspace<FFTW, SMOOTH>::cospectrum<float, ONE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, SMOOTH>::cospectrum<float, THREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, SMOOTH>::cospectrum<float, THREExTHREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, SMOOTH>::cospectrum<double, ONE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, SMOOTH>::cospectrum<double, THREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+template void kspace<FFTW, SMOOTH>::cospectrum<double, THREExTHREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a,
+        const hid_t group,
+        const std::string dset_name,
+        const hsize_t toffset);
+
+template double kspace<FFTW, TWO_THIRDS>::L2norm<float, ONE>(
+        const typename fftw_interface<float>::complex *__restrict__ a);
+template double kspace<FFTW, TWO_THIRDS>::L2norm<float, THREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a);
+template double kspace<FFTW, TWO_THIRDS>::L2norm<float, THREExTHREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a);
+template double kspace<FFTW, TWO_THIRDS>::L2norm<double, ONE>(
+        const typename fftw_interface<double>::complex *__restrict__ a);
+template double kspace<FFTW, TWO_THIRDS>::L2norm<double, THREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a);
+template double kspace<FFTW, TWO_THIRDS>::L2norm<double, THREExTHREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a);
+
+template double kspace<FFTW, SMOOTH>::L2norm<float, ONE>(
+        const typename fftw_interface<float>::complex *__restrict__ a);
+template double kspace<FFTW, SMOOTH>::L2norm<float, THREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a);
+template double kspace<FFTW, SMOOTH>::L2norm<float, THREExTHREE>(
+        const typename fftw_interface<float>::complex *__restrict__ a);
+template double kspace<FFTW, SMOOTH>::L2norm<double, ONE>(
+        const typename fftw_interface<double>::complex *__restrict__ a);
+template double kspace<FFTW, SMOOTH>::L2norm<double, THREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a);
+template double kspace<FFTW, SMOOTH>::L2norm<double, THREExTHREE>(
+        const typename fftw_interface<double>::complex *__restrict__ a);
+
 template void kspace<FFTW, SMOOTH>::force_divfree<float>(
        typename fftw_interface<float>::complex *__restrict__ a);
 template void kspace<FFTW, SMOOTH>::force_divfree<double>(
diff --git a/bfps/cpp/kspace.hpp b/cpp/kspace.hpp
similarity index 91%
rename from bfps/cpp/kspace.hpp
rename to cpp/kspace.hpp
index d8bc008daade0704c5f8c1981c4a4f24400a5868..0d36e9cceece13cd7fec2e741b4a087aa4e26156 100644
--- a/bfps/cpp/kspace.hpp
+++ b/cpp/kspace.hpp
@@ -54,7 +54,6 @@ class kspace
         /* mode and dealiasing information */
         double kMx, kMy, kMz, kM, kM2;
         std::vector<double> kx, ky, kz;
-        std::unordered_map<int, double> dealias_filter;
         std::vector<double> kshell;
         std::vector<int64_t> nshell;
         int nshells;
@@ -88,6 +87,12 @@ class kspace
                 typename fftw_interface<rnumber>::complex *__restrict__ a,
                 const double sigma);
 
+        template <typename rnumber,
+                  field_components fc>
+        void general_M_filter(
+                typename fftw_interface<rnumber>::complex *__restrict__ a,
+                const double sigma);
+
         template <typename rnumber,
                   field_components fc>
         int filter(
@@ -114,6 +119,20 @@ class kspace
                 const hid_t group,
                 const std::string dset_name,
                 const hsize_t toffset);
+
+        template <typename rnumber,
+                  field_components fc>
+        void cospectrum(
+                const rnumber(* __restrict__ a)[2],
+                const hid_t group,
+                const std::string dset_name,
+                const hsize_t toffset);
+
+        template <typename rnumber,
+                  field_components fc>
+        double L2norm(
+                const rnumber(* __restrict__ a)[2]);
+
         template <class func_type>
         void CLOOP(func_type expression)
         {
@@ -150,8 +169,8 @@ class kspace
                         for (hsize_t xindex = 0; xindex < this->layout->subsizes[2]; xindex++)
                         {
                             double k2 = (this->kx[xindex]*this->kx[xindex] +
-                                  this->ky[yindex]*this->ky[yindex] +
-                                  this->kz[zindex]*this->kz[zindex]);
+                                         this->ky[yindex]*this->ky[yindex] +
+                                         this->kz[zindex]*this->kz[zindex]);
                             expression(cindex, xindex, yindex, zindex, k2);
                             cindex++;
                         }
@@ -173,7 +192,6 @@ class kspace
                                             + zindex*this->layout->subsizes[2];
                         hsize_t xindex = 0;
                         double k2 = (
-                                this->kx[xindex]*this->kx[xindex] +
                                 this->ky[yindex]*this->ky[yindex] +
                                 this->kz[zindex]*this->kz[zindex]);
                         expression(cindex, xindex, yindex, zindex, k2, 1);
diff --git a/bfps/cpp/omputils.hpp b/cpp/omputils.hpp
similarity index 100%
rename from bfps/cpp/omputils.hpp
rename to cpp/omputils.hpp
diff --git a/cpp/particles/.tocompile b/cpp/particles/.tocompile
new file mode 100644
index 0000000000000000000000000000000000000000..02874ed792f4eedb859e1b779facd3d2c775ec08
--- /dev/null
+++ b/cpp/particles/.tocompile
@@ -0,0 +1,2 @@
+mpicxx -g main_tocompile.cpp -o /tmp/main_test_part.exe -I/home/bbramas/Projects/bfps/bfps/cpp/ -I/home/bbramas/Downloads/hdf5install/include -L/home/bbramas/Downloads/hdf5install/lib -lhdf5 -lsz -lz
+mpicxx -fPIC -rdynamic -g NSVE-v2.0.1-single.cpp -o /tmp/NSVE-v2.0.1-single.exe -I/home/bbramas/Projects/bfps/bfps/cpp/ -I/home/bbramas/Downloads/hdf5install/include -I/home/bbramas/Downloads/fftw-3.3.4/install/include/ -L/home/bbramas/Downloads/hdf5install/lib -lhdf5 -lsz -lz -L/home/bbramas/.local/lib/python2.7/site-packages/bfps-2.0.1.post31+g12693ea-py2.7.egg/bfps/ -lbfps -fopenmp -lgomp -L/home/bbramas/Downloads/fftw-3.3.4/install/lib/ -lfftw3_mpi -lfftw3f_mpi -lfftw3_omp -lfftw3f_omp -lfftw3 -lfftw3f
diff --git a/cpp/particles/abstract_particles_input.hpp b/cpp/particles/abstract_particles_input.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..48c38bc592ddc442489d437327b421312bfd3f55
--- /dev/null
+++ b/cpp/particles/abstract_particles_input.hpp
@@ -0,0 +1,46 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef ABSTRACT_PARTICLES_INPUT_HPP
+#define ABSTRACT_PARTICLES_INPUT_HPP
+
+#include <tuple>
+
+template <class partsize_t, class real_number>
+class abstract_particles_input {
+public:
+    virtual ~abstract_particles_input(){}
+
+    virtual partsize_t getTotalNbParticles()  = 0;
+    virtual partsize_t getLocalNbParticles()  = 0;
+    virtual int getNbRhs()  = 0;
+
+    virtual std::unique_ptr<real_number[]> getMyParticles()  = 0;
+    virtual std::unique_ptr<partsize_t[]> getMyParticlesIndexes()  = 0;
+    virtual std::vector<std::unique_ptr<real_number[]>> getMyRhs()  = 0;
+};
+
+
+#endif
diff --git a/bfps/cpp/particles/abstract_particles_output.hpp b/cpp/particles/abstract_particles_output.hpp
similarity index 71%
rename from bfps/cpp/particles/abstract_particles_output.hpp
rename to cpp/particles/abstract_particles_output.hpp
index a6eccaea003618b8acbf1a9252c1e6c5bedb3378..6dc85cebba83e8650329700f15284081301ba3c5 100644
--- a/bfps/cpp/particles/abstract_particles_output.hpp
+++ b/cpp/particles/abstract_particles_output.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef ABSTRACT_PARTICLES_OUTPUT
 #define ABSTRACT_PARTICLES_OUTPUT
 
@@ -13,7 +38,7 @@
 #include "scope_timer.hpp"
 #include "env_utils.hpp"
 
-template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs>
+template <class partsize_t, class real_number, int size_particle_positions>
 class abstract_particles_output {
     MPI_Comm mpi_com;
     MPI_Comm mpi_com_writer;
@@ -28,11 +53,13 @@ class abstract_particles_output {
     std::unique_ptr<real_number[]> buffer_particles_positions_send;
     std::vector<std::unique_ptr<real_number[]>> buffer_particles_rhs_send;
     partsize_t size_buffers_send;
+    int buffers_size_particle_rhs_send;
 
     std::unique_ptr<real_number[]> buffer_particles_positions_recv;
     std::vector<std::unique_ptr<real_number[]>> buffer_particles_rhs_recv;
     std::unique_ptr<partsize_t[]> buffer_indexes_recv;
-    partsize_t size_buffers_recv;
+    partsize_t size_buffers_recv;    
+    int buffers_size_particle_rhs_recv;
 
     int nb_processes_involved;
     bool current_is_involved;
@@ -41,6 +68,10 @@ class abstract_particles_output {
     partsize_t particles_chunk_current_offset;
 
 protected:
+    MPI_Comm& getCom(){
+        return mpi_com;
+    }
+
     MPI_Comm& getComWriter(){
         return mpi_com_writer;
     }
@@ -61,8 +92,10 @@ public:
     abstract_particles_output(MPI_Comm in_mpi_com, const partsize_t inTotalNbParticles, const int in_nb_rhs) throw()
             : mpi_com(in_mpi_com), my_rank(-1), nb_processes(-1),
                 total_nb_particles(inTotalNbParticles), nb_rhs(in_nb_rhs),
-                buffer_particles_rhs_send(in_nb_rhs), size_buffers_send(-1),
-                buffer_particles_rhs_recv(in_nb_rhs), size_buffers_recv(-1),
+                buffer_particles_rhs_send(in_nb_rhs), size_buffers_send(0),
+                buffers_size_particle_rhs_send(0),
+                buffer_particles_rhs_recv(in_nb_rhs), size_buffers_recv(0),
+                buffers_size_particle_rhs_recv(0),
                 nb_processes_involved(0), current_is_involved(true), particles_chunk_per_process(0),
                 particles_chunk_current_size(0), particles_chunk_current_offset(0) {
 
@@ -129,18 +162,21 @@ public:
     }
 
     void releaseMemory(){
-        buffer_indexes_send.release();
-        buffer_particles_positions_send.release();
-        size_buffers_send = -1;
-        buffer_indexes_recv.release();
-        buffer_particles_positions_recv.release();
-        size_buffers_recv = -1;
+        delete[] buffer_indexes_send.release();
+        delete[] buffer_particles_positions_send.release();
+        size_buffers_send = 0;
+        delete[] buffer_indexes_recv.release();
+        delete[] buffer_particles_positions_recv.release();
+        size_buffers_recv = 0;
         for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
-            buffer_particles_rhs_send[idx_rhs].release();
-            buffer_particles_rhs_recv[idx_rhs].release();
+            delete[] buffer_particles_rhs_send[idx_rhs].release();
+            delete[] buffer_particles_rhs_recv[idx_rhs].release();
         }
+        buffers_size_particle_rhs_send = 0;
+        buffers_size_particle_rhs_recv = 0;
     }
 
+    template <int size_particle_rhs>
     void save(
             const real_number input_particles_positions[],
             const std::unique_ptr<real_number[]> input_particles_rhs[],
@@ -153,13 +189,25 @@ public:
         {
             TIMEZONE("sort-to-distribute");
 
-            if(size_buffers_send < nb_particles && nb_particles){
-                buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[nb_particles]);
-                buffer_particles_positions_send.reset(new real_number[nb_particles*size_particle_positions]);
+            if(size_buffers_send < nb_particles){
+                size_buffers_send = nb_particles;
+                buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[size_buffers_send]);
+                buffer_particles_positions_send.reset(new real_number[size_buffers_send*size_particle_positions]);
+                
+                if(buffers_size_particle_rhs_send < size_particle_rhs){
+                    buffers_size_particle_rhs_send = size_particle_rhs;
+                }
                 for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
-                    buffer_particles_rhs_send[idx_rhs].reset(new real_number[nb_particles*size_particle_rhs]);
+                    buffer_particles_rhs_send[idx_rhs].reset(new real_number[size_buffers_send*buffers_size_particle_rhs_send]);
+                }
+            }
+            else if(buffers_size_particle_rhs_send < size_particle_rhs){
+                buffers_size_particle_rhs_send = size_particle_rhs;
+                if(size_buffers_send > 0){
+                    for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
+                        buffer_particles_rhs_send[idx_rhs].reset(new real_number[size_buffers_send*buffers_size_particle_rhs_send]);
+                    }
                 }
-                size_buffers_send = nb_particles;
             }
 
             for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
@@ -204,13 +252,23 @@ public:
         const int nb_to_receive = exchanger.getTotalToRecv();
         assert(nb_to_receive == particles_chunk_current_size);
 
-        if(size_buffers_recv < nb_to_receive && nb_to_receive){
-            buffer_indexes_recv.reset(new partsize_t[nb_to_receive]);
-            buffer_particles_positions_recv.reset(new real_number[nb_to_receive*size_particle_positions]);
+        if(size_buffers_recv < nb_to_receive){
+            size_buffers_recv = nb_to_receive;
+            buffer_indexes_recv.reset(new partsize_t[size_buffers_recv]);
+            buffer_particles_positions_recv.reset(new real_number[size_buffers_recv*size_particle_positions]);
+
+            buffers_size_particle_rhs_recv = size_particle_rhs;
             for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
-                buffer_particles_rhs_recv[idx_rhs].reset(new real_number[nb_to_receive*size_particle_rhs]);
+                buffer_particles_rhs_recv[idx_rhs].reset(new real_number[size_buffers_recv*buffers_size_particle_rhs_recv]);
+            }
+        }
+        else if(buffers_size_particle_rhs_recv < size_particle_rhs){
+            buffers_size_particle_rhs_recv = size_particle_rhs;
+            if(size_buffers_recv > 0){
+                for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
+                    buffer_particles_rhs_recv[idx_rhs].reset(new real_number[size_buffers_recv*buffers_size_particle_rhs_recv]);
+                }
             }
-            size_buffers_recv = nb_to_receive;
         }
 
         {
@@ -229,13 +287,14 @@ public:
             return;
         }
 
-        if(size_buffers_send < nb_to_receive && nb_to_receive){
-            buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[nb_to_receive]);
-            buffer_particles_positions_send.reset(new real_number[nb_to_receive*size_particle_positions]);
+        if(size_buffers_send < nb_to_receive){
+            size_buffers_send = nb_to_receive;
+            buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[size_buffers_send]);
+            buffer_particles_positions_send.reset(new real_number[size_buffers_send*size_particle_positions]);
+            buffers_size_particle_rhs_send = size_particle_rhs;
             for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
-                buffer_particles_rhs_send[idx_rhs].reset(new real_number[nb_to_receive*size_particle_rhs]);
+                buffer_particles_rhs_send[idx_rhs].reset(new real_number[size_buffers_send*buffers_size_particle_rhs_send]);
             }
-            size_buffers_send = nb_to_receive;
         }
 
         {
@@ -260,11 +319,11 @@ public:
         }
 
         write(idx_time_step, buffer_particles_positions_send.get(), buffer_particles_rhs_send.data(),
-              nb_to_receive, particles_chunk_current_offset);
+              nb_to_receive, particles_chunk_current_offset, size_particle_rhs);
     }
 
     virtual void write(const int idx_time_step, const real_number* positions, const std::unique_ptr<real_number[]>* rhs,
-                       const partsize_t nb_particles, const partsize_t particles_idx_offset) = 0;
+                       const partsize_t nb_particles, const partsize_t particles_idx_offset, const int size_particle_rhs) = 0;
 };
 
 #endif
diff --git a/cpp/particles/abstract_particles_system.hpp b/cpp/particles/abstract_particles_system.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2f2f510f4bdad22b26b243607c7bbddcd2536771
--- /dev/null
+++ b/cpp/particles/abstract_particles_system.hpp
@@ -0,0 +1,129 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef ABSTRACT_PARTICLES_SYSTEM_HPP
+#define ABSTRACT_PARTICLES_SYSTEM_HPP
+
+#include <memory>
+
+//- Not generic to enable sampling begin
+#include "field.hpp"
+#include "kspace.hpp"
+//- Not generic to enable sampling end
+
+
+template <class partsize_t, class real_number>
+class abstract_particles_system {
+public:
+    virtual ~abstract_particles_system(){}
+
+    virtual void compute() = 0;
+
+    virtual void compute_p2p() = 0;
+
+    virtual void compute_particles_inner() = 0;
+
+    virtual void enforce_unit_orientation() = 0;
+
+    virtual void add_Lagrange_multipliers() = 0;
+
+    virtual void compute_sphere_particles_inner(const real_number particle_extra_rhs[]) = 0;
+    virtual void compute_ellipsoid_particles_inner(const real_number particle_extra_rhs[]) = 0;
+
+    virtual void move(const real_number dt) = 0;
+
+    virtual void redistribute() = 0;
+
+    virtual void inc_step_idx() = 0;
+
+    virtual void shift_rhs_vectors() = 0;
+
+    virtual void completeLoop(const real_number dt) = 0;
+
+    virtual void completeLoopWithVorticity(
+            const real_number dt,
+            const real_number sampled_vorticity[]) = 0;
+
+    virtual void completeLoopWithVelocityGradient(
+            const real_number dt,
+            const real_number sampled_velocity_gradient[]) = 0;
+
+    virtual const real_number* getParticlesState() const = 0;
+
+    virtual std::unique_ptr<real_number[]> extractParticlesState(const int firstState, const int lastState) const = 0;
+
+    virtual const std::unique_ptr<real_number[]>* getParticlesRhs() const = 0;
+
+    virtual const partsize_t* getParticlesIndexes() const = 0;
+
+    virtual partsize_t getLocalNbParticles() const = 0;
+
+    virtual partsize_t getGlobalNbParticles() const = 0;
+
+    virtual int getNbRhs() const = 0;
+
+    virtual int get_step_idx() const = 0;
+
+    //- Not generic to enable sampling begin
+    virtual void sample_compute_field(const field<float, FFTW, ONE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    virtual void sample_compute_field(const field<float, FFTW, THREE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    virtual void sample_compute_field(const field<float, FFTW, THREExTHREE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    virtual void sample_compute_field(const field<double, FFTW, ONE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    virtual void sample_compute_field(const field<double, FFTW, THREE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    virtual void sample_compute_field(const field<double, FFTW, THREExTHREE>& sample_field,
+                                real_number sample_rhs[]) = 0;
+    //- Not generic to enable sampling end
+
+    template <typename rnumber, field_backend be, field_components fc>
+    void completeLoopWithExtraField(
+            const real_number dt,
+            const field<rnumber, be, fc>& in_field) {
+        static_assert((fc == THREE) || (fc == THREExTHREE), "only THREE or THREExTHREE is supported for now");
+        if (fc == THREE)
+        {
+            std::unique_ptr<real_number[]> extra_rhs(new real_number[getLocalNbParticles()*3]());
+            std::fill_n(extra_rhs.get(), 3*getLocalNbParticles(), 0);
+            sample_compute_field(in_field, extra_rhs.get());
+            completeLoopWithVorticity(dt, extra_rhs.get());
+        }
+        else if (fc == THREExTHREE)
+        {
+            std::unique_ptr<real_number[]> extra_rhs(new real_number[getLocalNbParticles()*9]());
+            std::fill_n(extra_rhs.get(), 9*getLocalNbParticles(), 0);
+            sample_compute_field(in_field, extra_rhs.get());
+            completeLoopWithVelocityGradient(dt, extra_rhs.get());
+        }
+    }
+
+    virtual int setParticleFileLayout(std::vector<hsize_t>) = 0;
+    virtual std::vector<hsize_t> getParticleFileLayout() = 0;
+};
+
+#endif
diff --git a/bfps/cpp/particles/alltoall_exchanger.hpp b/cpp/particles/alltoall_exchanger.hpp
similarity index 79%
rename from bfps/cpp/particles/alltoall_exchanger.hpp
rename to cpp/particles/alltoall_exchanger.hpp
index 2beaf092e8e6c7a801efd492270d29c2d4dba398..d3423523d9b9d02347514972c3bcb3f92129df56 100644
--- a/bfps/cpp/particles/alltoall_exchanger.hpp
+++ b/cpp/particles/alltoall_exchanger.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef ALLTOALL_EXCHANGER_HPP
 #define ALLTOALL_EXCHANGER_HPP
 
diff --git a/bfps/cpp/particles/env_utils.hpp b/cpp/particles/env_utils.hpp
similarity index 59%
rename from bfps/cpp/particles/env_utils.hpp
rename to cpp/particles/env_utils.hpp
index cd6fb3026ac19397fb525235f3d4f87e2cc2bb94..829fd5b46f879c4485276d3f3866b8ae3d81e8d5 100644
--- a/bfps/cpp/particles/env_utils.hpp
+++ b/cpp/particles/env_utils.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef ENV_UTILS_HPP
 #define ENV_UTILS_HPP
 
diff --git a/cpp/particles/lock_free_bool_array.hpp b/cpp/particles/lock_free_bool_array.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5e32a7d41bec3ddc7d56962d14d338a78f2b084a
--- /dev/null
+++ b/cpp/particles/lock_free_bool_array.hpp
@@ -0,0 +1,58 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef LOCK_FREE_BOOL_ARRAY_HPP
+#define LOCK_FREE_BOOL_ARRAY_HPP
+
+#include <vector>
+#include <memory>
+
+class lock_free_bool_array{
+    std::vector<std::unique_ptr<long int>> keys;
+
+public:
+    explicit lock_free_bool_array(const long int inNbKeys = 512){
+        keys.resize(inNbKeys);
+        for(std::unique_ptr<long int>& k : keys){
+            k.reset(new long int(0));
+        }
+    }
+
+    void lock(const long int inKey){
+        volatile long int* k = keys[inKey%keys.size()].get();
+        long int res = 1;
+        while(res == 1){
+            res = __sync_val_compare_and_swap(k, 0, res);
+        }
+    }
+
+    void unlock(const long int inKey){
+        volatile long int* k = keys[inKey%keys.size()].get();
+        assert(k && *k);
+        (*k) = 0;
+    }
+};
+
+#endif
diff --git a/cpp/particles/p2p_computer.hpp b/cpp/particles/p2p_computer.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..74d9c9ebeff2e61864fe5e827f103d1691709e4d
--- /dev/null
+++ b/cpp/particles/p2p_computer.hpp
@@ -0,0 +1,110 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef P2P_COMPUTER_HPP
+#define P2P_COMPUTER_HPP
+
+#include <cstring>
+#include <cassert>
+
+template <class real_number, class partsize_t>
+class p2p_computer{
+
+    bool isActive;
+
+    /** \brief A simple distance weighting function.
+     *
+     *  This function returns 1 if a distance is smaller than a cut-off length,
+     *  i.e. particle 1 interacts with particle 2 if particle 2 is inside a
+     *  sphere of radius `cutoff' centered on particle 1.
+     */
+    static double dumb_distance_weight(
+            const double dist_pow2,
+            const double cutoff){
+        // this function should only be called for interacting particles,
+        // and particles interact if they are closer than cutoff.
+        assert(dist_pow2 < cutoff*cutoff);
+        return 1.0;
+    }
+
+
+public:
+    p2p_computer() : isActive(true){}
+
+    template <int size_particle_rhs>
+    void init_result_array(real_number rhs[], const partsize_t nbParticles) const{
+        memset(rhs, 0, sizeof(real_number)*nbParticles*size_particle_rhs);
+    }
+
+    template <int size_particle_rhs>
+    void reduce_particles_rhs(real_number rhs_dst[], const real_number rhs_src[], const partsize_t nbParticles) const{
+        static_assert(size_particle_rhs == 6, "This kernel works only with 6 values per particle's rhs");
+        for(int idx_part = 0 ; idx_part < nbParticles ; ++idx_part){
+            // We merge only the values modified by the current kernel (3-5)
+            for(int idx_rhs = 3 ; idx_rhs < size_particle_rhs ; ++idx_rhs){
+                rhs_dst[idx_part*size_particle_rhs+idx_rhs] += rhs_src[idx_part*size_particle_rhs+idx_rhs];
+            }
+        }
+    }
+
+    template <int size_particle_positions, int size_particle_rhs>
+    void compute_interaction(const real_number pos_part1[], real_number rhs_part1[],
+                             const real_number pos_part2[], real_number rhs_part2[],
+                             const real_number dist_pow2, const real_number cutoff,
+                             const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{
+        static_assert(size_particle_positions == 6, "This kernel works only with 6 values for one particle's position+orientation");
+        static_assert(size_particle_rhs == 6, "This kernel works only with 6 values per particle's rhs");
+
+        // TODO: a reasonable way of choosing between different distance_weight functions should be thought of.
+        // We need to ask Michael about how flexible this distance_weight needs to be.
+        const double ww = dumb_distance_weight(dist_pow2, cutoff);
+        ///
+        /// term in equation is:
+        ///
+        /// \f[
+        ///     (4 / \tau) \sum_j W_\ell ( | x^i - x^j | ) (p^i \cdot p^j)p^j
+        /// \f]
+        ///
+        const double dot_product = (pos_part1[3+IDXC_X]*pos_part2[3+IDXC_X] +
+                              pos_part1[3+IDXC_Y]*pos_part2[3+IDXC_Y] +
+                              pos_part1[3+IDXC_Z]*pos_part2[3+IDXC_Z]);
+        rhs_part1[3+IDXC_X] += pos_part2[3+IDXC_X] * 4 * ww * dot_product;
+        rhs_part1[3+IDXC_Y] += pos_part2[3+IDXC_Y] * 4 * ww * dot_product;
+        rhs_part1[3+IDXC_Z] += pos_part2[3+IDXC_Z] * 4 * ww * dot_product;
+        rhs_part2[3+IDXC_X] += pos_part1[3+IDXC_X] * 4 * ww * dot_product;
+        rhs_part2[3+IDXC_Y] += pos_part1[3+IDXC_Y] * 4 * ww * dot_product;
+        rhs_part2[3+IDXC_Z] += pos_part1[3+IDXC_Z] * 4 * ww * dot_product;
+    }
+
+    bool isEnable() const {
+        return isActive;
+    }
+
+    void setEnable(const bool inIsActive) {
+        isActive = inIsActive;
+    }
+};
+
+#endif
diff --git a/cpp/particles/p2p_computer_empty.hpp b/cpp/particles/p2p_computer_empty.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0599dc1abb08207fcb761a534c282f8fceda5ce3
--- /dev/null
+++ b/cpp/particles/p2p_computer_empty.hpp
@@ -0,0 +1,54 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef P2P_COMPUTER_EMPTY_HPP
+#define P2P_COMPUTER_EMPTY_HPP
+
+#include <cstring>
+
+template <class real_number, class partsize_t>
+class p2p_computer_empty{
+public:
+    template <int size_particle_rhs>
+    void init_result_array(real_number /*rhs*/[], const partsize_t /*nbParticles*/) const{
+    }
+
+    template <int size_particle_rhs>
+    void reduce_particles_rhs(real_number /*rhs_dst*/[], const real_number /*rhs_src*/[], const partsize_t /*nbParticles*/) const{
+    }
+
+    template <int size_particle_positions, int size_particle_rhs>
+    void compute_interaction(const real_number /*pos_part1*/[], real_number /*rhs_part1*/[],
+                             const real_number /*pos_part2*/[], real_number /*rhs_part2*/[],
+                             const real_number /*dist_pow2*/,  const real_number /*cutoff*/,
+                             const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{
+    }
+
+    constexpr static bool isEnable() {
+        return false;
+    }
+};
+
+#endif
diff --git a/cpp/particles/p2p_distr_mpi.hpp b/cpp/particles/p2p_distr_mpi.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7ab3a8b36722b8aa03ec9f4c070a68aa1fbe1776
--- /dev/null
+++ b/cpp/particles/p2p_distr_mpi.hpp
@@ -0,0 +1,784 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef P2P_DISTR_MPI_HPP
+#define P2P_DISTR_MPI_HPP
+
+#include <mpi.h>
+
+#include <vector>
+#include <memory>
+#include <cassert>
+
+#include <type_traits>
+#include <omp.h>
+#include <algorithm>
+
+#include "scope_timer.hpp"
+#include "particles_utils.hpp"
+#include "p2p_tree.hpp"
+#include "lock_free_bool_array.hpp"
+
+template <class partsize_t, class real_number>
+class p2p_distr_mpi {
+protected:
+    static const int MaxNbRhs = 10;
+
+    enum MpiTag{
+        TAG_NB_PARTICLES,
+        TAG_POSITION_PARTICLES,
+        TAG_RESULT_PARTICLES,
+    };
+
+    struct NeighborDescriptor{
+        partsize_t nbParticlesToExchange;
+        int destProc;
+        int nbLevelsToExchange;
+        bool isRecv;
+
+        std::unique_ptr<real_number[]> toRecvAndMerge;
+        std::unique_ptr<real_number[]> toCompute;
+        std::unique_ptr<real_number[]> results;
+    };
+
+    enum Action{
+        NOTHING_TODO = 512,
+        RECV_PARTICLES,
+        COMPUTE_PARTICLES,
+        RELEASE_BUFFER_PARTICLES,
+        MERGE_PARTICLES
+    };
+
+    MPI_Comm current_com;
+
+    int my_rank;
+    int nb_processes;
+    int nb_processes_involved;
+
+    const std::pair<int,int> current_partition_interval;
+    const int current_partition_size;
+    const std::array<size_t,3> field_grid_dim;
+
+    std::unique_ptr<int[]> partition_interval_size_per_proc;
+    std::unique_ptr<int[]> partition_interval_offset_per_proc;
+
+    std::unique_ptr<partsize_t[]> current_offset_particles_for_partition;
+
+    std::vector<std::pair<Action,int>> whatNext;
+    std::vector<MPI_Request> mpiRequests;
+    std::vector<NeighborDescriptor> neigDescriptors;
+
+    std::array<real_number,3> spatial_box_width;
+    std::array<real_number,3> spatial_box_offset;
+
+    const real_number cutoff_radius_compute;
+    const int nb_cells_factor;
+    const real_number cutoff_radius;
+    std::array<long int,3> nb_cell_levels;
+
+    template <class DataType, int sizeElement>
+    static void permute_copy(const partsize_t offsetIdx, const partsize_t nbElements,
+                             const std::pair<long int,partsize_t> permutation[],
+                             DataType data[], std::vector<unsigned char>* buffer){
+        buffer->resize(nbElements*sizeof(DataType)*sizeElement);
+        DataType* dataBuffer = reinterpret_cast<DataType*>(buffer->data());
+
+        // Permute
+        for(partsize_t idxPart = 0 ; idxPart < nbElements ; ++idxPart){
+            const partsize_t srcData = permutation[idxPart].second;
+            const partsize_t destData = idxPart;
+            for(int idxVal = 0 ; idxVal < sizeElement ; ++idxVal){
+                dataBuffer[destData*sizeElement + idxVal]
+                        = data[srcData*sizeElement + idxVal];
+            }
+        }
+
+        // Copy back
+        for(partsize_t idxPart = 0 ; idxPart < nbElements ; ++idxPart){
+            const partsize_t srcData = idxPart;
+            const partsize_t destData = idxPart+offsetIdx;
+            for(int idxVal = 0 ; idxVal < sizeElement ; ++idxVal){
+                data[destData*sizeElement + idxVal]
+                        = dataBuffer[srcData*sizeElement + idxVal];
+            }
+        }
+    }
+
+    static int foundGridFactor(const real_number in_cutoff_radius, const std::array<real_number,3>& in_spatial_box_width){
+        int idx_factor = 1;
+        while(in_cutoff_radius <= in_spatial_box_width[IDXC_Z]/real_number(idx_factor+1)){
+            idx_factor += 1;
+        }
+        return idx_factor;
+    }
+
+public:
+    ////////////////////////////////////////////////////////////////////////////
+
+    p2p_distr_mpi(MPI_Comm in_current_com,
+                     const std::pair<int,int>& in_current_partitions,
+                     const std::array<size_t,3>& in_field_grid_dim,
+                     const std::array<real_number,3>& in_spatial_box_width,
+                     const std::array<real_number,3>& in_spatial_box_offset,
+                     const real_number in_cutoff_radius)
+        : current_com(in_current_com),
+            my_rank(-1), nb_processes(-1),nb_processes_involved(-1),
+            current_partition_interval(in_current_partitions),
+            current_partition_size(current_partition_interval.second-current_partition_interval.first),
+            field_grid_dim(in_field_grid_dim),
+            spatial_box_width(in_spatial_box_width), spatial_box_offset(in_spatial_box_offset),
+            cutoff_radius_compute(in_cutoff_radius),
+            nb_cells_factor(foundGridFactor(in_cutoff_radius, in_spatial_box_width)),
+            cutoff_radius(in_spatial_box_width[IDXC_Z]/real_number(nb_cells_factor)){
+
+        AssertMpi(MPI_Comm_rank(current_com, &my_rank));
+        AssertMpi(MPI_Comm_size(current_com, &nb_processes));
+
+        partition_interval_size_per_proc.reset(new int[nb_processes]);
+        AssertMpi( MPI_Allgather( const_cast<int*>(&current_partition_size), 1, MPI_INT,
+                                  partition_interval_size_per_proc.get(), 1, MPI_INT,
+                                  current_com) );
+        assert(partition_interval_size_per_proc[my_rank] == current_partition_size);
+
+        partition_interval_offset_per_proc.reset(new int[nb_processes+1]);
+        partition_interval_offset_per_proc[0] = 0;
+        for(int idxProc = 0 ; idxProc < nb_processes ; ++idxProc){
+            partition_interval_offset_per_proc[idxProc+1] = partition_interval_offset_per_proc[idxProc] + partition_interval_size_per_proc[idxProc];
+        }
+
+        current_offset_particles_for_partition.reset(new partsize_t[current_partition_size+1]);
+
+        nb_processes_involved = nb_processes;
+        while(nb_processes_involved != 0 && partition_interval_size_per_proc[nb_processes_involved-1] == 0){
+            nb_processes_involved -= 1;
+        }
+        assert(nb_processes_involved != 0);
+        for(int idx_proc_involved = 0 ; idx_proc_involved < nb_processes_involved ; ++idx_proc_involved){
+            assert(partition_interval_size_per_proc[idx_proc_involved] != 0);
+        }
+
+        assert(int(field_grid_dim[IDXC_Z]) == partition_interval_offset_per_proc[nb_processes_involved]);
+
+        nb_cell_levels[IDXC_X] = nb_cells_factor;
+        nb_cell_levels[IDXC_Y] = nb_cells_factor;
+        nb_cell_levels[IDXC_Z] = nb_cells_factor;
+    }
+
+    virtual ~p2p_distr_mpi(){}
+
+    ////////////////////////////////////////////////////////////////////////////
+
+    int getGridFactor() const{
+        return nb_cells_factor;
+    }
+
+    real_number getGridCutoff() const{
+        return cutoff_radius;
+    }
+
+    long int get_cell_coord_x_from_index(const long int index) const{
+        return index % nb_cell_levels[IDXC_X];
+    }
+
+    long int get_cell_coord_y_from_index(const long int index) const{
+        return (index % (nb_cell_levels[IDXC_X]*nb_cell_levels[IDXC_Y]))
+                / nb_cell_levels[IDXC_X];
+    }
+
+    long int get_cell_coord_z_from_index(const long int index) const{
+        return index / (nb_cell_levels[IDXC_X]*nb_cell_levels[IDXC_Y]);
+    }
+
+    long int first_cell_level_proc(const int dest_proc) const{
+        const real_number field_section_width_z = spatial_box_width[IDXC_Z]/real_number(field_grid_dim[IDXC_Z]);
+        return static_cast<long int>((field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc]))/cutoff_radius);
+    }
+
+    long int last_cell_level_proc(const int dest_proc) const{
+        const real_number field_section_width_z = spatial_box_width[IDXC_Z]/real_number(field_grid_dim[IDXC_Z]);
+        const long int limite = static_cast<long int>((field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc+1])
+                                     - std::numeric_limits<real_number>::epsilon())/cutoff_radius);
+        if(static_cast<real_number>(limite)*cutoff_radius
+                == field_section_width_z*real_number(partition_interval_offset_per_proc[dest_proc+1])){
+            return limite-1;
+        }
+        return limite;
+    }
+
+    real_number apply_pbc(real_number pos, IDX_COMPONENT_3D dim) const{
+        while( pos < spatial_box_offset[dim] ){
+            pos += spatial_box_width[dim];
+        }
+        while( spatial_box_width[dim]+spatial_box_offset[dim] <= pos){
+            pos -= spatial_box_width[dim];
+        }
+        return pos;
+    }
+
+    std::array<long int,3> get_cell_coordinate(const real_number pos_x, const real_number pos_y,
+                                               const real_number pos_z) const {
+        const real_number diff_x = apply_pbc(pos_x,IDXC_X) - spatial_box_offset[IDXC_X];
+        const real_number diff_y = apply_pbc(pos_y,IDXC_Y) - spatial_box_offset[IDXC_Y];
+        const real_number diff_z = apply_pbc(pos_z,IDXC_Z) - spatial_box_offset[IDXC_Z];
+        std::array<long int,3> coord;
+        coord[IDXC_X] = static_cast<long int>(diff_x/cutoff_radius);
+        coord[IDXC_Y] = static_cast<long int>(diff_y/cutoff_radius);
+        coord[IDXC_Z] = static_cast<long int>(diff_z/cutoff_radius);
+        return coord;
+    }
+
+    long int get_cell_idx(const real_number pos_x, const real_number pos_y,
+                          const real_number pos_z) const {
+        std::array<long int,3> coord = get_cell_coordinate(pos_x, pos_y, pos_z);
+        return ((coord[IDXC_Z]*nb_cell_levels[IDXC_Y])+coord[IDXC_Y])*nb_cell_levels[IDXC_X]+coord[IDXC_X];
+    }
+
+    real_number compute_distance_r2(const real_number x1, const real_number y1, const real_number z1,
+                                    const real_number x2, const real_number y2, const real_number z2,
+                                    const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const {
+        real_number diff_x = std::abs(apply_pbc(x1,IDXC_X)-apply_pbc(x2,IDXC_X)+xshift_coef*spatial_box_width[IDXC_X]);
+        assert(diff_x <= 2*cutoff_radius);
+
+        real_number diff_y = std::abs(apply_pbc(y1,IDXC_X)-apply_pbc(y2,IDXC_X)+yshift_coef*spatial_box_width[IDXC_Y]);
+        assert(diff_y <= 2*cutoff_radius);
+
+        real_number diff_z = std::abs(apply_pbc(z1,IDXC_X)-apply_pbc(z2,IDXC_X)+zshift_coef*spatial_box_width[IDXC_Z]);
+        assert(diff_z <= 2*cutoff_radius);
+
+        return (diff_x*diff_x) + (diff_y*diff_y) + (diff_z*diff_z);
+    }
+
+    template <class computer_class, int size_particle_positions, int size_particle_rhs>
+    void compute_distr(computer_class& in_computer,
+                       const partsize_t current_my_nb_particles_per_partition[],
+                       real_number particles_positions[],
+                       real_number particles_current_rhs[],
+                       partsize_t inout_index_particles[]){
+        TIMEZONE("compute_distr");
+
+        // Some processes might not be involved
+        if(nb_processes_involved <= my_rank){
+            return;
+        }
+
+        const long int my_top_z_cell_level = last_cell_level_proc(my_rank);
+        const long int my_down_z_cell_level = first_cell_level_proc(my_rank);
+        const long int my_nb_cell_levels = 1+my_top_z_cell_level-my_down_z_cell_level;
+
+        current_offset_particles_for_partition[0] = 0;
+        partsize_t myTotalNbParticles = 0;
+        for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
+            myTotalNbParticles += current_my_nb_particles_per_partition[idxPartition];
+            current_offset_particles_for_partition[idxPartition+1] = current_offset_particles_for_partition[idxPartition] + current_my_nb_particles_per_partition[idxPartition];
+        }
+
+        // Compute box idx for each particle
+        std::unique_ptr<long int[]> particles_coord(new long int[current_offset_particles_for_partition[current_partition_size]]);
+
+        {
+            for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
+                #pragma omp parallel for schedule(static)
+                for(partsize_t idxPart = current_offset_particles_for_partition[idxPartition] ; idxPart < current_offset_particles_for_partition[idxPartition+1] ; ++idxPart ){
+                    particles_coord[idxPart] = get_cell_idx(particles_positions[(idxPart)*size_particle_positions + IDXC_X],
+                                                                              particles_positions[(idxPart)*size_particle_positions + IDXC_Y],
+                                                                              particles_positions[(idxPart)*size_particle_positions + IDXC_Z]);
+                    assert(my_down_z_cell_level <= get_cell_coord_z_from_index(particles_coord[idxPart]));
+                    assert(get_cell_coord_z_from_index(particles_coord[idxPart]) <= my_top_z_cell_level);
+                }
+            }
+
+            std::vector<std::pair<long int,partsize_t>> part_to_sort;
+
+            // Sort each partition in cells
+            for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
+                part_to_sort.clear();
+
+                for(partsize_t idxPart = current_offset_particles_for_partition[idxPartition] ; idxPart < current_offset_particles_for_partition[idxPartition+1] ; ++idxPart ){
+                    part_to_sort.emplace_back();
+                    part_to_sort.back().first = particles_coord[idxPart];
+                    part_to_sort.back().second = idxPart;
+                }
+
+                assert(partsize_t(part_to_sort.size()) == (current_my_nb_particles_per_partition[idxPartition]));
+
+                std::sort(part_to_sort.begin(), part_to_sort.end(),
+                          [](const std::pair<long int,partsize_t>& p1,
+                             const std::pair<long int,partsize_t>& p2){
+                    return p1.first < p2.first;
+                });
+
+                // Permute array using buffer
+                std::vector<unsigned char> buffer;
+                permute_copy<real_number, size_particle_positions>(current_offset_particles_for_partition[idxPartition],
+                                                                   current_my_nb_particles_per_partition[idxPartition],
+                                                                   part_to_sort.data(), particles_positions, &buffer);
+                permute_copy<real_number, size_particle_rhs>(current_offset_particles_for_partition[idxPartition],
+                                                             current_my_nb_particles_per_partition[idxPartition],
+                                                             part_to_sort.data(), particles_current_rhs, &buffer);
+                permute_copy<partsize_t, 1>(current_offset_particles_for_partition[idxPartition],
+                                            current_my_nb_particles_per_partition[idxPartition],
+                                            part_to_sort.data(), inout_index_particles, &buffer);
+                permute_copy<long int, 1>(current_offset_particles_for_partition[idxPartition],
+                                            current_my_nb_particles_per_partition[idxPartition],
+                                            part_to_sort.data(), particles_coord.get(), &buffer);
+            }
+        }
+
+        // Build the tree
+        p2p_tree<std::vector<std::pair<partsize_t,partsize_t>>> my_tree(nb_cell_levels);
+
+        for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
+            long int current_cell_idx = -1;
+            partsize_t current_nb_particles_in_cell = 0;
+            partsize_t current_cell_offset = 0;
+
+            for(partsize_t idx_part = current_offset_particles_for_partition[idxPartition] ;
+                            idx_part != current_offset_particles_for_partition[idxPartition+1]; ++idx_part){
+                if(particles_coord[idx_part] != current_cell_idx){
+                    if(current_nb_particles_in_cell){
+                        my_tree.getCell(current_cell_idx).emplace_back(current_cell_offset,current_nb_particles_in_cell);
+                    }
+                    current_cell_idx = particles_coord[idx_part];
+                    current_nb_particles_in_cell = 1;
+                    current_cell_offset = idx_part;
+                }
+                else{
+                    current_nb_particles_in_cell += 1;
+                }
+            }
+            if(current_nb_particles_in_cell){
+                my_tree.getCell(current_cell_idx).emplace_back(current_cell_offset,current_nb_particles_in_cell);
+
+            }
+        }
+
+        // Offset per cell layers
+        long int previous_index = 0;
+        variable_used_only_in_assert(previous_index);
+        std::unique_ptr<partsize_t[]> particles_offset_layers(new partsize_t[my_nb_cell_levels+1]());
+        for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
+            for(partsize_t idx_part = current_offset_particles_for_partition[idxPartition] ;
+                            idx_part != current_offset_particles_for_partition[idxPartition+1]; ++idx_part){
+                const long int part_box_z_index = get_cell_coord_z_from_index(particles_coord[idx_part]);
+                assert(my_down_z_cell_level <= part_box_z_index);
+                assert(part_box_z_index <= my_top_z_cell_level);
+                particles_offset_layers[part_box_z_index+1-my_down_z_cell_level] += 1;
+                assert(previous_index <= part_box_z_index);
+                previous_index = part_box_z_index;
+            }
+        }
+        for(long int idx_layer = 0 ; idx_layer < my_nb_cell_levels ; ++idx_layer){
+            particles_offset_layers[idx_layer+1] += particles_offset_layers[idx_layer];
+        }
+
+        // Reset vectors
+        assert(whatNext.size() == 0);
+        assert(mpiRequests.size() == 0);
+        neigDescriptors.clear();
+
+        // Find process with at least one neighbor
+        {
+            int dest_proc = (my_rank+1)%nb_processes_involved;
+            while(dest_proc != my_rank
+                  && (my_top_z_cell_level == first_cell_level_proc(dest_proc)
+                      || (my_top_z_cell_level+1)%nb_cell_levels[IDXC_Z] == first_cell_level_proc(dest_proc))){
+                // Find if we have to send 1 or 2 cell levels
+                int nb_levels_to_send = 1;
+                if(my_nb_cell_levels > 1 // I have more than one level
+                        && (my_top_z_cell_level-1+2)%nb_cell_levels[IDXC_Z] <= last_cell_level_proc(dest_proc)){
+                    nb_levels_to_send += 1;
+                }
+
+                NeighborDescriptor descriptor;
+                descriptor.destProc = dest_proc;
+                descriptor.nbLevelsToExchange = nb_levels_to_send;
+                descriptor.nbParticlesToExchange = particles_offset_layers[my_nb_cell_levels] - particles_offset_layers[my_nb_cell_levels-nb_levels_to_send];
+                descriptor.isRecv = false;
+
+                neigDescriptors.emplace_back(std::move(descriptor));
+
+                dest_proc = (dest_proc+1)%nb_processes_involved;
+            }
+
+            int src_proc = (my_rank-1+nb_processes_involved)%nb_processes_involved;
+            while(src_proc != my_rank
+                  && (last_cell_level_proc(src_proc) == my_down_z_cell_level
+                      || (last_cell_level_proc(src_proc)+1)%nb_cell_levels[IDXC_Z] == my_down_z_cell_level)){
+                // Find if we have to send 1 or 2 cell levels
+                int nb_levels_to_recv = 1;
+                if(my_nb_cell_levels > 1 // I have more than one level
+                        && first_cell_level_proc(src_proc) <= (my_down_z_cell_level-1+2)%nb_cell_levels[IDXC_Z]){
+                    nb_levels_to_recv += 1;
+                }
+
+                NeighborDescriptor descriptor;
+                descriptor.destProc = src_proc;
+                descriptor.nbLevelsToExchange = nb_levels_to_recv;
+                descriptor.nbParticlesToExchange = -1;
+                descriptor.isRecv = true;
+
+                neigDescriptors.emplace_back(std::move(descriptor));
+
+                src_proc = (src_proc-1+nb_processes_involved)%nb_processes_involved;
+            }
+        }
+
+        //////////////////////////////////////////////////////////////////////
+        /// Exchange the number of particles in each partition
+        /// Could involve only here but I do not think it will be a problem
+        //////////////////////////////////////////////////////////////////////
+
+        assert(whatNext.size() == 0);
+        assert(mpiRequests.size() == 0);
+#ifndef NDEBUG // Just for assertion
+        std::vector<int> willsend(nb_processes_involved, 0);
+        std::vector<int> willrecv(nb_processes_involved, 0);
+#endif
+
+        for(int idxDescr = 0 ; idxDescr < int(neigDescriptors.size()) ; ++idxDescr){
+            NeighborDescriptor& descriptor = neigDescriptors[idxDescr];
+
+            if(descriptor.isRecv == false){
+                whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
+                mpiRequests.emplace_back();
+                AssertMpi(MPI_Isend(const_cast<partsize_t*>(&descriptor.nbParticlesToExchange),
+                                    1, particles_utils::GetMpiType(partsize_t()),
+                                    descriptor.destProc, TAG_NB_PARTICLES,
+                                    current_com, &mpiRequests.back()));
+#ifndef NDEBUG // Just for assertion
+                willsend[descriptor.destProc] += 1;
+#endif
+                if(descriptor.nbParticlesToExchange){
+                    whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
+                    mpiRequests.emplace_back();
+                    assert(descriptor.nbParticlesToExchange*size_particle_positions < std::numeric_limits<int>::max());
+                    AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_positions]),
+                              int(descriptor.nbParticlesToExchange*size_particle_positions), particles_utils::GetMpiType(real_number()),
+                              descriptor.destProc, TAG_POSITION_PARTICLES,
+                              current_com, &mpiRequests.back()));
+
+                    assert(descriptor.toRecvAndMerge == nullptr);
+                    descriptor.toRecvAndMerge.reset(new real_number[descriptor.nbParticlesToExchange*size_particle_rhs]);
+                    whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr});
+                    mpiRequests.emplace_back();
+                    assert(descriptor.nbParticlesToExchange*size_particle_rhs < std::numeric_limits<int>::max());
+                    AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), int(descriptor.nbParticlesToExchange*size_particle_rhs),
+                                        particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_RESULT_PARTICLES,
+                                        current_com, &mpiRequests.back()));
+                }
+            }
+            else{
+#ifndef NDEBUG // Just for assertion
+                willrecv[descriptor.destProc] += 1;
+#endif
+                whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr});
+                mpiRequests.emplace_back();
+                AssertMpi(MPI_Irecv(&descriptor.nbParticlesToExchange,
+                      1, particles_utils::GetMpiType(partsize_t()), descriptor.destProc, TAG_NB_PARTICLES,
+                      current_com, &mpiRequests.back()));
+            }
+        }
+
+#ifndef NDEBUG // Just for assertion
+        {
+            if(myrank == 0){
+                std::vector<int> willsendall(nb_processes_involved*nb_processes_involved, 0);// TODO debug
+                std::vector<int> willrecvall(nb_processes_involved*nb_processes_involved, 0);// TODO debug
+
+                MPI_Gather(willrecv.data(), nb_processes_involved, MPI_INT, willrecvall.data(),
+                            nb_processes_involved, MPI_INT, 0, MPI_COMM_WORLD);
+                MPI_Gather(willsend.data(), nb_processes_involved, MPI_INT, willsendall.data(),
+                            nb_processes_involved, MPI_INT, 0, MPI_COMM_WORLD);
+
+                for(int idxproc = 0 ; idxproc < nb_processes_involved ; ++idxproc){
+                    for(int idxtest = 0 ; idxtest < nb_processes_involved ; ++idxtest){
+                        assert(willsendall[idxproc*nb_processes_involved + idxtest]
+                                == willrecvall[idxtest*nb_processes_involved + idxproc]);
+                    }
+                }
+            }
+            else{
+                MPI_Gather(willrecv.data(), nb_processes_involved, MPI_INT, nullptr,
+                            0, MPI_INT, 0, MPI_COMM_WORLD);
+                MPI_Gather(willsend.data(), nb_processes_involved, MPI_INT, nullptr,
+                            0, MPI_INT, 0, MPI_COMM_WORLD);
+            }
+        }
+#endif
+
+        lock_free_bool_array cells_locker(512);
+
+        TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads())
+        #pragma omp parallel default(shared)
+        {
+            #pragma omp master
+            {
+                while(mpiRequests.size()){
+                    TIMEZONE("wait-loop");
+                    assert(mpiRequests.size() == whatNext.size());
+
+                    int idxDone = int(mpiRequests.size());
+                    {
+                        TIMEZONE("wait");
+                        AssertMpi(MPI_Waitany(int(mpiRequests.size()), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE));
+                    }
+                    const std::pair<Action, int> releasedAction = whatNext[idxDone];
+                    std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]);
+                    std::swap(whatNext[idxDone], whatNext[mpiRequests.size()-1]);
+                    mpiRequests.pop_back();
+                    whatNext.pop_back();
+
+                    //////////////////////////////////////////////////////////////////////
+                    /// Data to exchange particles
+                    //////////////////////////////////////////////////////////////////////
+                    if(releasedAction.first == RECV_PARTICLES){
+                        TIMEZONE("post-recv-particles");
+                        NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
+                        assert(descriptor.isRecv);
+                        const int destProc = descriptor.destProc;
+                        const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange;
+                        assert(NbParticlesToReceive != -1);
+                        assert(descriptor.toCompute == nullptr);
+
+                        if(NbParticlesToReceive){
+                            descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]);
+                            whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second});
+                            mpiRequests.emplace_back();
+                            assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max());
+                            AssertMpi(MPI_Irecv(descriptor.toCompute.get(), int(NbParticlesToReceive*size_particle_positions),
+                                                particles_utils::GetMpiType(real_number()), destProc, TAG_POSITION_PARTICLES,
+                                                current_com, &mpiRequests.back()));
+                        }
+                    }
+
+                    //////////////////////////////////////////////////////////////////////
+                    /// Computation
+                    //////////////////////////////////////////////////////////////////////
+                    if(releasedAction.first == COMPUTE_PARTICLES){
+                        TIMEZONE("compute-particles");
+                        NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
+                        assert(descriptor.isRecv);
+                        const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange;
+
+                        assert(descriptor.toCompute != nullptr);
+                        descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]);
+                        in_computer.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive);
+
+                        // Compute
+                        partsize_t idxPart = 0;
+                        while(idxPart != NbParticlesToReceive){
+                            const long int current_cell_idx = get_cell_idx(descriptor.toCompute[idxPart*size_particle_positions + IDXC_X],
+                                                                           descriptor.toCompute[idxPart*size_particle_positions + IDXC_Y],
+                                                                           descriptor.toCompute[idxPart*size_particle_positions + IDXC_Z]);
+                            partsize_t nb_parts_in_cell = 1;
+                            while(idxPart+nb_parts_in_cell != NbParticlesToReceive
+                                  && current_cell_idx == get_cell_idx(descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_X],
+                                                                     descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_Y],
+                                                                     descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_Z])){
+                                nb_parts_in_cell += 1;
+                            }
+
+                            #pragma omp task default(shared) firstprivate(idxPart, nb_parts_in_cell, current_cell_idx)
+                            {
+                                const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
+                                long int neighbors_indexes[27];
+                                std::array<real_number,3> shift[27];
+                                const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, shift, true);
+
+                                // with other interval
+                                for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
+                                    cells_locker.lock(neighbors_indexes[idx_neighbor]);
+
+                                    for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
+                                        for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){
+                                            for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
+                                                const real_number dist_r2 = compute_distance_r2(descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_X],
+                                                                                                descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Y],
+                                                                                                descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Z],
+                                                                                                particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
+                                                                                                particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
+                                                                                                particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
+                                                                                                shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
+                                                if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
+                                                    in_computer.template compute_interaction<size_particle_positions, size_particle_rhs>(
+                                                                        &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions],
+                                                                        &descriptor.results[(idxPart+idx_p1)*size_particle_rhs],
+                                                                        &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
+                                                                        &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
+                                                                        dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
+                                                }
+                                            }
+                                        }
+                                    }
+
+                                    cells_locker.unlock(neighbors_indexes[idx_neighbor]);
+                                }
+                            }
+
+                            idxPart += nb_parts_in_cell;
+                        }
+
+                        #pragma omp taskwait
+
+                        // Send back
+                        const int destProc = descriptor.destProc;
+                        whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second});
+                        mpiRequests.emplace_back();
+                        assert(NbParticlesToReceive*size_particle_rhs < std::numeric_limits<int>::max());
+                        AssertMpi(MPI_Isend(descriptor.results.get(), int(NbParticlesToReceive*size_particle_rhs),
+                                            particles_utils::GetMpiType(real_number()), destProc, TAG_RESULT_PARTICLES,
+                                            current_com, &mpiRequests.back()));
+                        delete[] descriptor.toCompute.release();
+                    }
+                    //////////////////////////////////////////////////////////////////////
+                    /// Release memory that was sent back
+                    //////////////////////////////////////////////////////////////////////
+                    if(releasedAction.first == RELEASE_BUFFER_PARTICLES){
+                        NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
+                        assert(descriptor.results != nullptr);
+                        assert(descriptor.isRecv);
+                        delete[] descriptor.results.release();
+                    }
+                    //////////////////////////////////////////////////////////////////////
+                    /// Merge
+                    //////////////////////////////////////////////////////////////////////
+                    if(releasedAction.first == MERGE_PARTICLES){
+                        TIMEZONE("merge");
+                        NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
+                        assert(descriptor.isRecv == false);
+                        assert(descriptor.toRecvAndMerge != nullptr);
+                        in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_rhs],
+                                descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToExchange);
+                        delete[] descriptor.toRecvAndMerge.release();
+                    }
+                }
+            }
+        }
+
+        assert(whatNext.size() == 0);
+        assert(mpiRequests.size() == 0);
+
+        // Compute self data
+        for(const auto& iter_cell : my_tree){
+            TIMEZONE("proceed-leaf");
+            const long int currenct_cell_idx = iter_cell.first;
+            const std::vector<std::pair<partsize_t,partsize_t>>* intervals_ptr = &iter_cell.second;
+
+#pragma omp task default(shared) firstprivate(currenct_cell_idx, intervals_ptr)
+            {
+                const std::vector<std::pair<partsize_t,partsize_t>>& intervals = (*intervals_ptr);
+
+                cells_locker.lock(currenct_cell_idx);
+
+                for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
+                    // self interval
+                    for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
+                        for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){
+                            const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
+                                                                            particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
+                                                                            particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
+                                                                            particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_X],
+                                                                            particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Y],
+                                                                            particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Z],
+                                                                            0, 0, 0);
+                            if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
+                                in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
+                                                    &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
+                                                    &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
+                                                    &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions],
+                                                    &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs],
+                                                    dist_r2, cutoff_radius_compute, 0, 0, 0);
+                            }
+                        }
+                    }
+
+                    // with other interval
+                    for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){
+                        for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
+                            for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){
+                                const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
+                                                                                particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
+                                                                                particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
+                                                                                particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
+                                                                                particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
+                                                                                particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
+                                                                                0, 0, 0);
+                                if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
+                                    in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
+                                                        &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
+                                                        &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
+                                                        &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions],
+                                                        &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs],
+                                                        dist_r2, cutoff_radius_compute, 0, 0, 0);
+                                }
+                            }
+                        }
+                    }
+                }
+
+                const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
+                long int neighbors_indexes[27];
+                std::array<real_number,3> shift[27];
+                const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false);
+
+                for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
+                    // with other interval
+                    for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
+                        if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){
+                            cells_locker.lock(neighbors_indexes[idx_neighbor]);
+
+                            for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
+                                for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
+                                    for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
+                                        const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
+                                                                                        particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
+                                                                                        particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
+                                                                                        particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
+                                                                                        particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
+                                                                                        particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
+                                                                                        shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
+                                        if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
+                                            in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
+                                                                &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
+                                                                &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
+                                                                &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
+                                                                &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
+                                                                dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
+                                        }
+                                    }
+                                }
+                            }
+                            cells_locker.unlock(neighbors_indexes[idx_neighbor]);
+                        }
+                    }
+                }
+
+                cells_locker.unlock(currenct_cell_idx);
+            }
+        }
+    }
+};
+
+#endif
diff --git a/cpp/particles/p2p_tree.hpp b/cpp/particles/p2p_tree.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cdb3089174ff888cbfc13810d18c617b4a8358e7
--- /dev/null
+++ b/cpp/particles/p2p_tree.hpp
@@ -0,0 +1,153 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef P2P_TREE_HPP
+#define P2P_TREE_HPP
+
+#include <unordered_map>
+#include <vector>
+
+template <class CellClass>
+class p2p_tree{
+    std::unordered_map<long int, CellClass> data;
+    CellClass emptyCell;
+    std::array<long int,3> nb_cell_levels;
+
+    long int get_cell_coord_x_from_index(const long int index) const{
+        return index % nb_cell_levels[IDXC_X];
+    }
+
+    long int get_cell_coord_y_from_index(const long int index) const{
+        return (index % (nb_cell_levels[IDXC_X]*nb_cell_levels[IDXC_Y]))
+                / nb_cell_levels[IDXC_X];
+    }
+
+    long int get_cell_coord_z_from_index(const long int index) const{
+        return index / (nb_cell_levels[IDXC_X]*nb_cell_levels[IDXC_Y]);
+    }
+
+    long int get_cell_idx(const long int idx_x, const long int idx_y,
+                          const long int idx_z) const {
+        return (((idx_z*nb_cell_levels[IDXC_Y])+idx_y)*nb_cell_levels[IDXC_X])+idx_x;
+    }
+
+public:
+    explicit p2p_tree(std::array<long int,3> in_nb_cell_levels)
+        : nb_cell_levels(in_nb_cell_levels){
+    }
+
+    CellClass& getCell(const long int idx){
+        return data[idx];
+    }
+
+
+    const CellClass& getCell(const long int idx) const {
+        const auto& iter = data.find(idx);
+        if(iter != data.end()){
+            return iter->second;
+        }
+        return emptyCell;
+    }
+
+    template <class ShiftType>
+    int getNeighbors(const long int idx, const CellClass* output[27], long int output_indexes[27],
+                     std::array<ShiftType,3> shift[27], const bool include_target) const{
+        int nbNeighbors = 0;
+
+        std::fill_n(output, 27, nullptr);
+
+        const long int idx_x = get_cell_coord_x_from_index(idx);
+        const long int idx_y = get_cell_coord_y_from_index(idx);
+        const long int idx_z = get_cell_coord_z_from_index(idx);
+
+        for(long int neigh_x = -1 ; neigh_x <= 1 ; ++neigh_x){
+            long int neigh_x_pbc = neigh_x+idx_x;
+            ShiftType shift_x = 0;
+            if(neigh_x_pbc < 0){
+                neigh_x_pbc += nb_cell_levels[IDXC_X];
+                shift_x = 1;
+            }
+            else if(nb_cell_levels[IDXC_X] <= neigh_x_pbc){
+                neigh_x_pbc -= nb_cell_levels[IDXC_X];
+                shift_x = -1;
+            }
+
+            for(long int neigh_y = -1 ; neigh_y <= 1 ; ++neigh_y){
+                long int neigh_y_pbc = neigh_y+idx_y;
+                ShiftType shift_y = 0;
+                if(neigh_y_pbc < 0){
+                    neigh_y_pbc += nb_cell_levels[IDXC_Y];
+                    shift_y = 1;
+                }
+                else if(nb_cell_levels[IDXC_Y] <= neigh_y_pbc){
+                    neigh_y_pbc -= nb_cell_levels[IDXC_Y];
+                    shift_y = -1;
+                }
+
+                for(long int neigh_z = -1 ; neigh_z <= 1 ; ++neigh_z){
+                    long int neigh_z_pbc = neigh_z+idx_z;
+                    ShiftType shift_z = 0;
+                    if(neigh_z_pbc < 0){
+                        neigh_z_pbc += nb_cell_levels[IDXC_Z];
+                        shift_z = 1;
+                    }
+                    else if(nb_cell_levels[IDXC_Z] <= neigh_z_pbc){
+                        neigh_z_pbc -= nb_cell_levels[IDXC_Z];
+                        shift_z = -1;
+                    }
+
+                    if(include_target || neigh_x_pbc != idx_x || neigh_y_pbc != idx_y || neigh_z_pbc != idx_z){
+                        const long int idx_neigh = get_cell_idx(neigh_x_pbc,
+                                                                  neigh_y_pbc,
+                                                                  neigh_z_pbc);
+                        const auto& iter = data.find(idx_neigh);
+                        if(iter != data.end()){
+                            output[nbNeighbors] = &(iter->second);
+                            output_indexes[nbNeighbors] = idx_neigh;
+
+                            shift[nbNeighbors][IDXC_X] = shift_x;
+                            shift[nbNeighbors][IDXC_Y] = shift_y;
+                            shift[nbNeighbors][IDXC_Z] = shift_z;
+
+                            nbNeighbors += 1;
+                        }
+                    }
+                }
+            }
+        }
+
+        return nbNeighbors;
+    }
+
+    typename std::unordered_map<long int, CellClass>::iterator begin(){
+        return data.begin();
+    }
+
+    typename std::unordered_map<long int, CellClass>::iterator end(){
+        return data.end();
+    }
+};
+
+#endif
diff --git a/bfps/cpp/particles/particles_adams_bashforth.hpp b/cpp/particles/particles_adams_bashforth.hpp
similarity index 76%
rename from bfps/cpp/particles/particles_adams_bashforth.hpp
rename to cpp/particles/particles_adams_bashforth.hpp
index 2fb61462f7970d823acd6dc3405799e362fa15af..21412e3530408a5980c376453cd6f5199466d830 100644
--- a/bfps/cpp/particles/particles_adams_bashforth.hpp
+++ b/cpp/particles/particles_adams_bashforth.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef PARTICLES_ADAMS_BASHFORTH_HPP
 #define PARTICLES_ADAMS_BASHFORTH_HPP
 
@@ -7,11 +32,10 @@
 #include "scope_timer.hpp"
 #include "particles_utils.hpp"
 
-template <class partsize_t, class real_number, int size_particle_positions = 3, int size_particle_rhs = 3>
-class particles_adams_bashforth {
-    static_assert(size_particle_positions == size_particle_rhs,
-                  "Not having the same dimension for positions and rhs looks like a bug,"
-                  "otherwise comment this assertion.");
+template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs>
+class particles_adams_bashforth{
+    static_assert(size_particle_positions == size_particle_rhs, "This class is designed for the same number of values in positions and rhs");
+
 public:
     static const int Max_steps = 6;
 
diff --git a/bfps/cpp/particles/particles_distr_mpi.hpp b/cpp/particles/particles_distr_mpi.hpp
similarity index 88%
rename from bfps/cpp/particles/particles_distr_mpi.hpp
rename to cpp/particles/particles_distr_mpi.hpp
index 485595181f69b9fe1cf204b06df550a9ca74215d..43d61ca407af23e3cf3c3979d678af08cd7b5ff8 100644
--- a/bfps/cpp/particles/particles_distr_mpi.hpp
+++ b/cpp/particles/particles_distr_mpi.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef PARTICLES_DISTR_MPI_HPP
 #define PARTICLES_DISTR_MPI_HPP
 
@@ -17,7 +42,7 @@
 template <class partsize_t, class real_number>
 class particles_distr_mpi {
 protected:
-    static const int MaxNbRhs = 100;
+    static const int MaxNbRhs = 10;
 
     enum MpiTag{
         TAG_LOW_UP_NB_PARTICLES,
@@ -127,7 +152,7 @@ public:
             assert(partition_interval_size_per_proc[idx_proc_involved] != 0);
         }
 
-        assert(int(field_grid_dim[IDX_Z]) == partition_interval_offset_per_proc[nb_processes_involved]);
+        assert(int(field_grid_dim[IDXC_Z]) == partition_interval_offset_per_proc[nb_processes_involved]);
     }
 
     virtual ~particles_distr_mpi(){}
@@ -136,12 +161,12 @@ public:
 
     template <class computer_class, class field_class, int size_particle_positions, int size_particle_rhs>
     void compute_distr(computer_class& in_computer,
-                       field_class& in_field,
+                       const field_class& in_field,
                        const partsize_t current_my_nb_particles_per_partition[],
                        const real_number particles_positions[],
                        real_number particles_current_rhs[],
                        const int interpolation_size){
-        TIMEZONE("compute_distr");
+        TIMEZONE("particle_distr_mpi::compute_distr");
 
         // Some processes might not be involved
         if(nb_processes_involved <= my_rank){
@@ -235,6 +260,7 @@ public:
         }
         const int nbProcToRecvUpper = int(neigDescriptors.size())-nbProcToRecvLower;
         const int nbProcToRecv = nbProcToRecvUpper + nbProcToRecvLower;
+        variable_used_only_in_assert(nbProcToRecv);
         assert(int(neigDescriptors.size()) == nbProcToRecv);
 
         for(int idxDescr = 0 ; idxDescr < int(neigDescriptors.size()) ; ++idxDescr){
@@ -383,7 +409,7 @@ public:
                         in_computer.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive);
 
                         if(more_than_one_thread == false){
-                            in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, descriptor.toCompute.get(), descriptor.results.get(), NbParticlesToReceive);
+                            in_computer.template apply_computation<field_class, size_particle_positions, size_particle_rhs>(in_field, descriptor.toCompute.get(), descriptor.results.get(), NbParticlesToReceive);
                         }
                         else{
                             TIMEZONE_OMP_INIT_PRETASK(timeZoneTaskKey)
@@ -396,7 +422,7 @@ public:
                                              TIMEZONE_OMP_PRAGMA_TASK_KEY(timeZoneTaskKey)
                                     {
                                         TIMEZONE_OMP_TASK("in_computer.apply_computation", timeZoneTaskKey);
-                                        in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, &ptr_descriptor->toCompute[idxPart*size_particle_positions],
+                                        in_computer.template apply_computation<field_class, size_particle_positions, size_particle_rhs>(in_field, &ptr_descriptor->toCompute[idxPart*size_particle_positions],
                                                 &ptr_descriptor->results[idxPart*size_particle_rhs], sizeToDo);
                                     }
                                 }
@@ -417,7 +443,7 @@ public:
                     if(releasedAction.first == RELEASE_BUFFER_PARTICLES){
                         NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
                         assert(descriptor.toCompute != nullptr);
-                        descriptor.toCompute.release();
+                        delete[] descriptor.toCompute.release();
                     }
                     //////////////////////////////////////////////////////////////////////
                     /// Merge
@@ -429,14 +455,14 @@ public:
                             TIMEZONE("reduce");
                             assert(descriptor.toRecvAndMerge != nullptr);
                             in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[0], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend);
-                            descriptor.toRecvAndMerge.release();
+                            delete[] descriptor.toRecvAndMerge.release();
                         }
                         else {
                             TIMEZONE("reduce");
                             assert(descriptor.toRecvAndMerge != nullptr);
                             in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[(current_offset_particles_for_partition[current_partition_size]-descriptor.nbParticlesToSend)*size_particle_rhs],
                                              descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend);
-                            descriptor.toRecvAndMerge.release();
+                            delete[] descriptor.toRecvAndMerge.release();
                         }
                     }
                 }
@@ -456,7 +482,7 @@ public:
                             #pragma omp task default(shared) firstprivate(idxPart, sizeToDo) priority(0) TIMEZONE_OMP_PRAGMA_TASK_KEY(timeZoneTaskKey)
                             {
                                 TIMEZONE_OMP_TASK("in_computer.apply_computation", timeZoneTaskKey);
-                                in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, &particles_positions[idxPart*size_particle_positions],
+                                in_computer.template apply_computation<field_class, size_particle_positions, size_particle_rhs>(in_field, &particles_positions[idxPart*size_particle_positions],
                                                   &particles_current_rhs[idxPart*size_particle_rhs],
                                                   sizeToDo);
                             }
@@ -474,14 +500,14 @@ public:
                         TIMEZONE("reduce_later");
                         assert(descriptor.toRecvAndMerge != nullptr);
                         in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[0], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend);
-                        descriptor.toRecvAndMerge.release();
+                        delete[] descriptor.toRecvAndMerge.release();
                     }
                     else {
                         TIMEZONE("reduce_later");
                         assert(descriptor.toRecvAndMerge != nullptr);
                         in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[(current_offset_particles_for_partition[current_partition_size]-descriptor.nbParticlesToSend)*size_particle_rhs],
                                          descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend);
-                        descriptor.toRecvAndMerge.release();
+                        delete[] descriptor.toRecvAndMerge.release();
                     }
                 }
             }
@@ -492,7 +518,7 @@ public:
             TIMEZONE("compute-my_compute");
             // Compute my particles
             if(myTotalNbParticles){
-                in_computer.template apply_computation<field_class, size_particle_rhs>(in_field, particles_positions, particles_current_rhs, myTotalNbParticles);
+                in_computer.template apply_computation<field_class, size_particle_positions, size_particle_rhs>(in_field, particles_positions, particles_current_rhs, myTotalNbParticles);
             }
         }
 
@@ -517,6 +543,20 @@ public:
             return;
         }
 
+        {// TODO remove
+            partsize_t partOffset = 0;
+            for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
+                for(partsize_t idx = 0 ; idx < current_my_nb_particles_per_partition[idxPartition] ; ++idx){
+                    const int partition_level = in_computer.pbc_field_layer((*inout_positions_particles)[(idx+partOffset)*size_particle_positions+IDXC_Z], IDXC_Z);
+                    variable_used_only_in_assert(partition_level);
+                    assert(partition_level == current_partition_interval.first + idxPartition
+                           || partition_level == (current_partition_interval.first + idxPartition-1+int(field_grid_dim[IDXC_Z]))%int(field_grid_dim[IDXC_Z])
+                           || partition_level == (current_partition_interval.first + idxPartition+1)%int(field_grid_dim[IDXC_Z]));
+                }
+                partOffset += current_my_nb_particles_per_partition[idxPartition];
+            }
+        }
+
         current_offset_particles_for_partition[0] = 0;
         partsize_t myTotalNbParticles = 0;
         for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
@@ -528,16 +568,17 @@ public:
         // Find particles outside my interval
         const partsize_t nbOutLower = particles_utils::partition_extra<partsize_t, size_particle_positions>(&(*inout_positions_particles)[0], current_my_nb_particles_per_partition[0],
                     [&](const real_number val[]){
-            const int partition_level = in_computer.pbc_field_layer(val[IDX_Z], IDX_Z);
+            const int partition_level = in_computer.pbc_field_layer(val[IDXC_Z], IDXC_Z);
             assert(partition_level == current_partition_interval.first
-                   || partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z])
-                   || partition_level == (current_partition_interval.first+1)%int(field_grid_dim[IDX_Z]));
-            const bool isLower = partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]);
+                   || partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDXC_Z]))%int(field_grid_dim[IDXC_Z])
+                   || partition_level == (current_partition_interval.first+1)%int(field_grid_dim[IDXC_Z]));
+            const bool isLower = partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDXC_Z]))%int(field_grid_dim[IDXC_Z]);
             return isLower;
         },
                     [&](const partsize_t idx1, const partsize_t idx2){
             for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){
-                std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]);
+                std::swap((*inout_index_particles)[size_particle_index*idx1+idx_val],
+                          (*inout_index_particles)[size_particle_index*idx2+idx_val]);
             }
 
             for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){
@@ -553,16 +594,17 @@ public:
                     &(*inout_positions_particles)[(current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow)*size_particle_positions],
                     myTotalNbParticles - (current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow),
                     [&](const real_number val[]){
-            const int partition_level = in_computer.pbc_field_layer(val[IDX_Z], IDX_Z);
+            const int partition_level = in_computer.pbc_field_layer(val[IDXC_Z], IDXC_Z);
             assert(partition_level == (current_partition_interval.second-1)
-                   || partition_level == ((current_partition_interval.second-1)-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z])
-                   || partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDX_Z]));
-            const bool isUpper = (partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDX_Z]));
+                   || partition_level == ((current_partition_interval.second-1)-1+int(field_grid_dim[IDXC_Z]))%int(field_grid_dim[IDXC_Z])
+                   || partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDXC_Z]));
+            const bool isUpper = (partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDXC_Z]));
             return !isUpper;
         },
                     [&](const partsize_t idx1, const partsize_t idx2){
             for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){
-                std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]);
+                std::swap((*inout_index_particles)[size_particle_index*idx1+idx_val],
+                          (*inout_index_particles)[size_particle_index*idx2+idx_val]);
             }
 
             for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){
@@ -607,10 +649,11 @@ public:
                 assert(nbOutLower*size_particle_positions < std::numeric_limits<int>::max());
                 AssertMpi(MPI_Isend(&(*inout_positions_particles)[0], int(nbOutLower*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES,
                           MPI_COMM_WORLD, &mpiRequests.back()));
+
                 whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
                 mpiRequests.emplace_back();
-                assert(nbOutLower < std::numeric_limits<int>::max());
-                AssertMpi(MPI_Isend(&(*inout_index_particles)[0], int(nbOutLower), particles_utils::GetMpiType(partsize_t()),
+                assert(nbOutLower*size_particle_index < std::numeric_limits<int>::max());
+                AssertMpi(MPI_Isend(&(*inout_index_particles)[0], int(nbOutLower*size_particle_index), particles_utils::GetMpiType(partsize_t()),
                           (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES,
                           MPI_COMM_WORLD, &mpiRequests.back()));
 
@@ -643,14 +686,14 @@ public:
                 AssertMpi(MPI_Isend(&(*inout_positions_particles)[(myTotalNbParticles-nbOutUpper)*size_particle_positions],
                           int(nbOutUpper*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES,
                           MPI_COMM_WORLD, &mpiRequests.back()));
+
                 whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
                 mpiRequests.emplace_back();
-                assert(nbOutUpper < std::numeric_limits<int>::max());
-                AssertMpi(MPI_Isend(&(*inout_index_particles)[(myTotalNbParticles-nbOutUpper)], int(nbOutUpper),
+                assert(nbOutUpper*size_particle_index < std::numeric_limits<int>::max());
+                AssertMpi(MPI_Isend(&(*inout_index_particles)[(myTotalNbParticles-nbOutUpper)*size_particle_index], int(nbOutUpper*size_particle_index),
                           particles_utils::GetMpiType(partsize_t()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES,
                           MPI_COMM_WORLD, &mpiRequests.back()));
 
-
                 for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){
                     whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
                     mpiRequests.emplace_back();
@@ -684,11 +727,12 @@ public:
                                   (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES,
                                   MPI_COMM_WORLD, &mpiRequests.back()));
 
-                        newParticlesLowIndexes.reset(new partsize_t[nbNewFromLow]);
+                        newParticlesLowIndexes.reset(new partsize_t[nbNewFromLow*size_particle_index]);
                         whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
                         mpiRequests.emplace_back();
-                        assert(nbNewFromLow < std::numeric_limits<int>::max());
-                        AssertMpi(MPI_Irecv(&newParticlesLowIndexes[0], int(nbNewFromLow), particles_utils::GetMpiType(partsize_t()),
+                        assert(nbNewFromLow*size_particle_index < std::numeric_limits<int>::max());
+                        AssertMpi(MPI_Irecv(&newParticlesLowIndexes[0], int(nbNewFromLow*size_particle_index),
+                                  particles_utils::GetMpiType(partsize_t()),
                                   (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES,
                                   MPI_COMM_WORLD, &mpiRequests.back()));
 
@@ -713,11 +757,12 @@ public:
                         AssertMpi(MPI_Irecv(&newParticlesUp[0], int(nbNewFromUp*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES,
                                   MPI_COMM_WORLD, &mpiRequests.back()));
 
-                        newParticlesUpIndexes.reset(new partsize_t[nbNewFromUp]);
+                        newParticlesUpIndexes.reset(new partsize_t[nbNewFromUp*size_particle_index]);
                         whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
                         mpiRequests.emplace_back();
-                        assert(nbNewFromUp < std::numeric_limits<int>::max());
-                        AssertMpi(MPI_Irecv(&newParticlesUpIndexes[0], int(nbNewFromUp), particles_utils::GetMpiType(partsize_t()),
+                        assert(nbNewFromUp*size_particle_index < std::numeric_limits<int>::max());
+                        AssertMpi(MPI_Irecv(&newParticlesUpIndexes[0], int(nbNewFromUp*size_particle_index),
+                                  particles_utils::GetMpiType(partsize_t()),
                                   (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES,
                                   MPI_COMM_WORLD, &mpiRequests.back()));
 
@@ -750,7 +795,7 @@ public:
             const partsize_t myTotalNewNbParticles = nbOldParticlesInside + nbNewFromLow + nbNewFromUp;
 
             std::unique_ptr<real_number[]> newArray(new real_number[myTotalNewNbParticles*size_particle_positions]);
-            std::unique_ptr<partsize_t[]> newArrayIndexes(new partsize_t[myTotalNewNbParticles]);
+            std::unique_ptr<partsize_t[]> newArrayIndexes(new partsize_t[myTotalNewNbParticles*size_particle_index]);
             std::vector<std::unique_ptr<real_number[]>> newArrayRhs(in_nb_rhs);
             for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){
                 newArrayRhs[idx_rhs].reset(new real_number[myTotalNewNbParticles*size_particle_rhs]);
@@ -760,7 +805,7 @@ public:
             if(nbNewFromLow){
                 const particles_utils::fixed_copy fcp(0, 0, nbNewFromLow);
                 fcp.copy(newArray, newParticlesLow, size_particle_positions);
-                fcp.copy(newArrayIndexes, newParticlesLowIndexes);
+                fcp.copy(newArrayIndexes, newParticlesLowIndexes, size_particle_index);
                 for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){
                     fcp.copy(newArrayRhs[idx_rhs], newParticlesLowRhs[idx_rhs], size_particle_rhs);
                 }
@@ -770,7 +815,7 @@ public:
             {
                 const particles_utils::fixed_copy fcp(nbNewFromLow, nbOutLower, nbOldParticlesInside);
                 fcp.copy(newArray, (*inout_positions_particles), size_particle_positions);
-                fcp.copy(newArrayIndexes, (*inout_index_particles));
+                fcp.copy(newArrayIndexes, (*inout_index_particles), size_particle_index);
                 for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){
                     fcp.copy(newArrayRhs[idx_rhs], inout_rhs_particles[idx_rhs], size_particle_rhs);
                 }
@@ -780,7 +825,7 @@ public:
             if(nbNewFromUp){
                 const particles_utils::fixed_copy fcp(nbNewFromLow+nbOldParticlesInside, 0, nbNewFromUp);
                 fcp.copy(newArray, newParticlesUp, size_particle_positions);
-                fcp.copy(newArrayIndexes, newParticlesUpIndexes);
+                fcp.copy(newArrayIndexes, newParticlesUpIndexes, size_particle_index);
                 for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){
                     fcp.copy(newArrayRhs[idx_rhs], newParticlesUpRhs[idx_rhs], size_particle_rhs);
                 }
@@ -802,13 +847,14 @@ public:
                                              myTotalNbParticles,current_partition_size,
                                              current_my_nb_particles_per_partition, current_offset_particles_for_partition.get(),
                                              [&](const real_number& z_pos){
-                const int partition_level = in_computer.pbc_field_layer(z_pos, IDX_Z);
+                const int partition_level = in_computer.pbc_field_layer(z_pos, IDXC_Z);
                 assert(current_partition_interval.first <= partition_level && partition_level < current_partition_interval.second);
                 return partition_level - current_partition_interval.first;
             },
             [&](const partsize_t idx1, const partsize_t idx2){
                 for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){
-                    std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]);
+                    std::swap((*inout_index_particles)[size_particle_index*idx1 + idx_val],
+                              (*inout_index_particles)[size_particle_index*idx2 + idx_val]);
                 }
 
                 for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){
@@ -824,7 +870,7 @@ public:
                     assert(current_my_nb_particles_per_partition[idxPartition] ==
                            current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]);
                     for(partsize_t idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){
-                        assert(in_computer.pbc_field_layer((*inout_positions_particles)[idx*3+IDX_Z], IDX_Z)-current_partition_interval.first == idxPartition);
+                        assert(in_computer.pbc_field_layer((*inout_positions_particles)[idx*size_particle_positions+IDXC_Z], IDXC_Z)-current_partition_interval.first == idxPartition);
                     }
                 }
             }
diff --git a/bfps/cpp/particles/particles_field_computer.hpp b/cpp/particles/particles_field_computer.hpp
similarity index 68%
rename from bfps/cpp/particles/particles_field_computer.hpp
rename to cpp/particles/particles_field_computer.hpp
index f68f2fc02b4ee40aa9583385c0bd18195b92b6dc..f6494ecd0b937b02038fb7eb8a498ee9f29212fd 100644
--- a/bfps/cpp/particles/particles_field_computer.hpp
+++ b/cpp/particles/particles_field_computer.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef PARTICLES_FIELD_COMPUTER_HPP
 #define PARTICLES_FIELD_COMPUTER_HPP
 
@@ -12,6 +37,16 @@ template <class partsize_t,
           class interpolator_class,
           int interp_neighbours>
 class particles_field_computer {
+    // TODO but not critical, add in field:
+    // static const int nb_components = ncomp(fc);
+    // and use it as field_class::nb_components
+    // but failed up to now....
+    template <typename rnumber,
+              field_backend be,
+              field_components fc>
+    static constexpr int nbcomp(const field<rnumber, be, fc>& /*field*/){
+        return ncomp(fc);
+    }
 
     const std::array<int,3> field_grid_dim;
     const std::pair<int,int> current_partition_interval;
@@ -34,9 +69,9 @@ public:
         : field_grid_dim({{int(in_field_grid_dim[0]),int(in_field_grid_dim[1]),int(in_field_grid_dim[2])}}), current_partition_interval(in_current_partitions),
           interpolator(in_interpolator),
           spatial_box_width(in_spatial_box_width), spatial_box_offset(in_spatial_box_offset), box_step_width(in_box_step_width){
-        deriv[IDX_X] = 0;
-        deriv[IDX_Y] = 0;
-        deriv[IDX_Z] = 0;
+        deriv[IDXC_X] = 0;
+        deriv[IDXC_Y] = 0;
+        deriv[IDXC_Z] = 0;
     }
 
     ////////////////////////////////////////////////////////////////////////
@@ -62,33 +97,35 @@ public:
         return pos_in_cell;
     }
 
-    template <class field_class, int size_particle_rhs>
+    template <class field_class, int size_particle_positions, int size_particle_rhs>
     void apply_computation(const field_class& field,
                                    const real_number particles_positions[],
                                    real_number particles_current_rhs[],
                                    const partsize_t nb_particles) const {
+        constexpr int nb_components_in_field = nbcomp(field);
+        static_assert(nb_components_in_field <= size_particle_rhs, "Cannot store all the component in the given array");
         TIMEZONE("particles_field_computer::apply_computation");
-        //DEBUG_MSG("just entered particles_field_computer::apply_computation\n");
+
         for(partsize_t idxPart = 0 ; idxPart < nb_particles ; ++idxPart){
-            const real_number reltv_x = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_X], IDX_X);
-            const real_number reltv_y = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_Y], IDX_Y);
-            const real_number reltv_z = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_Z], IDX_Z);
+            const real_number reltv_x = get_norm_pos_in_cell(particles_positions[idxPart*size_particle_positions+IDXC_X], IDXC_X);
+            const real_number reltv_y = get_norm_pos_in_cell(particles_positions[idxPart*size_particle_positions+IDXC_Y], IDXC_Y);
+            const real_number reltv_z = get_norm_pos_in_cell(particles_positions[idxPart*size_particle_positions+IDXC_Z], IDXC_Z);
 
             typename interpolator_class::real_number
                 bx[interp_neighbours*2+2],
                 by[interp_neighbours*2+2],
                 bz[interp_neighbours*2+2];
-            interpolator.compute_beta(deriv[IDX_X], reltv_x, bx);
-            interpolator.compute_beta(deriv[IDX_Y], reltv_y, by);
-            interpolator.compute_beta(deriv[IDX_Z], reltv_z, bz);
+            interpolator.compute_beta(deriv[IDXC_X], reltv_x, bx);
+            interpolator.compute_beta(deriv[IDXC_Y], reltv_y, by);
+            interpolator.compute_beta(deriv[IDXC_Z], reltv_z, bz);
 
-            const int partGridIdx_x = pbc_field_layer(particles_positions[idxPart*3+IDX_X], IDX_X);
-            const int partGridIdx_y = pbc_field_layer(particles_positions[idxPart*3+IDX_Y], IDX_Y);
-            const int partGridIdx_z = pbc_field_layer(particles_positions[idxPart*3+IDX_Z], IDX_Z);
+            const int partGridIdx_x = pbc_field_layer(particles_positions[idxPart*size_particle_positions+IDXC_X], IDXC_X);
+            const int partGridIdx_y = pbc_field_layer(particles_positions[idxPart*size_particle_positions+IDXC_Y], IDXC_Y);
+            const int partGridIdx_z = pbc_field_layer(particles_positions[idxPart*size_particle_positions+IDXC_Z], IDXC_Z);
 
-            assert(0 <= partGridIdx_x && partGridIdx_x < int(field_grid_dim[IDX_X]));
-            assert(0 <= partGridIdx_y && partGridIdx_y < int(field_grid_dim[IDX_Y]));
-            assert(0 <= partGridIdx_z && partGridIdx_z < int(field_grid_dim[IDX_Z]));
+            assert(0 <= partGridIdx_x && partGridIdx_x < int(field_grid_dim[IDXC_X]));
+            assert(0 <= partGridIdx_y && partGridIdx_y < int(field_grid_dim[IDXC_Y]));
+            assert(0 <= partGridIdx_z && partGridIdx_z < int(field_grid_dim[IDXC_Z]));
 
             const int interp_limit_mx = partGridIdx_x-interp_neighbours;
             const int interp_limit_x = partGridIdx_x+interp_neighbours+1;
@@ -101,8 +138,8 @@ public:
             int nb_z_intervals;
 
             if((partGridIdx_z-interp_neighbours) < 0){
-                assert(partGridIdx_z+interp_neighbours+1 < int(field_grid_dim[IDX_Z]));
-                interp_limit_mz[0] = std::max(current_partition_interval.first, partGridIdx_z-interp_neighbours+int(field_grid_dim[IDX_Z]));
+                assert(partGridIdx_z+interp_neighbours+1 < int(field_grid_dim[IDXC_Z]));
+                interp_limit_mz[0] = std::max(current_partition_interval.first, partGridIdx_z-interp_neighbours+int(field_grid_dim[IDXC_Z]));
                 interp_limit_z[0] = current_partition_interval.second-1;
 
                 interp_limit_mz[1] = std::max(0, current_partition_interval.first);
@@ -110,12 +147,12 @@ public:
 
                 nb_z_intervals = 2;
             }
-            else if(int(field_grid_dim[IDX_Z]) <= (partGridIdx_z+interp_neighbours+1)){
+            else if(int(field_grid_dim[IDXC_Z]) <= (partGridIdx_z+interp_neighbours+1)){
                 interp_limit_mz[0] = std::max(current_partition_interval.first, partGridIdx_z-interp_neighbours);
-                interp_limit_z[0] = std::min(int(field_grid_dim[IDX_Z])-1,current_partition_interval.second-1);
+                interp_limit_z[0] = std::min(int(field_grid_dim[IDXC_Z])-1,current_partition_interval.second-1);
 
                 interp_limit_mz[1] = std::max(0, current_partition_interval.first);
-                interp_limit_z[1] = std::min(partGridIdx_z+interp_neighbours+1-int(field_grid_dim[IDX_Z]), current_partition_interval.second-1);
+                interp_limit_z[1] = std::min(partGridIdx_z+interp_neighbours+1-int(field_grid_dim[IDXC_Z]), current_partition_interval.second-1);
 
                 nb_z_intervals = 2;
             }
@@ -127,26 +164,27 @@ public:
 
             for(int idx_inter = 0 ; idx_inter < nb_z_intervals ; ++idx_inter){
                 for(int idx_z = interp_limit_mz[idx_inter] ; idx_z <= interp_limit_z[idx_inter] ; ++idx_z ){
-                    const int idx_z_pbc = (idx_z + field_grid_dim[IDX_Z])%field_grid_dim[IDX_Z];
+                    const int idx_z_pbc = (idx_z + field_grid_dim[IDXC_Z])%field_grid_dim[IDXC_Z];
                     assert(current_partition_interval.first <= idx_z_pbc && idx_z_pbc < current_partition_interval.second);
-                    assert(((idx_z+field_grid_dim[IDX_Z]-interp_limit_mz_bz)%field_grid_dim[IDX_Z]) < interp_neighbours*2+2);
+                    assert(((idx_z+field_grid_dim[IDXC_Z]-interp_limit_mz_bz)%field_grid_dim[IDXC_Z]) < interp_neighbours*2+2);
 
                     for(int idx_x = interp_limit_mx ; idx_x <= interp_limit_x ; ++idx_x ){
-                        const int idx_x_pbc = (idx_x + field_grid_dim[IDX_X])%field_grid_dim[IDX_X];
+                        const int idx_x_pbc = (idx_x + field_grid_dim[IDXC_X])%field_grid_dim[IDXC_X];
                         assert(idx_x-interp_limit_mx < interp_neighbours*2+2);
 
                         for(int idx_y = interp_limit_my ; idx_y <= interp_limit_y ; ++idx_y ){
-                            const int idx_y_pbc = (idx_y + field_grid_dim[IDX_Y])%field_grid_dim[IDX_Y];
+                            const int idx_y_pbc = (idx_y + field_grid_dim[IDXC_Y])%field_grid_dim[IDXC_Y];
                             assert(idx_y-interp_limit_my < interp_neighbours*2+2);
 
-                            const real_number coef = (bz[((idx_z+field_grid_dim[IDX_Z]-interp_limit_mz_bz)%field_grid_dim[IDX_Z])]
+                            const real_number coef = (bz[((idx_z+field_grid_dim[IDXC_Z]-interp_limit_mz_bz)%field_grid_dim[IDXC_Z])]
                                                     * by[idx_y-interp_limit_my]
                                                     * bx[idx_x-interp_limit_mx]);
 
                             const ptrdiff_t tindex = field.get_rindex_from_global(idx_x_pbc, idx_y_pbc, idx_z_pbc);
 
                             // getValue does not necessary return real_number
-                            for(int idx_rhs_val = 0 ; idx_rhs_val < size_particle_rhs ; ++idx_rhs_val){
+                            // size_particle_rhs is just for the leading dimension of the array
+                            for(int idx_rhs_val = 0 ; idx_rhs_val < nb_components_in_field ; ++idx_rhs_val){
                                 particles_current_rhs[idxPart*size_particle_rhs+idx_rhs_val] += real_number(field.rval(tindex,idx_rhs_val))*coef;
                             }
                         }
diff --git a/bfps/cpp/particles/particles_generic_interp.hpp b/cpp/particles/particles_generic_interp.hpp
similarity index 82%
rename from bfps/cpp/particles/particles_generic_interp.hpp
rename to cpp/particles/particles_generic_interp.hpp
index 98d0363d4fcfae8c05b6ceabef620e17c1263eee..da48641ca543dd853c24d675c1fea8b96f9da449 100644
--- a/bfps/cpp/particles/particles_generic_interp.hpp
+++ b/cpp/particles/particles_generic_interp.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef PARTICLES_GENERIC_INTERP_HPP
 #define PARTICLES_GENERIC_INTERP_HPP
 
diff --git a/cpp/particles/particles_inner_computer.cpp b/cpp/particles/particles_inner_computer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a841bee50f2849ef981cb5c585ee448570ae2ca
--- /dev/null
+++ b/cpp/particles/particles_inner_computer.cpp
@@ -0,0 +1,193 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#include "base.hpp"
+#include "particles_utils.hpp"
+#include "particles_inner_computer.hpp"
+
+#include <cmath>
+
+template <class real_number, class partsize_t>
+template <int size_particle_positions, int size_particle_rhs>
+void particles_inner_computer<real_number, partsize_t>::compute_interaction(
+        const partsize_t nb_particles,
+        const real_number pos_part[],
+        real_number rhs_part[]) const{
+    static_assert(size_particle_positions == 6, "This kernel works only with 6 values for one particle's position");
+    static_assert(size_particle_rhs == 6, "This kernel works only with 6 values per particle's rhs");
+
+    #pragma omp parallel for
+    for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
+        // Add attr Ã— V0 to the field interpolation
+        rhs_part[idx_part*size_particle_rhs + IDXC_X] += pos_part[idx_part*size_particle_positions + 3+IDXC_X]*v0;
+        rhs_part[idx_part*size_particle_rhs + IDXC_Y] += pos_part[idx_part*size_particle_positions + 3+IDXC_Y]*v0;
+        rhs_part[idx_part*size_particle_rhs + IDXC_Z] += pos_part[idx_part*size_particle_positions + 3+IDXC_Z]*v0;
+    }
+}
+
+    // for given orientation and right-hand-side, recompute right-hand-side such
+    // that it is perpendicular to the current orientation.
+    // this is the job of the Lagrange multiplier terms, hence the
+    // "add_Lagrange_multipliers" name of the method.
+template <>
+template <>
+void particles_inner_computer<double, long long>::add_Lagrange_multipliers<6,6>(
+        const long long nb_particles,
+        const double pos_part[],
+        double rhs_part[]) const{
+
+        #pragma omp parallel for
+        for(long long idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
+            const long long idx0 = idx_part*6 + 3;
+            const long long idx1 = idx_part*6 + 3;
+            // check that orientation is unit vector:
+            double orientation_size = sqrt(
+                    pos_part[idx0+IDXC_X]*pos_part[idx0+IDXC_X] +
+                    pos_part[idx0+IDXC_Y]*pos_part[idx0+IDXC_Y] +
+                    pos_part[idx0+IDXC_Z]*pos_part[idx0+IDXC_Z]);
+            variable_used_only_in_assert(orientation_size);
+            assert(orientation_size > 0.99);
+            assert(orientation_size < 1.01);
+            // I call "rotation" to be the right hand side of the orientation part of the ODE
+            // project rotation on orientation:
+            double projection = (
+                    pos_part[idx0+IDXC_X]*rhs_part[idx1+IDXC_X] +
+                    pos_part[idx0+IDXC_Y]*rhs_part[idx1+IDXC_Y] +
+                    pos_part[idx0+IDXC_Z]*rhs_part[idx1+IDXC_Z]);
+
+            // now remove parallel bit.
+            rhs_part[idx1+IDXC_X] -= pos_part[idx0+IDXC_X]*projection;
+            rhs_part[idx1+IDXC_Y] -= pos_part[idx0+IDXC_Y]*projection;
+            rhs_part[idx1+IDXC_Z] -= pos_part[idx0+IDXC_Z]*projection;
+
+            // DEBUG
+            // sanity check, for debugging purposes
+            // compute dot product between orientation and orientation change
+            //double dotproduct = (
+            //        rhs_part[idx1 + IDXC_X]*pos_part[idx0 + IDXC_X] +
+            //        rhs_part[idx1 + IDXC_Y]*pos_part[idx0 + IDXC_Y] +
+            //        rhs_part[idx1 + IDXC_Z]*pos_part[idx0 + IDXC_Z]);
+            //if (dotproduct > 0.1)
+            //{
+            //    DEBUG_MSG("dotproduct = %g, projection = %g\n"
+            //              "pos_part[%d] = %g, pos_part[%d] = %g, pos_part[%d] = %g\n"
+            //              "rhs_part[%d] = %g, rhs_part[%d] = %g, rhs_part[%d] = %g\n",
+            //            dotproduct,
+            //            projection,
+            //            IDXC_X, pos_part[idx0 + IDXC_X],
+            //            IDXC_Y, pos_part[idx0 + IDXC_Y],
+            //            IDXC_Z, pos_part[idx0 + IDXC_Z],
+            //            IDXC_X, rhs_part[idx1 + IDXC_X],
+            //            IDXC_Y, rhs_part[idx1 + IDXC_Y],
+            //            IDXC_Z, rhs_part[idx1 + IDXC_Z]);
+            //    assert(false);
+            //}
+            //assert(dotproduct <= 0.1);
+        }
+    }
+
+template <>
+template <>
+void particles_inner_computer<double, long long>::compute_interaction_with_extra<6,6,3>(
+        const long long nb_particles,
+        const double pos_part[],
+        double rhs_part[],
+        const double rhs_part_extra[]) const{
+    // call plain compute_interaction first
+    compute_interaction<6, 6>(nb_particles, pos_part, rhs_part);
+
+    // now add vorticity term
+    #pragma omp parallel for
+    for(long long idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
+        // Cross product vorticity/orientation
+        rhs_part[idx_part*6 + 3+IDXC_X] += 0.5*(rhs_part_extra[idx_part*3 + IDXC_Y]*pos_part[idx_part*6 + 3+IDXC_Z] -
+                                               rhs_part_extra[idx_part*3 + IDXC_Z]*pos_part[idx_part*6 + 3+IDXC_Y]);
+        rhs_part[idx_part*6 + 3+IDXC_Y] += 0.5*(rhs_part_extra[idx_part*3 + IDXC_Z]*pos_part[idx_part*6 + 3+IDXC_X] -
+                                               rhs_part_extra[idx_part*3 + IDXC_X]*pos_part[idx_part*6 + 3+IDXC_Z]);
+        rhs_part[idx_part*6 + 3+IDXC_Z] += 0.5*(rhs_part_extra[idx_part*3 + IDXC_X]*pos_part[idx_part*6 + 3+IDXC_Y] -
+                                               rhs_part_extra[idx_part*3 + IDXC_Y]*pos_part[idx_part*6 + 3+IDXC_X]);
+    }
+}
+
+template <> //Work here
+template <>
+void particles_inner_computer<double, long long>::compute_interaction_with_extra<6,6,9>(
+        const long long nb_particles,
+        const double pos_part[],
+        double rhs_part[],
+        const double rhs_part_extra[]) const{
+    // call plain compute_interaction first
+    compute_interaction<6, 6>(nb_particles, pos_part, rhs_part);
+    const double ll2 = lambda*lambda;
+
+    // now add vorticity term
+    #pragma omp parallel for
+    for(long long idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
+        long long idx_part6 = idx_part*6 + 3;
+        long long idx_part9 = idx_part*9;
+        rhs_part[idx_part6+IDXC_X] += (
+                pos_part[idx_part6+IDXC_Z]*(ll2*rhs_part_extra[idx_part9 + IDXC_DZ_X]-rhs_part_extra[idx_part9 + IDXC_DX_Z])
+              + pos_part[idx_part6+IDXC_Y]*(ll2*rhs_part_extra[idx_part9 + IDXC_DY_X]-rhs_part_extra[idx_part9 + IDXC_DX_Y])
+              + pos_part[idx_part6+IDXC_X]*(ll2-1)*rhs_part_extra[idx_part9 + IDXC_DX_X]) / (ll2+1);
+        rhs_part[idx_part6+IDXC_Y] += (
+                pos_part[idx_part6+IDXC_X]*(ll2*rhs_part_extra[idx_part9 + IDXC_DX_Y]-rhs_part_extra[idx_part9 + IDXC_DY_X])
+              + pos_part[idx_part6+IDXC_Z]*(ll2*rhs_part_extra[idx_part9 + IDXC_DZ_Y]-rhs_part_extra[idx_part9 + IDXC_DY_Z])
+              + pos_part[idx_part6+IDXC_Y]*(ll2-1)*rhs_part_extra[idx_part9 + IDXC_DY_Y]) / (ll2+1);
+        rhs_part[idx_part6+IDXC_Z] += (
+                pos_part[idx_part6+IDXC_Y]*(ll2*rhs_part_extra[idx_part9 + IDXC_DY_Z]-rhs_part_extra[idx_part9 + IDXC_DZ_Y])
+              + pos_part[idx_part6+IDXC_X]*(ll2*rhs_part_extra[idx_part9 + IDXC_DX_Z]-rhs_part_extra[idx_part9 + IDXC_DZ_X])
+              + pos_part[idx_part6+IDXC_Z]*(ll2-1)*rhs_part_extra[idx_part9 + IDXC_DZ_Z]) / (ll2+1);
+    }
+}
+
+
+// meant to be called AFTER executing the time-stepping operation.
+// once the particles have been moved, ensure that the orientation is a unit vector.
+template <>
+template <>
+void particles_inner_computer<double, long long>::enforce_unit_orientation<6>(
+        const long long nb_particles,
+        double pos_part[]) const{
+    #pragma omp parallel for
+    for(long long idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
+        const long long idx0 = idx_part*6 + 3;
+        // compute orientation size:
+        double orientation_size = sqrt(
+                pos_part[idx0+IDXC_X]*pos_part[idx0+IDXC_X] +
+                pos_part[idx0+IDXC_Y]*pos_part[idx0+IDXC_Y] +
+                pos_part[idx0+IDXC_Z]*pos_part[idx0+IDXC_Z]);
+        // now renormalize
+        pos_part[idx0 + IDXC_X] /= orientation_size;
+        pos_part[idx0 + IDXC_Y] /= orientation_size;
+        pos_part[idx0 + IDXC_Z] /= orientation_size;
+    }
+}
+
+template
+void particles_inner_computer<double, long long>::compute_interaction<6, 6>(
+        const long long nb_particles,
+        const double pos_part[],
+        double rhs_part[]) const;
+
diff --git a/cpp/particles/particles_inner_computer.hpp b/cpp/particles/particles_inner_computer.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7f30ad6829e5cfa0ac40bd59db7a9a09cbe8ac6f
--- /dev/null
+++ b/cpp/particles/particles_inner_computer.hpp
@@ -0,0 +1,109 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef PARTICLES_INNER_COMPUTER_HPP
+#define PARTICLES_INNER_COMPUTER_HPP
+
+#include <cstring>
+#include <cassert>
+#include <iostream>
+
+template <class real_number, class partsize_t>
+class particles_inner_computer{
+    bool isActive;
+    const real_number v0;
+    const real_number lambda;
+    const real_number lambda1;
+    const real_number lambda2;
+    const real_number lambda3;
+
+public:
+    explicit particles_inner_computer(const real_number inV0):
+        isActive(true),
+        v0(inV0),
+        lambda(0),
+        lambda1(0),
+        lambda2(0),
+        lambda3(0)
+    {}
+    explicit particles_inner_computer(const real_number inV0, const real_number inLambda):
+        isActive(true),
+        v0(inV0),
+        lambda(inLambda),
+        lambda1(0),
+        lambda2(0),
+        lambda3(0)
+    {}
+    explicit particles_inner_computer(
+            const real_number inV0,
+            const real_number inLambda1,
+            const real_number inLambda2,
+            const real_number inLambda3):
+        isActive(true),
+        v0(inV0),
+        lambda(0),
+        lambda1(inLambda1),
+        lambda2(inLambda2),
+        lambda3(inLambda3)
+    {}
+
+    template <int size_particle_positions, int size_particle_rhs>
+    void compute_interaction(
+            const partsize_t nb_particles,
+            const real_number pos_part[],
+            real_number rhs_part[]) const;
+    // for given orientation and right-hand-side, recompute right-hand-side such
+    // that it is perpendicular to the current orientation.
+    // this is the job of the Lagrange multiplier terms, hence the
+    // "add_Lagrange_multipliers" name of the method.
+    template <int size_particle_positions, int size_particle_rhs>
+    void add_Lagrange_multipliers(
+            const partsize_t nb_particles,
+            const real_number pos_part[],
+            real_number rhs_part[]) const;
+    template <int size_particle_positions, int size_particle_rhs, int size_particle_rhs_extra>
+    void compute_interaction_with_extra(
+            const partsize_t nb_particles,
+            const real_number pos_part[],
+            real_number rhs_part[],
+            const real_number rhs_part_extra[]) const;
+    // meant to be called AFTER executing the time-stepping operation.
+    // once the particles have been moved, ensure that the orientation is a unit vector.
+    template <int size_particle_positions>
+    void enforce_unit_orientation(
+            const partsize_t nb_particles,
+            real_number pos_part[]) const;
+
+    bool isEnable() const {
+        return isActive;
+    }
+
+    void setEnable(const bool inIsActive) {
+        isActive = inIsActive;
+    }
+};
+
+#endif
+
diff --git a/cpp/particles/particles_inner_computer_empty.hpp b/cpp/particles/particles_inner_computer_empty.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a90d3aa1b9f5ca5e9e2085173c6c55a25809b469
--- /dev/null
+++ b/cpp/particles/particles_inner_computer_empty.hpp
@@ -0,0 +1,57 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef PARTICLES_INNER_COMPUTER_EMPTY_HPP
+#define PARTICLES_INNER_COMPUTER_EMPTY_HPP
+
+#include <cstring>
+#include <cassert>
+
+template <class real_number, class partsize_t>
+class particles_inner_computer_empty{
+public:
+    template <int size_particle_positions, int size_particle_rhs>
+    void compute_interaction(const partsize_t /*nb_particles*/, real_number /*pos_part*/[], real_number /*rhs_part*/[]) const{
+    }
+
+    template <int size_particle_positions>
+    void enforce_unit_orientation(const partsize_t /*nb_particles*/, real_number /*pos_part*/[]) const{
+    }
+
+    template <int size_particle_positions, int size_particle_rhs>
+    void add_Lagrange_multipliers(const partsize_t /*nb_particles*/, real_number /*pos_part*/[], real_number /*rhs_part*/[]) const{
+    }
+
+    template <int size_particle_positions, int size_particle_rhs, int size_particle_rhs_extra>
+    void compute_interaction_with_extra(const partsize_t /*nb_particles*/, real_number /*pos_part*/[], real_number /*rhs_part*/[],
+                             const real_number /*rhs_part_extra*/[]) const{
+    }
+
+    constexpr static bool isEnable() {
+        return false;
+    }
+};
+
+#endif
diff --git a/bfps/cpp/particles/particles_input_hdf5.hpp b/cpp/particles/particles_input_hdf5.hpp
similarity index 62%
rename from bfps/cpp/particles/particles_input_hdf5.hpp
rename to cpp/particles/particles_input_hdf5.hpp
index 32cfec05ad854cd7f3ffd88d771418d0552237d8..3f895be3613030fca0a0fce1a786bb6fc541fe9c 100644
--- a/bfps/cpp/particles/particles_input_hdf5.hpp
+++ b/cpp/particles/particles_input_hdf5.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef PARTICLES_INPUT_HDF5_HPP
 #define PARTICLES_INPUT_HDF5_HPP
 
@@ -14,8 +39,6 @@
 #include "scope_timer.hpp"
 
 
-// why is "size_particle_rhs" a template parameter?
-// I think it's safe to assume this will always be 3.
 template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs>
 class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_number> {
     const std::string filename;
@@ -24,16 +47,19 @@ class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_nu
     int my_rank;
     int nb_processes;
 
-    hsize_t nb_total_particles;
+    hsize_t total_number_of_particles;
     hsize_t nb_rhs;
     partsize_t nb_particles_for_me;
+    std::vector<hsize_t> particle_file_layout;   // to hold the shape of initial condition array
 
     std::unique_ptr<real_number[]> my_particles_positions;
     std::unique_ptr<partsize_t[]> my_particles_indexes;
     std::vector<std::unique_ptr<real_number[]>> my_particles_rhs;
 
-    static std::vector<real_number> BuildLimitsAllProcesses(MPI_Comm mpi_comm,
-                                                       const real_number my_spatial_low_limit, const real_number my_spatial_up_limit){
+    static std::vector<real_number> BuildLimitsAllProcesses(
+            MPI_Comm mpi_comm,
+            const real_number my_spatial_low_limit,
+            const real_number my_spatial_up_limit){
         int my_rank;
         int nb_processes;
 
@@ -43,8 +69,15 @@ class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_nu
         std::vector<real_number> spatial_limit_per_proc(nb_processes*2);
 
         real_number intervalToSend[2] = {my_spatial_low_limit, my_spatial_up_limit};
-        AssertMpi(MPI_Allgather(intervalToSend, 2, particles_utils::GetMpiType(real_number()),
-                                spatial_limit_per_proc.data(), 2, particles_utils::GetMpiType(real_number()), mpi_comm));
+        AssertMpi(
+                MPI_Allgather(
+                    intervalToSend,
+                    2,
+                    particles_utils::GetMpiType(real_number()),
+                    spatial_limit_per_proc.data(),
+                    2,
+                    particles_utils::GetMpiType(real_number()),
+                    mpi_comm));
 
         for(int idx_proc = 0; idx_proc < nb_processes-1 ; ++idx_proc){
             assert(spatial_limit_per_proc[idx_proc*2] <= spatial_limit_per_proc[idx_proc*2+1]);
@@ -58,18 +91,35 @@ class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_nu
     }
 
 public:
-    particles_input_hdf5(const MPI_Comm in_mpi_comm,const std::string& inFilename,
-                         const std::string& inDatanameState, const std::string& inDatanameRhs,
-                         const real_number my_spatial_low_limit, const real_number my_spatial_up_limit)
-        : particles_input_hdf5(in_mpi_comm, inFilename, inDatanameState, inDatanameRhs,
-                               BuildLimitsAllProcesses(in_mpi_comm, my_spatial_low_limit, my_spatial_up_limit)){
+    particles_input_hdf5(
+            const MPI_Comm in_mpi_comm,
+            const std::string& inFilename,
+            const std::string& inDatanameState,
+            const std::string& inDatanameRhs,
+            const real_number my_spatial_low_limit,
+            const real_number my_spatial_up_limit)
+        : particles_input_hdf5(
+                in_mpi_comm,
+                inFilename,
+                inDatanameState,
+                inDatanameRhs,
+                BuildLimitsAllProcesses(
+                    in_mpi_comm,
+                    my_spatial_low_limit,
+                    my_spatial_up_limit)){
     }
 
-    particles_input_hdf5(const MPI_Comm in_mpi_comm,const std::string& inFilename,
-                         const std::string& inDatanameState, const std::string& inDatanameRhs,
-                         const std::vector<real_number>& in_spatial_limit_per_proc)
+    particles_input_hdf5(
+            const MPI_Comm in_mpi_comm,
+            const std::string& inFilename,
+            const std::string& inDatanameState,
+            const std::string& inDatanameRhs,
+            const std::vector<real_number>& in_spatial_limit_per_proc)
         : filename(inFilename),
-          mpi_comm(in_mpi_comm), my_rank(-1), nb_processes(-1), nb_total_particles(0),
+          mpi_comm(in_mpi_comm),
+          my_rank(-1),
+          nb_processes(-1),
+          total_number_of_particles(0),
           nb_particles_for_me(0){
         TIMEZONE("particles_input_hdf5");
 
@@ -81,6 +131,7 @@ public:
         assert(plist_id_par >= 0);
         {
             int retTest = H5Pset_fapl_mpio(plist_id_par, mpi_comm, MPI_INFO_NULL);
+            variable_used_only_in_assert(retTest);
             assert(retTest >= 0);
         }
 
@@ -100,13 +151,17 @@ public:
 
             std::vector<hsize_t> state_dim_array(space_dim);
             int hdfret = H5Sget_simple_extent_dims(dspace, &state_dim_array[0], NULL);
+            variable_used_only_in_assert(hdfret);
             assert(hdfret >= 0);
             // Last value is the position dim of the particles
             assert(state_dim_array.back() == size_particle_positions);
 
-            nb_total_particles = 1;
+            // compute total number of particles, store initial condition array shape
+            total_number_of_particles = 1;
+            particle_file_layout.resize(state_dim_array.size()-1);
             for (size_t idx_dim = 0; idx_dim < state_dim_array.size()-1; ++idx_dim){
-                nb_total_particles *= state_dim_array[idx_dim];
+                total_number_of_particles *= state_dim_array[idx_dim];
+                particle_file_layout[idx_dim] = state_dim_array[idx_dim];
             }
 
             hdfret = H5Sclose(dspace);
@@ -128,6 +183,7 @@ public:
 
             // Chichi comment: wouldn't &rhs_dim_array.front() be safer?
             int hdfret = H5Sget_simple_extent_dims(dspace, &rhs_dim_array[0], NULL);
+            variable_used_only_in_assert(hdfret);
             assert(hdfret >= 0);
             assert(rhs_dim_array.back() == size_particle_rhs);
             // Chichi comment: this assertion will fail in general
@@ -140,30 +196,36 @@ public:
             assert(hdfret >= 0);
         }
 
-        particles_utils::IntervalSplitter<hsize_t> load_splitter(nb_total_particles, nb_processes, my_rank);
+        particles_utils::IntervalSplitter<hsize_t> load_splitter(total_number_of_particles, nb_processes, my_rank);
 
         static_assert(std::is_same<real_number, double>::value
                       || std::is_same<real_number, float>::value, "real_number must be double or float");
         const hid_t type_id = (sizeof(real_number) == 8?H5T_NATIVE_DOUBLE:H5T_NATIVE_FLOAT);
 
         /// Load the data
-        std::unique_ptr<real_number[]> split_particles_positions(new real_number[load_splitter.getMySize()*size_particle_positions]);
+        std::unique_ptr<real_number[]> split_particles_positions;
+        if(load_splitter.getMySize()){
+            split_particles_positions.reset(new real_number[load_splitter.getMySize()*size_particle_positions]);
+        }
+
         {
             TIMEZONE("state-read");
             hid_t dset = H5Dopen(particle_file, inDatanameState.c_str(), H5P_DEFAULT);
             assert(dset >= 0);
 
-            hid_t rspace = H5Dget_space(dset);
+            hsize_t file_space_dims[2] = {total_number_of_particles, size_particle_positions};
+            hid_t rspace = H5Screate_simple(2, file_space_dims, NULL);
             assert(rspace >= 0);
 
             hsize_t offset[2] = {load_splitter.getMyOffset(), 0};
-            hsize_t mem_dims[2] = {load_splitter.getMySize(), 3};
+            hsize_t mem_dims[2] = {load_splitter.getMySize(), size_particle_positions};
 
             hid_t mspace = H5Screate_simple(2, &mem_dims[0], NULL);
             assert(mspace >= 0);
 
             int rethdf = H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset,
                                              NULL, mem_dims, NULL);
+            variable_used_only_in_assert(rethdf);
             assert(rethdf >= 0);
             rethdf = H5Dread(dset, type_id, mspace, rspace, H5P_DEFAULT, split_particles_positions.get());
             assert(rethdf >= 0);
@@ -178,12 +240,14 @@ public:
             TIMEZONE("rhs-read");
             hid_t dset = H5Dopen(particle_file, inDatanameRhs.c_str(), H5P_DEFAULT);
             assert(dset >= 0);
+            hsize_t file_space_dims[3] = {nb_rhs, total_number_of_particles, size_particle_rhs};
+            hid_t rspace = H5Screate_simple(3, file_space_dims, NULL);
+            assert(rspace >= 0);
 
             for(hsize_t idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){
-                hid_t rspace = H5Dget_space(dset);
-                assert(rspace >= 0);
-
-                split_particles_rhs[idx_rhs].reset(new real_number[load_splitter.getMySize()*size_particle_rhs]);
+                if(load_splitter.getMySize()){
+                    split_particles_rhs[idx_rhs].reset(new real_number[load_splitter.getMySize()*size_particle_rhs]);
+                }
 
                 hsize_t offset[3] = {idx_rhs, load_splitter.getMyOffset(), 0};
                 hsize_t mem_dims[3] = {1, load_splitter.getMySize(), size_particle_rhs};
@@ -193,21 +257,26 @@ public:
 
                 int rethdf = H5Sselect_hyperslab( rspace, H5S_SELECT_SET, offset,
                                                  NULL, mem_dims, NULL);
+                variable_used_only_in_assert(rethdf);
                 assert(rethdf >= 0);
                 rethdf = H5Dread(dset, type_id, mspace, rspace, H5P_DEFAULT, split_particles_rhs[idx_rhs].get());
                 assert(rethdf >= 0);
 
                 rethdf = H5Sclose(mspace);
                 assert(rethdf >= 0);
-
-                rethdf = H5Sclose(rspace);
-                assert(rethdf >= 0);
             }
-            int rethdf = H5Dclose(dset);
+
+            int rethdf = H5Sclose(rspace);
+            assert(rethdf >= 0);
+            rethdf = H5Dclose(dset);
+            variable_used_only_in_assert(rethdf);
             assert(rethdf >= 0);
         }
 
-        std::unique_ptr<partsize_t[]> split_particles_indexes(new partsize_t[load_splitter.getMySize()]);
+        std::unique_ptr<partsize_t[]> split_particles_indexes;
+        if(load_splitter.getMySize()){
+            split_particles_indexes.reset(new partsize_t[load_splitter.getMySize()]);
+        }
         for(partsize_t idx_part = 0 ; idx_part < partsize_t(load_splitter.getMySize()) ; ++idx_part){
             split_particles_indexes[idx_part] = idx_part + partsize_t(load_splitter.getMyOffset());
         }
@@ -227,7 +296,7 @@ public:
                                                 &split_particles_positions[previousOffset*size_particle_positions],
                                                  partsize_t(load_splitter.getMySize())-previousOffset,
                                                  [&](const real_number val[]){
-                    const real_number shiftPos = val[IDX_Z]-spatial_box_offset;
+                    const real_number shiftPos = val[IDXC_Z]-spatial_box_offset;
                     const real_number nbRepeat = floor(shiftPos/spatial_box_width);
                     const real_number posInBox = shiftPos - (spatial_box_width*nbRepeat);
                     return posInBox < limitPartitionShifted;
@@ -254,17 +323,23 @@ public:
             // nb_particles_per_processes cannot be used after due to move
             nb_particles_for_me = exchanger.getTotalToRecv();
 
-            my_particles_positions.reset(new real_number[exchanger.getTotalToRecv()*size_particle_positions]);
+            if(nb_particles_for_me){
+                my_particles_positions.reset(new real_number[exchanger.getTotalToRecv()*size_particle_positions]);
+            }
             exchanger.alltoallv<real_number>(split_particles_positions.get(), my_particles_positions.get(), size_particle_positions);
-            split_particles_positions.release();
+            delete[] split_particles_positions.release();
 
-            my_particles_indexes.reset(new partsize_t[exchanger.getTotalToRecv()]);
+            if(nb_particles_for_me){
+                my_particles_indexes.reset(new partsize_t[exchanger.getTotalToRecv()]);
+            }
             exchanger.alltoallv<partsize_t>(split_particles_indexes.get(), my_particles_indexes.get());
-            split_particles_indexes.release();
+            delete[] split_particles_indexes.release();
 
             my_particles_rhs.resize(nb_rhs);
             for(int idx_rhs = 0 ; idx_rhs < int(nb_rhs) ; ++idx_rhs){
-                my_particles_rhs[idx_rhs].reset(new real_number[exchanger.getTotalToRecv()*size_particle_rhs]);
+                if(nb_particles_for_me){
+                    my_particles_rhs[idx_rhs].reset(new real_number[exchanger.getTotalToRecv()*size_particle_rhs]);
+                }
                 exchanger.alltoallv<real_number>(split_particles_rhs[idx_rhs].get(), my_particles_rhs[idx_rhs].get(), size_particle_rhs);
             }
         }
@@ -272,6 +347,7 @@ public:
         {
             TIMEZONE("close");
             int hdfret = H5Fclose(particle_file);
+            variable_used_only_in_assert(hdfret);
             assert(hdfret >= 0);
             hdfret = H5Pclose(plist_id_par);
             assert(hdfret >= 0);
@@ -282,7 +358,7 @@ public:
     }
 
     partsize_t getTotalNbParticles() final{
-        return partsize_t(nb_total_particles);
+        return partsize_t(total_number_of_particles);
     }
 
     partsize_t getLocalNbParticles() final{
@@ -294,7 +370,7 @@ public:
     }
 
     std::unique_ptr<real_number[]> getMyParticles() final {
-        assert(my_particles_positions != nullptr);
+        assert(my_particles_positions != nullptr || nb_particles_for_me == 0);
         return std::move(my_particles_positions);
     }
 
@@ -304,9 +380,13 @@ public:
     }
 
     std::unique_ptr<partsize_t[]> getMyParticlesIndexes() final {
-        assert(my_particles_indexes != nullptr);
+        assert(my_particles_indexes != nullptr || nb_particles_for_me == 0);
         return std::move(my_particles_indexes);
     }
+
+    std::vector<hsize_t> getParticleFileLayout(){
+        return std::vector<hsize_t>(this->particle_file_layout);
+    }
 };
 
 #endif
diff --git a/bfps/cpp/particles/particles_output_hdf5.hpp b/cpp/particles/particles_output_hdf5.hpp
similarity index 71%
rename from bfps/cpp/particles/particles_output_hdf5.hpp
rename to cpp/particles/particles_output_hdf5.hpp
index bc0a03690293668203dd78978680fdea03ab3a28..6be651799f1bf98e3215cc3b0988b77975706b19 100644
--- a/bfps/cpp/particles/particles_output_hdf5.hpp
+++ b/cpp/particles/particles_output_hdf5.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef PARTICLES_OUTPUT_HDF5_HPP
 #define PARTICLES_OUTPUT_HDF5_HPP
 
@@ -10,21 +35,19 @@
 
 template <class partsize_t,
           class real_number,
-          int size_particle_positions,
-          int size_particle_rhs>
+          int size_particle_positions>
 class particles_output_hdf5 : public abstract_particles_output<partsize_t,
                                                                real_number,
-                                                               size_particle_positions,
-                                                               size_particle_rhs>{
+                                                               size_particle_positions>{
     using Parent = abstract_particles_output<partsize_t,
                                              real_number,
-                                             size_particle_positions,
-                                             size_particle_rhs>;
+                                             size_particle_positions>;
 
-    const std::string particle_species_name;
+    std::string particle_species_name;
 
     hid_t file_id;
     const partsize_t total_nb_particles;
+    std::vector<hsize_t> particle_file_layout;   // to hold the shape of initial condition array
 
     hid_t dset_id_state;
     hid_t dset_id_rhs;
@@ -39,8 +62,7 @@ public:
                           const bool in_use_collective_io = false)
             : abstract_particles_output<partsize_t,
                                         real_number,
-                                        size_particle_positions,
-                                        size_particle_rhs>(
+                                        size_particle_positions>(
                                                 in_mpi_com,
                                                 inTotalNbParticles,
                                                 in_nb_rhs),
@@ -63,6 +85,7 @@ public:
                     plist_id_par,
                     Parent::getComWriter(),
                     MPI_INFO_NULL);
+            variable_used_only_in_assert(retTest);
             assert(retTest >= 0);
 
             // Parallel HDF5 write
@@ -90,11 +113,18 @@ public:
 
     ~particles_output_hdf5(){}
 
+    void update_particle_species_name(
+            const std::string new_name)
+    {
+        this->particle_species_name.assign(new_name);
+    }
+
     int close_file(void){
         if(Parent::isInvolved()){
             TIMEZONE("particles_output_hdf5::close_file");
 
             int rethdf = H5Gclose(dset_id_state);
+            variable_used_only_in_assert(rethdf);
             assert(rethdf >= 0);
 
             rethdf = H5Gclose(dset_id_rhs);
@@ -177,7 +207,8 @@ public:
             const real_number* particles_positions,
             const std::unique_ptr<real_number[]>* particles_rhs,
             const partsize_t nb_particles,
-            const partsize_t particles_idx_offset) final{
+            const partsize_t particles_idx_offset,
+            const int size_particle_rhs) final{
         assert(Parent::isInvolved());
 
         TIMEZONE("particles_output_hdf5::write");
@@ -194,16 +225,14 @@ public:
         assert(plist_id >= 0);
         {
             int rethdf = H5Pset_dxpl_mpio(plist_id, use_collective_io ? H5FD_MPIO_COLLECTIVE : H5FD_MPIO_INDEPENDENT);
+            variable_used_only_in_assert(rethdf);
             assert(rethdf >= 0);
         }
 
         {
-            assert(total_nb_particles >= 0);
-            assert(size_particle_positions >= 0);
-            const hsize_t datacount[2] = {
-                hsize_t(total_nb_particles),
-                hsize_t(size_particle_positions)};
-            hid_t dataspace = H5Screate_simple(2, datacount, NULL);
+            std::vector<hsize_t> datacount = std::vector<hsize_t>(this->particle_file_layout);
+            datacount.push_back(size_particle_positions);
+            hid_t dataspace = H5Screate_simple(datacount.size(), &datacount.front(), NULL);
             assert(dataspace >= 0);
 
             hid_t dataset_id = H5Dcreate( dset_id_state,
@@ -222,7 +251,12 @@ public:
             hid_t memspace = H5Screate_simple(2, count, NULL);
             assert(memspace >= 0);
 
-            hid_t filespace = H5Dget_space(dataset_id);
+            assert(total_nb_particles >= 0);
+            assert(size_particle_positions >= 0);
+            const hsize_t file_count[2] = {hsize_t(total_nb_particles), size_particle_positions};
+            hid_t filespace = H5Screate_simple(2, file_count, NULL);
+            assert(filespace >= 0);
+
             int rethdf = H5Sselect_hyperslab(
                     filespace,
                     H5S_SELECT_SET,
@@ -230,6 +264,7 @@ public:
                     NULL,
                     count,
                     NULL);
+            variable_used_only_in_assert(rethdf);
             assert(rethdf >= 0);
 
             herr_t	status = H5Dwrite(
@@ -239,6 +274,7 @@ public:
                     filespace,
                     plist_id,
                     particles_positions);
+            variable_used_only_in_assert(status);
             assert(status >= 0);
             rethdf = H5Sclose(memspace);
             assert(rethdf >= 0);
@@ -249,10 +285,10 @@ public:
         }
         {
             assert(size_particle_rhs >= 0);
-            const hsize_t datacount[3] = {hsize_t(Parent::getNbRhs()),
-                                          hsize_t(total_nb_particles),
-                                          hsize_t(size_particle_rhs)};
-            hid_t dataspace = H5Screate_simple(3, datacount, NULL);
+            std::vector<hsize_t> datacount = std::vector<hsize_t>(this->particle_file_layout);
+            datacount.insert(datacount.begin(), hsize_t(Parent::getNbRhs()));
+            datacount.push_back(size_particle_positions);
+            hid_t dataspace = H5Screate_simple(datacount.size(), &datacount.front(), NULL);
             assert(dataspace >= 0);
 
             hid_t dataset_id = H5Dcreate( dset_id_rhs,
@@ -277,8 +313,12 @@ public:
                 hid_t memspace = H5Screate_simple(3, count, NULL);
                 assert(memspace >= 0);
 
-                hid_t filespace = H5Dget_space(dataset_id);
+                assert(total_nb_particles >= 0);
+                assert(size_particle_positions >= 0);
+                const hsize_t file_count[3] = {hsize_t(Parent::getNbRhs()), hsize_t(total_nb_particles), size_particle_positions};
+                hid_t filespace = H5Screate_simple(3, file_count, NULL);
                 assert(filespace >= 0);
+
                 int rethdf = H5Sselect_hyperslab(
                         filespace,
                         H5S_SELECT_SET,
@@ -286,6 +326,7 @@ public:
                         NULL,
                         count,
                         NULL);
+                variable_used_only_in_assert(rethdf);
                 assert(rethdf >= 0);
 
                 herr_t	status = H5Dwrite(
@@ -295,6 +336,7 @@ public:
                         filespace,
                         plist_id,
                         particles_rhs[idx_rhs].get());
+                variable_used_only_in_assert(status);
                 assert(status >= 0);
                 rethdf = H5Sclose(filespace);
                 assert(rethdf >= 0);
@@ -302,14 +344,27 @@ public:
                 assert(rethdf >= 0);
             }
             int rethdf = H5Dclose(dataset_id);
+            variable_used_only_in_assert(rethdf);
             assert(rethdf >= 0);
         }
 
         {
             int rethdf = H5Pclose(plist_id);
+            variable_used_only_in_assert(rethdf);
             assert(rethdf >= 0);
         }
     }
+
+    int setParticleFileLayout(std::vector<hsize_t> input_layout){
+        this->particle_file_layout.resize(input_layout.size());
+        for (unsigned int i=0; i<this->particle_file_layout.size(); i++)
+            this->particle_file_layout[i] = input_layout[i];
+        return EXIT_SUCCESS;
+    }
+
+    std::vector<hsize_t> getParticleFileLayout(void){
+        return std::vector<hsize_t>(this->particle_file_layout);
+    }
 };
 
 #endif//PARTICLES_OUTPUT_HDF5_HPP
diff --git a/bfps/cpp/particles/particles_output_mpiio.hpp b/cpp/particles/particles_output_mpiio.hpp
similarity index 68%
rename from bfps/cpp/particles/particles_output_mpiio.hpp
rename to cpp/particles/particles_output_mpiio.hpp
index 77dae6ca2f9441948ccf04f8a72e4a53d249894b..b1c17898c3c2941e0ed161e40113a0d13c99b524 100644
--- a/bfps/cpp/particles/particles_output_mpiio.hpp
+++ b/cpp/particles/particles_output_mpiio.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef PARTICLES_OUTPUT_MPIIO
 #define PARTICLES_OUTPUT_MPIIO
 
@@ -11,8 +36,8 @@
 #include "particles_utils.hpp"
 
 template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs>
-class particles_output_mpiio : public abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>{
-    using Parent = abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>;
+class particles_output_mpiio : public abstract_particles_output<partsize_t, real_number, size_particle_positions>{
+    using Parent = abstract_particles_output<partsize_t, real_number, size_particle_positions>;
 
     const std::string filename;
     const int nb_step_prealloc;
@@ -24,7 +49,7 @@ class particles_output_mpiio : public abstract_particles_output<partsize_t, real
 public:
     particles_output_mpiio(MPI_Comm in_mpi_com, const std::string in_filename, const partsize_t inTotalNbParticles,
                            const int in_nb_rhs, const int in_nb_step_prealloc = -1)
-            : abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>(in_mpi_com, inTotalNbParticles, in_nb_rhs),
+            : abstract_particles_output<partsize_t, real_number, size_particle_positions>(in_mpi_com, inTotalNbParticles, in_nb_rhs),
               filename(in_filename), nb_step_prealloc(in_nb_step_prealloc), current_step_in_file(0){
         if(Parent::isInvolved()){
             {
diff --git a/cpp/particles/particles_output_sampling_hdf5.hpp b/cpp/particles/particles_output_sampling_hdf5.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ea3c94dcec25572dd324c643d9cc80468ba51680
--- /dev/null
+++ b/cpp/particles/particles_output_sampling_hdf5.hpp
@@ -0,0 +1,293 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef PARTICLES_OUTPUT_SAMPLING_HDF5_HPP
+#define PARTICLES_OUTPUT_SAMPLING_HDF5_HPP
+
+#include "abstract_particles_output.hpp"
+
+#include <hdf5.h>
+
+template <class partsize_t,
+          class real_number,
+          int size_particle_positions>
+class particles_output_sampling_hdf5 : public abstract_particles_output<
+                                       partsize_t,
+                                       real_number,
+                                       size_particle_positions>{
+    using Parent = abstract_particles_output<partsize_t,
+                                             real_number,
+                                             size_particle_positions>;
+
+    hid_t file_id, pgroup_id;
+
+    std::string dataset_name;
+    std::vector<hsize_t> particle_file_layout;   // to hold the shape of initial condition array
+    const bool use_collective_io;
+
+public:
+    static bool DatasetExistsCol(MPI_Comm in_mpi_com,
+                                  const std::string& in_filename,
+                                  const std::string& in_groupname,
+                                 const std::string& in_dataset_name){
+        int my_rank;
+        AssertMpi(MPI_Comm_rank(in_mpi_com, &my_rank));
+
+        int dataset_exists = -1;
+
+        if(my_rank == 0){
+            hid_t file_id = H5Fopen(
+                    in_filename.c_str(),
+                    H5F_ACC_RDWR | H5F_ACC_DEBUG,
+                    H5P_DEFAULT);
+            assert(file_id >= 0);
+
+            dataset_exists = H5Lexists(
+                    file_id,
+                    (in_groupname + "/" + in_dataset_name).c_str(),
+                    H5P_DEFAULT);
+
+            int retTest = H5Fclose(file_id);
+            assert(retTest >= 0);
+        }
+
+        AssertMpi(MPI_Bcast( &dataset_exists, 1, MPI_INT, 0, in_mpi_com ));
+        return dataset_exists;
+    }
+
+    particles_output_sampling_hdf5(
+            MPI_Comm in_mpi_com,
+            const partsize_t inTotalNbParticles,
+            const std::string& in_filename,
+            const std::string& in_groupname,
+            const std::string& in_dataset_name,
+            const bool in_use_collective_io = false)
+            : Parent(in_mpi_com, inTotalNbParticles, 1),
+              dataset_name(in_dataset_name),
+              use_collective_io(in_use_collective_io){
+        if(Parent::isInvolved()){
+            // prepare parallel MPI access property list
+            hid_t plist_id_par = H5Pcreate(H5P_FILE_ACCESS);
+            assert(plist_id_par >= 0);
+            int retTest = H5Pset_fapl_mpio(
+                    plist_id_par,
+                    Parent::getComWriter(),
+                    MPI_INFO_NULL);
+            variable_used_only_in_assert(retTest);
+            assert(retTest >= 0);
+
+            // open file for parallel HDF5 access
+            file_id = H5Fopen(
+                    in_filename.c_str(),
+                    H5F_ACC_RDWR | H5F_ACC_DEBUG,
+                    plist_id_par);
+            assert(file_id >= 0);
+            retTest = H5Pclose(plist_id_par);
+            assert(retTest >= 0);
+
+            // open group
+            pgroup_id = H5Gopen(
+                    file_id,
+                    in_groupname.c_str(),
+                    H5P_DEFAULT);
+            assert(pgroup_id >= 0);
+        }
+    }
+
+    ~particles_output_sampling_hdf5(){
+        if(Parent::isInvolved()){
+            // close group
+            int retTest = H5Gclose(pgroup_id);
+            variable_used_only_in_assert(retTest);
+            assert(retTest >= 0);
+            // close file
+            retTest = H5Fclose(file_id);
+            assert(retTest >= 0);
+        }
+    }
+
+    int switch_to_group(
+            const std::string &in_groupname)
+    {
+        if(Parent::isInvolved()){
+            // close old group
+            int retTest = H5Gclose(pgroup_id);
+            variable_used_only_in_assert(retTest);
+            assert(retTest >= 0);
+
+            // open new group
+            pgroup_id = H5Gopen(
+                    file_id,
+                    in_groupname.c_str(),
+                    H5P_DEFAULT);
+            assert(pgroup_id >= 0);
+        }
+        return EXIT_SUCCESS;
+    }
+
+    template <int size_particle_rhs>
+    int save_dataset(
+            const std::string& in_groupname,
+            const std::string& in_dataset_name,
+            const real_number input_particles_positions[],
+            const std::unique_ptr<real_number[]> input_particles_rhs[],
+            const partsize_t index_particles[],
+            const partsize_t nb_particles,
+            const int idx_time_step)
+    {
+        // update group
+        int retTest = this->switch_to_group(
+                in_groupname);
+        variable_used_only_in_assert(retTest);
+        assert(retTest == EXIT_SUCCESS);
+        // update dataset name
+        dataset_name = in_dataset_name + "/" + std::to_string(idx_time_step);
+        int dataset_exists;
+        if (this->getMyRank() == 0)
+            dataset_exists = H5Lexists(
+                pgroup_id,
+                dataset_name.c_str(),
+                H5P_DEFAULT);
+        AssertMpi(MPI_Bcast(&dataset_exists, 1, MPI_INT, 0, this->getCom()));
+        if (dataset_exists == 0)
+            this->template save<size_particle_rhs>(
+                input_particles_positions,
+                input_particles_rhs,
+                index_particles,
+                nb_particles,
+                idx_time_step);
+        return EXIT_SUCCESS;
+    }
+
+    void write(
+            const int /*idx_time_step*/,
+            const real_number* /*particles_positions*/,
+            const std::unique_ptr<real_number[]>* particles_rhs,
+            const partsize_t nb_particles,
+            const partsize_t particles_idx_offset,
+            const int size_particle_rhs) final{
+        assert(Parent::isInvolved());
+
+        TIMEZONE("particles_output_hdf5::write");
+
+        assert(particles_idx_offset < Parent::getTotalNbParticles() ||
+               (particles_idx_offset == Parent::getTotalNbParticles() &&
+                nb_particles == 0));
+        assert(particles_idx_offset+nb_particles <= Parent::getTotalNbParticles());
+
+        static_assert(std::is_same<real_number, double>::value ||
+                      std::is_same<real_number, float>::value,
+                      "real_number must be double or float");
+        const hid_t type_id = (sizeof(real_number) == 8 ?
+                               H5T_NATIVE_DOUBLE :
+                               H5T_NATIVE_FLOAT);
+
+        hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
+        assert(plist_id >= 0);
+        {
+            int rethdf = H5Pset_dxpl_mpio(
+                    plist_id,
+                    (use_collective_io ?
+                     H5FD_MPIO_COLLECTIVE :
+                     H5FD_MPIO_INDEPENDENT));
+            variable_used_only_in_assert(rethdf);
+            assert(rethdf >= 0);
+        }
+        {
+            assert(size_particle_rhs >= 0);
+            std::vector<hsize_t> datacount = std::vector<hsize_t>(this->particle_file_layout);
+            datacount.push_back(size_particle_positions);
+            hid_t dataspace = H5Screate_simple(datacount.size(), &datacount.front(), NULL);
+            assert(dataspace >= 0);
+
+            hid_t dataset_id = H5Dcreate( pgroup_id,
+                                          dataset_name.c_str(),
+                                          type_id,
+                                          dataspace,
+                                          H5P_DEFAULT,
+                                          H5P_DEFAULT,
+                                          H5P_DEFAULT);
+            assert(dataset_id >= 0);
+
+            assert(particles_idx_offset >= 0);
+            const hsize_t count[2] = {
+                hsize_t(nb_particles),
+                hsize_t(size_particle_rhs)};
+            const hsize_t offset[2] = {
+                hsize_t(particles_idx_offset),
+                0};
+            hid_t memspace = H5Screate_simple(2, count, NULL);
+            assert(memspace >= 0);
+
+            const hsize_t file_count[2] = {hsize_t(Parent::getTotalNbParticles()), hsize_t(size_particle_rhs)};
+            hid_t filespace = H5Screate_simple(2, file_count, NULL);
+            assert(filespace >= 0);
+            int rethdf = H5Sselect_hyperslab(
+                    filespace,
+                    H5S_SELECT_SET,
+                    offset,
+                    NULL,
+                    count,
+                    NULL);
+            variable_used_only_in_assert(rethdf);
+            assert(rethdf >= 0);
+
+            herr_t	status = H5Dwrite(
+                    dataset_id,
+                    type_id,
+                    memspace,
+                    filespace,
+                    plist_id,
+                    particles_rhs[0].get());
+            variable_used_only_in_assert(status);
+            assert(status >= 0);
+            rethdf = H5Sclose(filespace);
+            assert(rethdf >= 0);
+            rethdf = H5Sclose(memspace);
+            assert(rethdf >= 0);
+            rethdf = H5Dclose(dataset_id);
+            assert(rethdf >= 0);
+        }
+
+        {
+            int rethdf = H5Pclose(plist_id);
+            variable_used_only_in_assert(rethdf);
+            assert(rethdf >= 0);
+        }
+    }
+
+    int setParticleFileLayout(std::vector<hsize_t> input_layout){
+        this->particle_file_layout.resize(input_layout.size());
+        for (unsigned int i=0; i<this->particle_file_layout.size(); i++)
+            this->particle_file_layout[i] = input_layout[i];
+        return EXIT_SUCCESS;
+    }
+
+    std::vector<hsize_t> getParticleFileLayout(void){
+        return std::vector<hsize_t>(this->particle_file_layout);
+    }
+};
+
+#endif
diff --git a/cpp/particles/particles_sampling.hpp b/cpp/particles/particles_sampling.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..672c080aea1b59e250109f94fd5fee388e199755
--- /dev/null
+++ b/cpp/particles/particles_sampling.hpp
@@ -0,0 +1,109 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef PARTICLES_SAMPLING_HPP
+#define PARTICLES_SAMPLING_HPP
+
+#include <memory>
+#include <string>
+
+#include "abstract_particles_system.hpp"
+#include "particles_output_sampling_hdf5.hpp"
+
+#include "field.hpp"
+#include "kspace.hpp"
+
+
+template <class partsize_t, class particles_rnumber, class rnumber, field_backend be, field_components fc>
+void sample_from_particles_system(const field<rnumber, be, fc>& in_field, // a pointer to a field<rnumber, FFTW, fc>
+                                  std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>& ps, // a pointer to an particles_system<double>
+                                  const std::string& filename,
+                                  const std::string& parent_groupname,
+                                  const std::string& fname){
+    const std::string datasetname = fname + std::string("/") + std::to_string(ps->get_step_idx());
+    const int size_particle_rhs = ncomp(fc);
+
+    // Stop here if already exists
+    if(particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3>::DatasetExistsCol(MPI_COMM_WORLD,
+                                                                                          filename,
+                                                                                          parent_groupname,
+                                                                                          datasetname)){
+        return;
+    }
+
+    const partsize_t nb_particles = ps->getLocalNbParticles();
+    std::unique_ptr<particles_rnumber[]> sample_rhs(new particles_rnumber[size_particle_rhs*nb_particles]);
+    std::fill_n(sample_rhs.get(), size_particle_rhs*nb_particles, 0);
+
+    ps->sample_compute_field(in_field, sample_rhs.get());
+
+
+
+    particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3> outputclass(MPI_COMM_WORLD,
+                                                                                 ps->getGlobalNbParticles(),
+                                                                                 filename,
+                                                                                 parent_groupname,
+                                                                                 datasetname);
+    outputclass.template save<size_particle_rhs>(ps->getParticlesState(),
+                     &sample_rhs,
+                     ps->getParticlesIndexes(),
+                     ps->getLocalNbParticles(),
+                     ps->get_step_idx());
+}
+
+template <class partsize_t, class particles_rnumber>
+void sample_particles_system_position(
+        std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>& ps, // a pointer to an particles_system<double>
+                                  const std::string& filename,
+                                  const std::string& parent_groupname,
+                                  const std::string& fname){
+    const std::string datasetname = fname + std::string("/") + std::to_string(ps->get_step_idx());
+
+    // Stop here if already exists
+    if(particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3>::DatasetExistsCol(MPI_COMM_WORLD,
+                                                                                          filename,
+                                                                                          parent_groupname,
+                                                                                          datasetname)){
+        return;
+    }
+
+    const partsize_t nb_particles = ps->getLocalNbParticles();
+    std::unique_ptr<particles_rnumber[]> sample_rhs(new particles_rnumber[3*nb_particles]);
+    std::copy(ps->getParticlesState(), ps->getParticlesState() + 3*nb_particles, sample_rhs.get());
+
+    particles_output_sampling_hdf5<partsize_t, particles_rnumber, 3> outputclass(MPI_COMM_WORLD,
+                                                                                 ps->getGlobalNbParticles(),
+                                                                                 filename,
+                                                                                 parent_groupname,
+                                                                                 datasetname);
+    outputclass.template save<3>(ps->getParticlesState(),
+                     &sample_rhs,
+                     ps->getParticlesIndexes(),
+                     ps->getLocalNbParticles(),
+                     ps->get_step_idx());
+}
+
+#endif//PARTICLES_SAMPLING_HPP
+
diff --git a/bfps/cpp/particles/particles_system.hpp b/cpp/particles/particles_system.hpp
similarity index 55%
rename from bfps/cpp/particles/particles_system.hpp
rename to cpp/particles/particles_system.hpp
index 02767a8b433ecb8365f4a0577d1c0d6508c2bed1..a05175ca52c4c4b669f29f893913b3d7fcf6c484 100644
--- a/bfps/cpp/particles/particles_system.hpp
+++ b/cpp/particles/particles_system.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef PARTICLES_SYSTEM_HPP
 #define PARTICLES_SYSTEM_HPP
 
@@ -12,9 +37,13 @@
 #include "particles_adams_bashforth.hpp"
 #include "scope_timer.hpp"
 
+#include "p2p_distr_mpi.hpp"
+
 template <class partsize_t, class real_number, class field_rnumber, class field_class, class interpolator_class, int interp_neighbours,
-          int size_particle_rhs>
+          int size_particle_positions, int size_particle_rhs, class p2p_computer_class, class particles_inner_computer_class>
 class particles_system : public abstract_particles_system<partsize_t, real_number> {
+    static_assert(size_particle_positions >= 3, "There should be at least the positions X,Y,Z in the state");
+
     MPI_Comm mpi_com;
 
     const std::pair<int,int> current_partition_interval;
@@ -24,12 +53,12 @@ class particles_system : public abstract_particles_system<partsize_t, real_numbe
 
     particles_distr_mpi<partsize_t, real_number> particles_distr;
 
-    particles_adams_bashforth<partsize_t, real_number, 3, size_particle_rhs> positions_updater;
+    particles_adams_bashforth<partsize_t, real_number, size_particle_positions, size_particle_rhs> positions_updater;
 
     using computer_class = particles_field_computer<partsize_t, real_number, interpolator_class, interp_neighbours>;
     computer_class computer;
 
-    field_class default_field;
+    const field_class& default_field;
 
     std::unique_ptr<partsize_t[]> current_my_nb_particles_per_partition;
     std::unique_ptr<partsize_t[]> current_offset_particles_for_partition;
@@ -44,9 +73,14 @@ class particles_system : public abstract_particles_system<partsize_t, real_numbe
     partsize_t my_nb_particles;
     const partsize_t total_nb_particles;
     std::vector<std::unique_ptr<real_number[]>> my_particles_rhs;
+    std::vector<hsize_t> particle_file_layout;
 
     int step_idx;
 
+    p2p_distr_mpi<partsize_t, real_number> distr_p2p;
+    p2p_computer_class computer_p2p;
+    particles_inner_computer_class computer_particules_inner;
+
 public:
     particles_system(const std::array<size_t,3>& field_grid_dim, const std::array<real_number,3>& in_spatial_box_width,
                      const std::array<real_number,3>& in_spatial_box_offset,
@@ -57,9 +91,12 @@ public:
                      const field_class& in_field,
                      MPI_Comm in_mpi_com,
                      const partsize_t in_total_nb_particles,
+                     const real_number in_cutoff,
+                     p2p_computer_class in_computer_p2p,
+                     particles_inner_computer_class in_computer_particules_inner,
                      const int in_current_iteration = 1)
         : mpi_com(in_mpi_com),
-          current_partition_interval({in_local_field_offset[IDX_Z], in_local_field_offset[IDX_Z] + in_local_field_dims[IDX_Z]}),
+          current_partition_interval({in_local_field_offset[IDXC_Z], in_local_field_offset[IDXC_Z] + in_local_field_dims[IDXC_Z]}),
           partition_interval_size(current_partition_interval.second - current_partition_interval.first),
           interpolator(),
           particles_distr(in_mpi_com, current_partition_interval,field_grid_dim),
@@ -69,7 +106,9 @@ public:
           default_field(in_field),
           spatial_box_width(in_spatial_box_width), spatial_partition_width(in_spatial_partition_width),
           my_spatial_low_limit(in_my_spatial_low_limit), my_spatial_up_limit(in_my_spatial_up_limit),
-          my_nb_particles(0), total_nb_particles(in_total_nb_particles), step_idx(in_current_iteration){
+          my_nb_particles(0), total_nb_particles(in_total_nb_particles), step_idx(in_current_iteration),
+          distr_p2p(in_mpi_com, current_partition_interval,field_grid_dim, spatial_box_width, in_spatial_box_offset, in_cutoff),
+          computer_p2p(std::move(in_computer_p2p)), computer_particules_inner(std::move(in_computer_particules_inner)){
 
         current_my_nb_particles_per_partition.reset(new partsize_t[partition_interval_size]);
         current_offset_particles_for_partition.reset(new partsize_t[partition_interval_size+1]);
@@ -87,15 +126,16 @@ public:
         my_nb_particles = particles_input.getLocalNbParticles();
 
         for(partsize_t idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me
-            const int partition_level = computer.pbc_field_layer(my_particles_positions[idx_part*3+IDX_Z], IDX_Z);
+            const int partition_level = computer.pbc_field_layer(my_particles_positions[idx_part*size_particle_positions+IDXC_Z], IDXC_Z);
+            variable_used_only_in_assert(partition_level);
             assert(partition_level >= current_partition_interval.first);
             assert(partition_level < current_partition_interval.second);
         }
 
-        particles_utils::partition_extra_z<partsize_t, 3>(&my_particles_positions[0], my_nb_particles, partition_interval_size,
+        particles_utils::partition_extra_z<partsize_t, size_particle_positions>(&my_particles_positions[0], my_nb_particles, partition_interval_size,
                                               current_my_nb_particles_per_partition.get(), current_offset_particles_for_partition.get(),
         [&](const real_number& z_pos){
-            const int partition_level = computer.pbc_field_layer(z_pos, IDX_Z);
+            const int partition_level = computer.pbc_field_layer(z_pos, IDXC_Z);
             assert(current_partition_interval.first <= partition_level && partition_level < current_partition_interval.second);
             return partition_level - current_partition_interval.first;
         },
@@ -114,16 +154,15 @@ public:
                 assert(current_my_nb_particles_per_partition[idxPartition] ==
                        current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]);
                 for(partsize_t idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){
-                    assert(computer.pbc_field_layer(my_particles_positions[idx*3+IDX_Z], IDX_Z)-current_partition_interval.first == idxPartition);
+                    assert(computer.pbc_field_layer(my_particles_positions[idx*size_particle_positions+IDXC_Z], IDXC_Z)-current_partition_interval.first == idxPartition);
                 }
             }
         }
     }
 
-
     void compute() final {
         TIMEZONE("particles_system::compute");
-        particles_distr.template compute_distr<computer_class, field_class, 3, size_particle_rhs>(
+        particles_distr.template compute_distr<computer_class, field_class, size_particle_positions, size_particle_rhs>(
                                computer, default_field,
                                current_my_nb_particles_per_partition.get(),
                                my_particles_positions.get(),
@@ -131,11 +170,64 @@ public:
                                interp_neighbours);
     }
 
+    void compute_p2p() final {
+        // TODO P2P
+        if(computer_p2p.isEnable() == true){
+            TIMEZONE("particles_system::compute_p2p");
+            distr_p2p.template compute_distr<p2p_computer_class, size_particle_positions, size_particle_rhs>(
+                            computer_p2p, current_my_nb_particles_per_partition.get(),
+                            my_particles_positions.get(), my_particles_rhs.front().get(),
+                            my_particles_positions_indexes.get());
+        }
+    }
+
+    void compute_particles_inner() final {
+        if(computer_particules_inner.isEnable() == true){
+            TIMEZONE("particles_system::compute_particles_inner");
+            computer_particules_inner.template compute_interaction<size_particle_positions, size_particle_rhs>(
+                            my_nb_particles, my_particles_positions.get(), my_particles_rhs.front().get());
+        }
+    }
+
+    void add_Lagrange_multipliers() final {
+        if(computer_particules_inner.isEnable() == true){
+            TIMEZONE("particles_system::add_Lagrange_multipliers");
+            computer_particules_inner.template add_Lagrange_multipliers<size_particle_positions, size_particle_rhs>(
+                            my_nb_particles, my_particles_positions.get(), my_particles_rhs.front().get());
+        }
+    }
+
+    void enforce_unit_orientation() final {
+        if(computer_particules_inner.isEnable() == true){
+            TIMEZONE("particles_system::enforce_unit_orientation");
+            computer_particules_inner.template enforce_unit_orientation<size_particle_positions>(
+                            my_nb_particles, my_particles_positions.get());
+        }
+    }
+
+    void compute_sphere_particles_inner(const real_number particle_extra_field[]) final {
+        if(computer_particules_inner.isEnable() == true){
+            TIMEZONE("particles_system::compute_sphere_particles_inner");
+            computer_particules_inner.template compute_interaction_with_extra<size_particle_positions, size_particle_rhs, 3>(
+                            my_nb_particles, my_particles_positions.get(), my_particles_rhs.front().get(),
+                            particle_extra_field);
+        }
+    }
+
+    void compute_ellipsoid_particles_inner(const real_number particle_extra_field[]) final {
+        if(computer_particules_inner.isEnable() == true){
+            TIMEZONE("particles_system::compute_ellipsoid_particles_inner");
+            computer_particules_inner.template compute_interaction_with_extra<size_particle_positions, size_particle_rhs, 9>(
+                            my_nb_particles, my_particles_positions.get(), my_particles_rhs.front().get(),
+                            particle_extra_field);
+        }
+    }
+
     template <class sample_field_class, int sample_size_particle_rhs>
     void sample_compute(const sample_field_class& sample_field,
                         real_number sample_rhs[]) {
         TIMEZONE("particles_system::compute");
-        particles_distr.template compute_distr<computer_class, sample_field_class, 3, sample_size_particle_rhs>(
+        particles_distr.template compute_distr<computer_class, sample_field_class, size_particle_positions, sample_size_particle_rhs>(
                                computer, sample_field,
                                current_my_nb_particles_per_partition.get(),
                                my_particles_positions.get(),
@@ -179,7 +271,7 @@ public:
 
     void redistribute() final {
         TIMEZONE("particles_system::redistribute");
-        particles_distr.template redistribute<computer_class, 3, size_particle_rhs, 1>(
+        particles_distr.template redistribute<computer_class, size_particle_positions, size_particle_rhs, 1>(
                               computer,
                               current_my_nb_particles_per_partition.get(),
                               &my_nb_particles,
@@ -210,16 +302,61 @@ public:
     void completeLoop(const real_number dt) final {
         TIMEZONE("particles_system::completeLoop");
         compute();
+        compute_p2p();
+        compute_particles_inner();
         move(dt);
+        enforce_unit_orientation();
         redistribute();
         inc_step_idx();
         shift_rhs_vectors();
     }
 
-    const real_number* getParticlesPositions() const final {
+    void completeLoopWithVorticity(
+            const real_number dt,
+            const real_number particle_extra_field[]) final {
+        TIMEZONE("particles_system::completeLoopWithVorticity");
+        compute();
+        compute_p2p();
+        compute_sphere_particles_inner(particle_extra_field);
+        move(dt);
+        enforce_unit_orientation();
+        redistribute();
+        inc_step_idx();
+        shift_rhs_vectors();
+    }
+
+    void completeLoopWithVelocityGradient(
+            const real_number dt,
+            const real_number particle_extra_field[]) final {
+        TIMEZONE("particles_system::completeLoopWithVelocityGradient");
+        compute();
+        compute_p2p();
+        compute_ellipsoid_particles_inner(particle_extra_field);
+        move(dt);
+        enforce_unit_orientation();
+        redistribute();
+        inc_step_idx();
+        shift_rhs_vectors();
+    }
+
+    const real_number* getParticlesState() const final {
         return my_particles_positions.get();
     }
 
+    std::unique_ptr<real_number[]> extractParticlesState(const int firstState, const int lastState) const final {
+        const int nbStates = std::max(0,(std::min(lastState,size_particle_positions)-firstState));
+
+        std::unique_ptr<real_number[]> stateExtract(new real_number[my_nb_particles*nbStates]);
+
+        for(partsize_t idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){
+            for(int idxState = 0 ; idxState < nbStates ; ++idxState){
+                stateExtract[idx_part*nbStates + idxState] = my_particles_positions[idx_part*size_particle_positions + idxState+firstState];
+            }
+        }
+
+        return stateExtract;
+    }
+
     const std::unique_ptr<real_number[]>* getParticlesRhs() const final {
         return my_particles_rhs.data();
     }
@@ -240,11 +377,22 @@ public:
         return int(my_particles_rhs.size());
     }
 
+    int setParticleFileLayout(std::vector<hsize_t> input_layout) final{
+        this->particle_file_layout.resize(input_layout.size());
+        for (unsigned int i=0; i<this->particle_file_layout.size(); i++)
+            this->particle_file_layout[i] = input_layout[i];
+        return EXIT_SUCCESS;
+    }
+
+    std::vector<hsize_t> getParticleFileLayout(void) final{
+        return std::vector<hsize_t>(this->particle_file_layout);
+    }
+
     void checkNan() const { // TODO remove
         for(partsize_t idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me
-            assert(std::isnan(my_particles_positions[idx_part*3+IDX_X]) == false);
-            assert(std::isnan(my_particles_positions[idx_part*3+IDX_Y]) == false);
-            assert(std::isnan(my_particles_positions[idx_part*3+IDX_Z]) == false);
+            assert(std::isnan(my_particles_positions[idx_part*size_particle_positions+IDXC_X]) == false);
+            assert(std::isnan(my_particles_positions[idx_part*size_particle_positions+IDXC_Y]) == false);
+            assert(std::isnan(my_particles_positions[idx_part*size_particle_positions+IDXC_Z]) == false);
 
             for(int idx_rhs = 0 ; idx_rhs < my_particles_rhs.size() ; ++idx_rhs){
                 for(int idx_rhs_val = 0 ; idx_rhs_val < size_particle_rhs ; ++idx_rhs_val){
diff --git a/bfps/cpp/particles/particles_system_builder.hpp b/cpp/particles/particles_system_builder.hpp
similarity index 57%
rename from bfps/cpp/particles/particles_system_builder.hpp
rename to cpp/particles/particles_system_builder.hpp
index 7a2d49c07c3a6de21fb93d83b338609be858f0dc..6a6f4a26f3f0f08703b1a1ed8857f2445b641ac7 100644
--- a/bfps/cpp/particles/particles_system_builder.hpp
+++ b/cpp/particles/particles_system_builder.hpp
@@ -1,12 +1,40 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef PARTICLES_SYSTEM_BUILDER_HPP
 #define PARTICLES_SYSTEM_BUILDER_HPP
 
 #include <string>
 
+#include <cmath>
 #include "abstract_particles_system.hpp"
 #include "particles_system.hpp"
 #include "particles_input_hdf5.hpp"
 #include "particles_generic_interp.hpp"
+#include "p2p_computer_empty.hpp"
+#include "particles_inner_computer_empty.hpp"
 
 #include "field.hpp"
 #include "kspace.hpp"
@@ -108,7 +136,8 @@ inline RetType evaluate(IterType1 value1, IterType2 value2, Args... args){
 ///
 //////////////////////////////////////////////////////////////////////////////
 
-template <class partsize_t, class field_rnumber, field_backend be, field_components fc, class particles_rnumber>
+template <class partsize_t, class field_rnumber, field_backend be, field_components fc, class particles_rnumber, class p2p_computer_class,
+          class particles_inner_computer_class, int size_particle_positions, int size_particle_rhs>
 struct particles_system_build_container {
     template <const int interpolation_size, const int spline_mode>
     static std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> instanciate(
@@ -119,25 +148,28 @@ struct particles_system_build_container {
              const std::string& fname_input, // particles input filename
             const std::string& inDatanameState, const std::string& inDatanameRhs, // input dataset names
              MPI_Comm mpi_comm,
-            const int in_current_iteration){
+            const int in_current_iteration,
+            p2p_computer_class p2p_computer,
+            particles_inner_computer_class inner_computer,
+            const particles_rnumber cutoff = std::numeric_limits<particles_rnumber>::max()){
 
         // The size of the field grid (global size) all_size seems
         std::array<size_t,3> field_grid_dim;
-        field_grid_dim[IDX_X] = fs_field->rlayout->sizes[FIELD_IDX_X];// nx
-        field_grid_dim[IDX_Y] = fs_field->rlayout->sizes[FIELD_IDX_Y];// nx
-        field_grid_dim[IDX_Z] = fs_field->rlayout->sizes[FIELD_IDX_Z];// nz
+        field_grid_dim[IDXC_X] = fs_field->rlayout->sizes[IDXV_X];// nx
+        field_grid_dim[IDXC_Y] = fs_field->rlayout->sizes[IDXV_Y];// nx
+        field_grid_dim[IDXC_Z] = fs_field->rlayout->sizes[IDXV_Z];// nz
 
         // The size of the local field grid (the field nodes that belong to current process)
         std::array<size_t,3> local_field_dims;
-        local_field_dims[IDX_X] = fs_field->rlayout->subsizes[FIELD_IDX_X];
-        local_field_dims[IDX_Y] = fs_field->rlayout->subsizes[FIELD_IDX_Y];
-        local_field_dims[IDX_Z] = fs_field->rlayout->subsizes[FIELD_IDX_Z];
+        local_field_dims[IDXC_X] = fs_field->rlayout->subsizes[IDXV_X];
+        local_field_dims[IDXC_Y] = fs_field->rlayout->subsizes[IDXV_Y];
+        local_field_dims[IDXC_Z] = fs_field->rlayout->subsizes[IDXV_Z];
 
         // The offset of the local field grid
         std::array<size_t,3> local_field_offset;
-        local_field_offset[IDX_X] = fs_field->rlayout->starts[FIELD_IDX_X];
-        local_field_offset[IDX_Y] = fs_field->rlayout->starts[FIELD_IDX_Y];
-        local_field_offset[IDX_Z] = fs_field->rlayout->starts[FIELD_IDX_Z];
+        local_field_offset[IDXC_X] = fs_field->rlayout->starts[IDXV_X];
+        local_field_offset[IDXC_Y] = fs_field->rlayout->starts[IDXV_Y];
+        local_field_offset[IDXC_Z] = fs_field->rlayout->starts[IDXV_Z];
 
 
         // Retreive split from fftw to know processes that have no work
@@ -145,57 +177,60 @@ struct particles_system_build_container {
         AssertMpi(MPI_Comm_rank(mpi_comm, &my_rank));
         AssertMpi(MPI_Comm_size(mpi_comm, &nb_processes));
 
-        const int split_step = (int(field_grid_dim[IDX_Z])+nb_processes-1)/nb_processes;
-        const int nb_processes_involved = (int(field_grid_dim[IDX_Z])+split_step-1)/split_step;
+        const int split_step = (int(field_grid_dim[IDXC_Z])+nb_processes-1)/nb_processes;
+        const int nb_processes_involved = (int(field_grid_dim[IDXC_Z])+split_step-1)/split_step;
 
-        assert((my_rank < nb_processes_involved && local_field_dims[IDX_Z] != 0)
-               || (nb_processes_involved <= my_rank && local_field_dims[IDX_Z] == 0));
-        assert(nb_processes_involved <= int(field_grid_dim[IDX_Z]));
+        assert((my_rank < nb_processes_involved && local_field_dims[IDXC_Z] != 0)
+               || (nb_processes_involved <= my_rank && local_field_dims[IDXC_Z] == 0));
+        assert(nb_processes_involved <= int(field_grid_dim[IDXC_Z]));
 
         // Make the idle processes starting from the limit (and not 0 as set by fftw)
         if(nb_processes_involved <= my_rank){
-            local_field_offset[IDX_Z] = field_grid_dim[IDX_Z];
+            local_field_offset[IDXC_Z] = field_grid_dim[IDXC_Z];
         }
 
         // Ensure that 1D partitioning is used
         {
-            assert(local_field_offset[IDX_X] == 0);
-            assert(local_field_offset[IDX_Y] == 0);
-            assert(local_field_dims[IDX_X] == field_grid_dim[IDX_X]);
-            assert(local_field_dims[IDX_Y] == field_grid_dim[IDX_Y]);
-
-            assert(my_rank >= nb_processes_involved || ((my_rank == 0 && local_field_offset[IDX_Z] == 0)
-                   || (my_rank != 0 && local_field_offset[IDX_Z] != 0)));
-            assert(my_rank >= nb_processes_involved || ((my_rank == nb_processes_involved-1 && local_field_offset[IDX_Z]+local_field_dims[IDX_Z] == field_grid_dim[IDX_Z])
-                   || (my_rank != nb_processes_involved-1 && local_field_offset[IDX_Z]+local_field_dims[IDX_Z] != field_grid_dim[IDX_Z])));
+            assert(local_field_offset[IDXC_X] == 0);
+            assert(local_field_offset[IDXC_Y] == 0);
+            assert(local_field_dims[IDXC_X] == field_grid_dim[IDXC_X]);
+            assert(local_field_dims[IDXC_Y] == field_grid_dim[IDXC_Y]);
+
+            assert(my_rank >= nb_processes_involved || ((my_rank == 0 && local_field_offset[IDXC_Z] == 0)
+                   || (my_rank != 0 && local_field_offset[IDXC_Z] != 0)));
+            assert(my_rank >= nb_processes_involved || ((my_rank == nb_processes_involved-1 && local_field_offset[IDXC_Z]+local_field_dims[IDXC_Z] == field_grid_dim[IDXC_Z])
+                   || (my_rank != nb_processes_involved-1 && local_field_offset[IDXC_Z]+local_field_dims[IDXC_Z] != field_grid_dim[IDXC_Z])));
         }
 
         // The spatial box size (all particles should be included inside)
         std::array<particles_rnumber,3> spatial_box_width;
-        spatial_box_width[IDX_X] = 4 * acos(0) / (fs_kk->dkx);
-        spatial_box_width[IDX_Y] = 4 * acos(0) / (fs_kk->dky);
-        spatial_box_width[IDX_Z] = 4 * acos(0) / (fs_kk->dkz);
+        spatial_box_width[IDXC_X] = 4 * acos(0) / (fs_kk->dkx);
+        spatial_box_width[IDXC_Y] = 4 * acos(0) / (fs_kk->dky);
+        spatial_box_width[IDXC_Z] = 4 * acos(0) / (fs_kk->dkz);
 
         // Box is in the corner
         std::array<particles_rnumber,3> spatial_box_offset;
-        spatial_box_offset[IDX_X] = 0;
-        spatial_box_offset[IDX_Y] = 0;
-        spatial_box_offset[IDX_Z] = 0;
+        spatial_box_offset[IDXC_X] = 0;
+        spatial_box_offset[IDXC_Y] = 0;
+        spatial_box_offset[IDXC_Z] = 0;
 
         // The distance between two field nodes in z
         std::array<particles_rnumber,3> spatial_partition_width;
-        spatial_partition_width[IDX_X] = spatial_box_width[IDX_X]/particles_rnumber(field_grid_dim[IDX_X]);
-        spatial_partition_width[IDX_Y] = spatial_box_width[IDX_Y]/particles_rnumber(field_grid_dim[IDX_Y]);
-        spatial_partition_width[IDX_Z] = spatial_box_width[IDX_Z]/particles_rnumber(field_grid_dim[IDX_Z]);
+        spatial_partition_width[IDXC_X] = spatial_box_width[IDXC_X]/particles_rnumber(field_grid_dim[IDXC_X]);
+        spatial_partition_width[IDXC_Y] = spatial_box_width[IDXC_Y]/particles_rnumber(field_grid_dim[IDXC_Y]);
+        spatial_partition_width[IDXC_Z] = spatial_box_width[IDXC_Z]/particles_rnumber(field_grid_dim[IDXC_Z]);
         // The spatial interval of the current process
-        const particles_rnumber my_spatial_low_limit_z = particles_rnumber(local_field_offset[IDX_Z])*spatial_partition_width[IDX_Z];
-        const particles_rnumber my_spatial_up_limit_z = particles_rnumber(local_field_offset[IDX_Z]+local_field_dims[IDX_Z])*spatial_partition_width[IDX_Z];
+        const particles_rnumber my_spatial_low_limit_z = particles_rnumber(local_field_offset[IDXC_Z])*spatial_partition_width[IDXC_Z];
+        const particles_rnumber my_spatial_up_limit_z = particles_rnumber(local_field_offset[IDXC_Z]+local_field_dims[IDXC_Z])*spatial_partition_width[IDXC_Z];
 
         // Create the particles system
         using particles_system_type = particles_system<partsize_t, particles_rnumber, field_rnumber,
                                                        field<field_rnumber, be, fc>,
                                                        particles_generic_interp<particles_rnumber, interpolation_size,spline_mode>,
-                                                       interpolation_size, ncomp(fc)>;
+                                                       interpolation_size,
+                                                       size_particle_positions, size_particle_rhs,
+                                                       p2p_computer_class,
+                                                       particles_inner_computer_class>;
         particles_system_type* part_sys = new particles_system_type(field_grid_dim,
                                                spatial_box_width,
                                                spatial_box_offset,
@@ -207,10 +242,14 @@ struct particles_system_build_container {
                                                (*fs_field),
                                                mpi_comm,
                                                nparticles,
+                                               cutoff,
+                                               p2p_computer,
+                                               inner_computer,
                                                in_current_iteration);
 
+        // TODO P2P load particle data too
         // Load particles from hdf5
-        particles_input_hdf5<partsize_t, particles_rnumber, 3,3> generator(mpi_comm, fname_input,
+        particles_input_hdf5<partsize_t, particles_rnumber, size_particle_positions, size_particle_rhs> generator(mpi_comm, fname_input,
                                             inDatanameState, inDatanameRhs, my_spatial_low_limit_z, my_spatial_up_limit_z);
 
         // Ensure parameters match the input file
@@ -229,6 +268,9 @@ struct particles_system_build_container {
 
         assert(part_sys->getNbRhs() == nsteps);
 
+        // store particle file layout
+        part_sys->setParticleFileLayout(generator.getParticleFileLayout());
+
         // Return the created particles system
         return std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>(part_sys);
     }
@@ -250,10 +292,44 @@ inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>
     return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>,
                        int, 1, 11, 1, // interpolation_size
                        int, 0, 3, 1, // spline_mode
-                       particles_system_build_container<partsize_t, field_rnumber,be,fc,particles_rnumber>>(
+                       particles_system_build_container<partsize_t, field_rnumber,be,fc,particles_rnumber,
+                                                        p2p_computer_empty<particles_rnumber,partsize_t>,
+                                                        particles_inner_computer_empty<particles_rnumber,partsize_t>,
+                                                        3,3>>(
+                           interpolation_size, // template iterator 1
+                           spline_mode, // template iterator 2
+                           fs_field,fs_kk, nsteps, nparticles, fname_input, inDatanameState, inDatanameRhs, mpi_comm, in_current_iteration,
+                           p2p_computer_empty<particles_rnumber,partsize_t>(), particles_inner_computer_empty<particles_rnumber,partsize_t>());
+}
+
+template <class partsize_t, class field_rnumber, field_backend be, field_components fc,
+          class p2p_computer_class, class particles_inner_computer_class,
+          class particles_rnumber = double>
+inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> particles_system_builder_with_p2p(
+        const field<field_rnumber, be, fc>* fs_field, // (field object)
+        const kspace<be, SMOOTH>* fs_kk, // (kspace object, contains dkx, dky, dkz)
+        const int nsteps, // to check coherency between parameters and hdf input file (nb rhs)
+        const partsize_t nparticles, // to check coherency between parameters and hdf input file
+        const std::string& fname_input, // particles input filename
+        const std::string& inDatanameState, const std::string& inDatanameRhs, // input dataset names
+        const int interpolation_size,
+        const int spline_mode,
+        MPI_Comm mpi_comm,
+        const int in_current_iteration,
+        p2p_computer_class p2p_computer,
+        particles_inner_computer_class inner_computer,
+        const particles_rnumber cutoff){
+    return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>,
+                       int, 1, 11, 1, // interpolation_size
+                       int, 0, 3, 1, // spline_mode
+                       particles_system_build_container<partsize_t, field_rnumber,be,fc,particles_rnumber,
+                                                        p2p_computer_class,
+                                                        particles_inner_computer_class,
+                                                        6,6>>(
                            interpolation_size, // template iterator 1
                            spline_mode, // template iterator 2
-                           fs_field,fs_kk, nsteps, nparticles, fname_input, inDatanameState, inDatanameRhs, mpi_comm, in_current_iteration);
+                           fs_field,fs_kk, nsteps, nparticles, fname_input, inDatanameState, inDatanameRhs, mpi_comm, in_current_iteration,
+                           std::move(p2p_computer), std::move(inner_computer), cutoff);
 }
 
 
diff --git a/bfps/cpp/particles/particles_utils.hpp b/cpp/particles/particles_utils.hpp
similarity index 82%
rename from bfps/cpp/particles/particles_utils.hpp
rename to cpp/particles/particles_utils.hpp
index 146dc4399477b72c30329edff587d35d7b44d69d..f1e0c790cd9c02ffb714bb555455662134346ee4 100644
--- a/bfps/cpp/particles/particles_utils.hpp
+++ b/cpp/particles/particles_utils.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef PARTICLES_UTILS_HPP
 #define PARTICLES_UTILS_HPP
 
@@ -19,16 +44,28 @@
 #define AssertMpi(X) if(MPI_SUCCESS != (X)) { printf("MPI Error at line %d\n",__LINE__); fflush(stdout) ; throw std::runtime_error("Stop from from mpi erro"); }
 #endif
 
-enum IDXS_3D {
-    IDX_X = 0,
-    IDX_Y = 1,
-    IDX_Z = 2
+enum IDX_COMPONENT_3D {
+    IDXC_X = 0,
+    IDXC_Y = 1,
+    IDXC_Z = 2
+};
+
+enum IDX_COMPONENT_DEL_3D {
+    IDXC_DX_X = 0,
+    IDXC_DX_Y = 1,
+    IDXC_DX_Z = 2,
+    IDXC_DY_X = 3,
+    IDXC_DY_Y = 4,
+    IDXC_DY_Z = 5,
+    IDXC_DZ_X = 6,
+    IDXC_DZ_Y = 7,
+    IDXC_DZ_Z = 8,
 };
 
-enum FIELD_IDXS_3D {
-    FIELD_IDX_X = 2,
-    FIELD_IDX_Y = 1,
-    FIELD_IDX_Z = 0
+enum IDX_VARIABLE_3D {
+    IDXV_X = 2,
+    IDXV_Y = 1,
+    IDXV_Z = 0
 };
 
 namespace particles_utils {
@@ -123,7 +160,7 @@ inline void partition_extra_z(real_number* array, const partsize_t size, const i
     if(nb_partitions == 2){
         const partsize_t size_current = partition_extra<partsize_t, nb_values>(array, size,
                 [&](const real_number inval[]){
-            return partitions_levels(inval[IDX_Z]) == 0;
+            return partitions_levels(inval[IDXC_Z]) == 0;
         }, pdcswap);
         partitions_size[0] = size_current;
         partitions_size[1] = size-size_current;
@@ -152,7 +189,7 @@ inline void partition_extra_z(real_number* array, const partsize_t size, const i
             const partsize_t size_current = partition_extra<partsize_t, nb_values>(&array[partitions_offset[current_part.first]*nb_values],
                                                      size_unpart,
                     [&](const real_number inval[]){
-                return partitions_levels(inval[IDX_Z]) <= idx_middle;
+                return partitions_levels(inval[IDXC_Z]) <= idx_middle;
             }, pdcswap, partitions_offset[current_part.first]);
 
             partitions_offset[idx_middle+1] = size_current + partitions_offset[current_part.first];
diff --git a/bfps/cpp/scope_timer.cpp b/cpp/scope_timer.cpp
similarity index 100%
rename from bfps/cpp/scope_timer.cpp
rename to cpp/scope_timer.cpp
diff --git a/bfps/cpp/scope_timer.hpp b/cpp/scope_timer.hpp
similarity index 99%
rename from bfps/cpp/scope_timer.hpp
rename to cpp/scope_timer.hpp
index 2c48e2eda06ded74e668825181f0444eef22f647..890f522c415d7a102a0fff25c5292502cbcb459c 100644
--- a/bfps/cpp/scope_timer.hpp
+++ b/cpp/scope_timer.hpp
@@ -791,7 +791,8 @@ extern EventManager global_timer_manager;
 
 #define TIMEZONE(NAME)                                                      \
   ScopeEvent TIMEZONE_Core_Pre_Merge(____TIMEZONE_AUTO_ID, __LINE__)( \
-      NAME, global_timer_manager, ScopeEventUniqueKey);
+      NAME, global_timer_manager, ScopeEventUniqueKey); \
+  DEBUG_MSG((NAME + std::string("\n")).c_str());
 #define TIMEZONE_MULTI_REF(NAME)                                            \
   ScopeEvent TIMEZONE_Core_Pre_Merge(____TIMEZONE_AUTO_ID, __LINE__)( \
       NAME, global_timer_manager, ScopeEventMultiRefKey);
diff --git a/bfps/cpp/shared_array.hpp b/cpp/shared_array.hpp
similarity index 62%
rename from bfps/cpp/shared_array.hpp
rename to cpp/shared_array.hpp
index 1951e2f9838ccf37367d859206453d3db91e8e19..0245dc5df81e5bd1511b57583b9a4a86745a5d2c 100644
--- a/bfps/cpp/shared_array.hpp
+++ b/cpp/shared_array.hpp
@@ -1,3 +1,28 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2016 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
 #ifndef SHAREDARRAY_HPP
 #define SHAREDARRAY_HPP
 
diff --git a/cpp/spline.hpp b/cpp/spline.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef990088566ec10f0bbf10937980705ffeb570dc
--- /dev/null
+++ b/cpp/spline.hpp
@@ -0,0 +1,40 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2017 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+
+#ifndef SPLINE_HPP
+#define SPLINE_HPP
+
+#include "spline_n1.hpp"
+#include "spline_n2.hpp"
+#include "spline_n3.hpp"
+#include "spline_n4.hpp"
+#include "spline_n5.hpp"
+#include "spline_n6.hpp"
+#include "spline_n7.hpp"
+#include "spline_n8.hpp"
+#include "spline_n9.hpp"
+#include "spline_n10.hpp"
+
+#endif
diff --git a/bfps/cpp/spline_n1.cpp b/cpp/spline_n1.cpp
similarity index 100%
rename from bfps/cpp/spline_n1.cpp
rename to cpp/spline_n1.cpp
diff --git a/bfps/cpp/spline_n1.hpp b/cpp/spline_n1.hpp
similarity index 100%
rename from bfps/cpp/spline_n1.hpp
rename to cpp/spline_n1.hpp
diff --git a/bfps/cpp/spline_n10.cpp b/cpp/spline_n10.cpp
similarity index 100%
rename from bfps/cpp/spline_n10.cpp
rename to cpp/spline_n10.cpp
diff --git a/bfps/cpp/spline_n10.hpp b/cpp/spline_n10.hpp
similarity index 100%
rename from bfps/cpp/spline_n10.hpp
rename to cpp/spline_n10.hpp
diff --git a/bfps/cpp/spline_n2.cpp b/cpp/spline_n2.cpp
similarity index 100%
rename from bfps/cpp/spline_n2.cpp
rename to cpp/spline_n2.cpp
diff --git a/bfps/cpp/spline_n2.hpp b/cpp/spline_n2.hpp
similarity index 100%
rename from bfps/cpp/spline_n2.hpp
rename to cpp/spline_n2.hpp
diff --git a/bfps/cpp/spline_n3.cpp b/cpp/spline_n3.cpp
similarity index 100%
rename from bfps/cpp/spline_n3.cpp
rename to cpp/spline_n3.cpp
diff --git a/bfps/cpp/spline_n3.hpp b/cpp/spline_n3.hpp
similarity index 100%
rename from bfps/cpp/spline_n3.hpp
rename to cpp/spline_n3.hpp
diff --git a/bfps/cpp/spline_n4.cpp b/cpp/spline_n4.cpp
similarity index 100%
rename from bfps/cpp/spline_n4.cpp
rename to cpp/spline_n4.cpp
diff --git a/bfps/cpp/spline_n4.hpp b/cpp/spline_n4.hpp
similarity index 100%
rename from bfps/cpp/spline_n4.hpp
rename to cpp/spline_n4.hpp
diff --git a/bfps/cpp/spline_n5.cpp b/cpp/spline_n5.cpp
similarity index 100%
rename from bfps/cpp/spline_n5.cpp
rename to cpp/spline_n5.cpp
diff --git a/bfps/cpp/spline_n5.hpp b/cpp/spline_n5.hpp
similarity index 100%
rename from bfps/cpp/spline_n5.hpp
rename to cpp/spline_n5.hpp
diff --git a/bfps/cpp/spline_n6.cpp b/cpp/spline_n6.cpp
similarity index 100%
rename from bfps/cpp/spline_n6.cpp
rename to cpp/spline_n6.cpp
diff --git a/bfps/cpp/spline_n6.hpp b/cpp/spline_n6.hpp
similarity index 100%
rename from bfps/cpp/spline_n6.hpp
rename to cpp/spline_n6.hpp
diff --git a/bfps/cpp/spline_n7.cpp b/cpp/spline_n7.cpp
similarity index 100%
rename from bfps/cpp/spline_n7.cpp
rename to cpp/spline_n7.cpp
diff --git a/bfps/cpp/spline_n7.hpp b/cpp/spline_n7.hpp
similarity index 100%
rename from bfps/cpp/spline_n7.hpp
rename to cpp/spline_n7.hpp
diff --git a/bfps/cpp/spline_n8.cpp b/cpp/spline_n8.cpp
similarity index 100%
rename from bfps/cpp/spline_n8.cpp
rename to cpp/spline_n8.cpp
diff --git a/bfps/cpp/spline_n8.hpp b/cpp/spline_n8.hpp
similarity index 100%
rename from bfps/cpp/spline_n8.hpp
rename to cpp/spline_n8.hpp
diff --git a/bfps/cpp/spline_n9.cpp b/cpp/spline_n9.cpp
similarity index 100%
rename from bfps/cpp/spline_n9.cpp
rename to cpp/spline_n9.cpp
diff --git a/bfps/cpp/spline_n9.hpp b/cpp/spline_n9.hpp
similarity index 100%
rename from bfps/cpp/spline_n9.hpp
rename to cpp/spline_n9.hpp
diff --git a/bfps/cpp/vorticity_equation.cpp b/cpp/vorticity_equation.cpp
similarity index 74%
rename from bfps/cpp/vorticity_equation.cpp
rename to cpp/vorticity_equation.cpp
index 737db2c47e89624065f3d29a1657575bac5ea786..ead9345af5a2f0555e7fa6e2b6ee45cecd9f3624 100644
--- a/bfps/cpp/vorticity_equation.cpp
+++ b/cpp/vorticity_equation.cpp
@@ -26,12 +26,14 @@
 
 #define NDEBUG
 
+#include <limits>
 #include <cassert>
 #include <cmath>
 #include <cstring>
 #include "fftw_tools.hpp"
 #include "vorticity_equation.hpp"
 #include "scope_timer.hpp"
+#include "shared_array.hpp"
 
 
 
@@ -151,6 +153,7 @@ vorticity_equation<rnumber, be>::vorticity_equation(
     this->nu = 0.1;
     this->fmode = 1;
     this->famplitude = 1.0;
+    this->friction_coefficient = 1.0;
     this->fk0  = 2.0;
     this->fk1 = 4.0;
 }
@@ -188,13 +191,6 @@ void vorticity_equation<rnumber, be>::compute_vorticity()
             this->cvorticity->cval(cindex,1,1) =  (this->kk->kz[zindex]*this->u->cval(cindex,0,0) - this->kk->kx[xindex]*this->u->cval(cindex,2,0));
             this->cvorticity->cval(cindex,2,0) = -(this->kk->kx[xindex]*this->u->cval(cindex,1,1) - this->kk->ky[yindex]*this->u->cval(cindex,0,1));
             this->cvorticity->cval(cindex,2,1) =  (this->kk->kx[xindex]*this->u->cval(cindex,1,0) - this->kk->ky[yindex]*this->u->cval(cindex,0,0));
-            //ptrdiff_t tindex = 3*cindex;
-            //this->cvorticity->get_cdata()[tindex+0][0] = -(this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][1]);
-            //this->cvorticity->get_cdata()[tindex+1][0] = -(this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][1]);
-            //this->cvorticity->get_cdata()[tindex+2][0] = -(this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][1]);
-            //this->cvorticity->get_cdata()[tindex+0][1] =  (this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][0]);
-            //this->cvorticity->get_cdata()[tindex+1][1] =  (this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][0]);
-            //this->cvorticity->get_cdata()[tindex+2][1] =  (this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][0]);
         }
         else
             std::fill_n((rnumber*)(this->cvorticity->get_cdata()+3*cindex), 6, 0.0);
@@ -223,13 +219,6 @@ void vorticity_equation<rnumber, be>::compute_velocity(field<rnumber, be, THREE>
             this->u->cval(cindex,1,1) =  (this->kk->kz[zindex]*vorticity->cval(cindex,0,0) - this->kk->kx[xindex]*vorticity->cval(cindex,2,0)) / k2;
             this->u->cval(cindex,2,0) = -(this->kk->kx[xindex]*vorticity->cval(cindex,1,1) - this->kk->ky[yindex]*vorticity->cval(cindex,0,1)) / k2;
             this->u->cval(cindex,2,1) =  (this->kk->kx[xindex]*vorticity->cval(cindex,1,0) - this->kk->ky[yindex]*vorticity->cval(cindex,0,0)) / k2;
-            //ptrdiff_t tindex = 3*cindex;
-            //this->u->get_cdata()[tindex+0][0] = -(this->kk->ky[yindex]*vorticity->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*vorticity->get_cdata()[tindex+1][1]) / k2;
-            //this->u->get_cdata()[tindex+0][1] =  (this->kk->ky[yindex]*vorticity->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*vorticity->get_cdata()[tindex+1][0]) / k2;
-            //this->u->get_cdata()[tindex+1][0] = -(this->kk->kz[zindex]*vorticity->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*vorticity->get_cdata()[tindex+2][1]) / k2;
-            //this->u->get_cdata()[tindex+1][1] =  (this->kk->kz[zindex]*vorticity->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*vorticity->get_cdata()[tindex+2][0]) / k2;
-            //this->u->get_cdata()[tindex+2][0] = -(this->kk->kx[xindex]*vorticity->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*vorticity->get_cdata()[tindex+0][1]) / k2;
-            //this->u->get_cdata()[tindex+2][1] =  (this->kk->kx[xindex]*vorticity->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*vorticity->get_cdata()[tindex+0][0]) / k2;
         }
         else
             std::fill_n((rnumber*)(this->u->get_cdata()+3*cindex), 6, 0.0);
@@ -238,49 +227,231 @@ void vorticity_equation<rnumber, be>::compute_velocity(field<rnumber, be, THREE>
     this->u->symmetrize();
 }
 
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::add_Kolmogorov_forcing(
+        field<rnumber, be, THREE> *dst,
+        int fmode,
+        double famplitude)
+{
+    TIMEZONE("vorticity_equation::add_Kolmogorov_forcing");
+    ptrdiff_t cindex;
+    if (dst->clayout->myrank == dst->clayout->rank[0][fmode])
+    {
+        cindex = dst->get_cindex(0, (fmode - dst->clayout->starts[0]), 0);
+        dst->cval(cindex,2, 0) -= famplitude/2;
+    }
+    if (dst->clayout->myrank == dst->clayout->rank[0][dst->clayout->sizes[0] - fmode])
+    {
+        cindex = dst->get_cindex(0, (dst->clayout->sizes[0] - fmode - dst->clayout->starts[0]), 0);
+        dst->cval(cindex, 2, 0) -= famplitude/2;
+    }
+}
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::add_field_band(
+        field<rnumber, be, THREE> *dst,
+        field<rnumber, be, THREE> *src,
+        double k0, double k1,
+        double prefactor)
+{
+    TIMEZONE("vorticity_equation::add_field_band");
+    this->kk->CLOOP(
+                [&](ptrdiff_t cindex,
+                    ptrdiff_t xindex,
+                    ptrdiff_t yindex,
+                    ptrdiff_t zindex){
+        double knorm = sqrt(this->kk->kx[xindex]*this->kk->kx[xindex] +
+                            this->kk->ky[yindex]*this->kk->ky[yindex] +
+                            this->kk->kz[zindex]*this->kk->kz[zindex]);
+        if ((k0 <= knorm) &&
+            (k1 >= knorm))
+            for (int c=0; c<3; c++)
+                for (int i=0; i<2; i++)
+                    dst->cval(cindex,c,i) += prefactor*src->cval(cindex,c,i);
+    }
+    );
+}
+
 template <class rnumber,
           field_backend be>
 void vorticity_equation<rnumber, be>::add_forcing(
         field<rnumber, be, THREE> *dst,
-        field<rnumber, be, THREE> *vort_field,
-        rnumber factor)
+        field<rnumber, be, THREE> *vort_field)
 {
     TIMEZONE("vorticity_equation::add_forcing");
-    if (strcmp(this->forcing_type, "none") == 0)
-        return;
     if (strcmp(this->forcing_type, "Kolmogorov") == 0)
     {
-        ptrdiff_t cindex;
-        if (this->cvorticity->clayout->myrank == this->cvorticity->clayout->rank[0][this->fmode])
-        {
-            cindex = ((this->fmode - this->cvorticity->clayout->starts[0]) * this->cvorticity->clayout->sizes[1])*this->cvorticity->clayout->sizes[2];
-            dst->cval(cindex,2, 0) -= this->famplitude*factor/2;
-            //dst->get_cdata()[cindex*3+2][0] -= this->famplitude*factor/2;
-        }
-        if (this->cvorticity->clayout->myrank == this->cvorticity->clayout->rank[0][this->cvorticity->clayout->sizes[0] - this->fmode])
-        {
-            cindex = ((this->cvorticity->clayout->sizes[0] - this->fmode - this->cvorticity->clayout->starts[0]) * this->cvorticity->clayout->sizes[1])*this->cvorticity->clayout->sizes[2];
-            dst->cval(cindex, 2, 0) -= this->famplitude*factor/2;
-            //dst->get_cdata()[cindex*3+2][0] -= this->famplitude*factor/2;
-        }
+        this->add_Kolmogorov_forcing(dst, this->fmode, this->famplitude);
+        return;
+    }
+    if (strcmp(this->forcing_type, "2Kolmogorov") == 0)
+    {
+        // 2 Kolmogorov forces
+        // first one wavenumber fk0, amplitude 1 - A
+        double amplitude = 1 - this->famplitude;
+        int fmode = int(this->fk0 / this->kk->dky);
+        this->add_Kolmogorov_forcing(dst, fmode, amplitude);
+        // second one wavenumber fk1, amplitude A
+        amplitude = this->famplitude * pow(int(this->fk1) / double(int(this->fk0)), 3);
+        fmode = int(this->fk1 / this->kk->dky);
+        this->add_Kolmogorov_forcing(dst, fmode, amplitude);
+        return;
+    }
+    if (strcmp(this->forcing_type, "Kolmogorov_and_drag") == 0)
+    {
+        this->add_Kolmogorov_forcing(dst, this->fmode, this->famplitude);
+        this->add_field_band(
+                dst, vort_field,
+                this->fk0, this->fk1,
+                -this->friction_coefficient);
+        return;
+    }
+    if (strcmp(this->forcing_type, "Kolmogorov_and_compensated_drag") == 0)
+    {
+        double amplitude = this->famplitude * (
+                1 + this->friction_coefficient / sqrt(this->fmode  * this->famplitude));
+        this->add_Kolmogorov_forcing(dst, this->fmode, amplitude);
+        this->add_field_band(
+                dst, vort_field,
+                this->fk0, this->fk1,
+                -this->friction_coefficient);
         return;
     }
     if (strcmp(this->forcing_type, "linear") == 0)
     {
-        this->kk->CLOOP(
+        this->add_field_band(
+                dst, vort_field,
+                this->fk0, this->fk1,
+                this->famplitude);
+        return;
+    }
+    if ((strcmp(this->forcing_type, "fixed_energy_injection_rate") == 0) ||
+        (strcmp(this->forcing_type, "fixed_energy_injection_rate_and_drag") == 0))
+    {
+        // first, compute energy in shell
+        shared_array<double> local_energy_in_shell(1);
+        double energy_in_shell = 0;
+        this->kk->CLOOP_K2_NXMODES(
+                    [&](ptrdiff_t cindex,
+                        ptrdiff_t xindex,
+                        ptrdiff_t yindex,
+                        ptrdiff_t zindex,
+                        double k2,
+                        int nxmodes){
+            double knorm = sqrt(k2);
+            if ((k2 > 0) &&
+                (this->fk0 <= knorm) &&
+                (this->fk1 >= knorm))
+                    *local_energy_in_shell.getMine() += nxmodes*(
+                            vort_field->cval(cindex, 0, 0)*vort_field->cval(cindex, 0, 0) + vort_field->cval(cindex, 0, 1)*vort_field->cval(cindex, 0, 1) +
+                            vort_field->cval(cindex, 1, 0)*vort_field->cval(cindex, 1, 0) + vort_field->cval(cindex, 1, 1)*vort_field->cval(cindex, 1, 1) +
+                            vort_field->cval(cindex, 2, 0)*vort_field->cval(cindex, 2, 0) + vort_field->cval(cindex, 2, 1)*vort_field->cval(cindex, 2, 1)
+                            ) / k2;
+        }
+        );
+        local_energy_in_shell.mergeParallel();
+        MPI_Allreduce(
+                local_energy_in_shell.getMasterData(),
+                &energy_in_shell,
+                1,
+                MPI_DOUBLE,
+                MPI_SUM,
+                vort_field->comm);
+        // we should divide by 2, if we wanted energy;
+        // but then we would need to multiply the amplitude by 2 anyway,
+        // because what we really care about is force dotted into velocity,
+        // without the division by 2.
+
+        // now, modify amplitudes
+        if (energy_in_shell < 10*std::numeric_limits<rnumber>::epsilon())
+            energy_in_shell = 1;
+        double temp_famplitude = this->injection_rate / energy_in_shell;
+        this->add_field_band(
+                dst, vort_field,
+                this->fk0, this->fk1,
+                temp_famplitude);
+        // and add drag if desired
+        if (strcmp(this->forcing_type, "fixed_energy_injection_rate_and_drag") == 0)
+            this->add_field_band(
+                    dst, vort_field,
+                    this->fmode, this->fmode + (this->fk1 - this->fk0),
+                    -this->friction_coefficient);
+        return;
+    }
+    if (strcmp(this->forcing_type, "fixed_energy") == 0)
+        return;
+}
+
+template <class rnumber,
+          field_backend be>
+void vorticity_equation<rnumber, be>::impose_forcing(
+        field<rnumber, be, THREE> *onew,
+        field<rnumber, be, THREE> *oold)
+{
+    TIMEZONE("vorticity_equation::impose_forcing");
+    if (strcmp(this->forcing_type, "fixed_energy") == 0)
+    {
+        // first, compute energy in shell
+        shared_array<double> local_energy_in_shell(1);
+        shared_array<double> local_total_energy(1);
+        double energy_in_shell, total_energy;
+        this->kk->CLOOP_K2_NXMODES(
                     [&](ptrdiff_t cindex,
                         ptrdiff_t xindex,
                         ptrdiff_t yindex,
-                        ptrdiff_t zindex){
-            double knorm = sqrt(this->kk->kx[xindex]*this->kk->kx[xindex] +
-                                this->kk->ky[yindex]*this->kk->ky[yindex] +
-                                this->kk->kz[zindex]*this->kk->kz[zindex]);
+                        ptrdiff_t zindex,
+                        double k2,
+                        int nxmodes){
+            if (k2 > 0)
+            {
+                double mode_energy = nxmodes*(
+                            onew->cval(cindex, 0, 0)*onew->cval(cindex, 0, 0) + onew->cval(cindex, 0, 1)*onew->cval(cindex, 0, 1) +
+                            onew->cval(cindex, 1, 0)*onew->cval(cindex, 1, 0) + onew->cval(cindex, 1, 1)*onew->cval(cindex, 1, 1) +
+                            onew->cval(cindex, 2, 0)*onew->cval(cindex, 2, 0) + onew->cval(cindex, 2, 1)*onew->cval(cindex, 2, 1)
+                            ) / k2;
+                *local_total_energy.getMine() += mode_energy;
+                double knorm = sqrt(k2);
+                if ((this->fk0 <= knorm) && (this->fk1 >= knorm))
+                    *local_energy_in_shell.getMine() += mode_energy;
+            }
+        }
+        );
+        local_total_energy.mergeParallel();
+        local_energy_in_shell.mergeParallel();
+        MPI_Allreduce(
+                local_energy_in_shell.getMasterData(),
+                &energy_in_shell,
+                1,
+                MPI_DOUBLE,
+                MPI_SUM,
+                onew->comm);
+        MPI_Allreduce(
+                local_total_energy.getMasterData(),
+                &total_energy,
+                1,
+                MPI_DOUBLE,
+                MPI_SUM,
+                onew->comm);
+        // divide by 2, because we want energy
+        total_energy /= 2;
+        energy_in_shell /= 2;
+        // now, add forcing term
+        // see Michael's thesis, page 38
+        double temp_famplitude = sqrt((this->energy - total_energy + energy_in_shell) / energy_in_shell);
+        this->kk->CLOOP_K2(
+                    [&](ptrdiff_t cindex,
+                        ptrdiff_t xindex,
+                        ptrdiff_t yindex,
+                        ptrdiff_t zindex,
+                        double k2){
+            double knorm = sqrt(k2);
             if ((this->fk0 <= knorm) &&
-                    (this->fk1 >= knorm))
+                (this->fk1 >= knorm))
                 for (int c=0; c<3; c++)
                     for (int i=0; i<2; i++)
-                        dst->cval(cindex,c,i) += this->famplitude*vort_field->cval(cindex,c,i)*factor;
-                        //dst->get_cdata()[cindex*3+c][i] += this->famplitude*vort_field->get_cdata()[cindex*3+c][i]*factor;
+                        onew->cval(cindex,c,i) *= temp_famplitude;
         }
         );
         return;
@@ -306,16 +477,12 @@ void vorticity_equation<rnumber, be>::omega_nonlin(
                     ptrdiff_t xindex,
                     ptrdiff_t yindex,
                     ptrdiff_t zindex){
-        //ptrdiff_t tindex = 3*rindex;
         rnumber tmp[3];
         for (int cc=0; cc<3; cc++)
             tmp[cc] = (this->u->rval(rindex,(cc+1)%3)*this->rvorticity->rval(rindex,(cc+2)%3) -
                        this->u->rval(rindex,(cc+2)%3)*this->rvorticity->rval(rindex,(cc+1)%3));
-            //tmp[cc][0] = (this->u->get_rdata()[tindex+(cc+1)%3]*this->rvorticity->get_rdata()[tindex+(cc+2)%3] -
-            //              this->u->get_rdata()[tindex+(cc+2)%3]*this->rvorticity->get_rdata()[tindex+(cc+1)%3]);
         for (int cc=0; cc<3; cc++)
             this->u->rval(rindex,cc) = tmp[cc] / this->u->npoints;
-            //this->u->get_rdata()[(3*rindex)+cc] = tmp[cc][0] / this->u->npoints;
     }
     );
     /* go back to Fourier space */
@@ -337,22 +504,13 @@ void vorticity_equation<rnumber, be>::omega_nonlin(
             tmp[1][1] =  (this->kk->kz[zindex]*this->u->cval(cindex,0,0) - this->kk->kx[xindex]*this->u->cval(cindex,2,0));
             tmp[2][1] =  (this->kk->kx[xindex]*this->u->cval(cindex,1,0) - this->kk->ky[yindex]*this->u->cval(cindex,0,0));
         }
-        //ptrdiff_t tindex = 3*cindex;
-        //{
-        //    tmp[0][0] = -(this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][1] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][1]);
-        //    tmp[1][0] = -(this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][1] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][1]);
-        //    tmp[2][0] = -(this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][1] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][1]);
-        //    tmp[0][1] =  (this->kk->ky[yindex]*this->u->get_cdata()[tindex+2][0] - this->kk->kz[zindex]*this->u->get_cdata()[tindex+1][0]);
-        //    tmp[1][1] =  (this->kk->kz[zindex]*this->u->get_cdata()[tindex+0][0] - this->kk->kx[xindex]*this->u->get_cdata()[tindex+2][0]);
-        //    tmp[2][1] =  (this->kk->kx[xindex]*this->u->get_cdata()[tindex+1][0] - this->kk->ky[yindex]*this->u->get_cdata()[tindex+0][0]);
-        //}
         for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++)
             this->u->cval(cindex, cc, i) = tmp[cc][i];
-            //this->u->get_cdata()[3*cindex+cc][i] = tmp[cc][i];
     }
     );
-    this->add_forcing(this->u, this->v[src], 1.0);
+    this->add_forcing(this->u, this->v[src]);
     this->kk->template force_divfree<rnumber>(this->u->get_cdata());
+    this->u->symmetrize();
 }
 
 template <class rnumber,
@@ -377,12 +535,10 @@ void vorticity_equation<rnumber, be>::step(double dt)
                 this->v[1]->cval(cindex,cc,i) = (
                         this->v[0]->cval(cindex,cc,i) +
                         dt*this->u->cval(cindex,cc,i))*factor0;
-                //this->v[1]->get_cdata()[3*cindex+cc][i] = (
-                //        this->v[0]->get_cdata()[3*cindex+cc][i] +
-                //        dt*this->u->get_cdata()[3*cindex+cc][i])*factor0;
         }
     }
     );
+    this->impose_forcing(this->v[1], this->v[0]);
 
     this->omega_nonlin(1);
     this->kk->CLOOP_K2(
@@ -401,15 +557,14 @@ void vorticity_equation<rnumber, be>::step(double dt)
                         3*this->v[0]->cval(cindex,cc,i)*factor0 +
                         ( this->v[1]->cval(cindex,cc,i) +
                          dt*this->u->cval(cindex,cc,i))*factor1)*0.25;
-                //this->v[2]->get_cdata()[3*cindex+cc][i] = (
-                //        3*this->v[0]->get_cdata()[3*cindex+cc][i]*factor0 +
-                //        (this->v[1]->get_cdata()[3*cindex+cc][i] +
-                //         dt*this->u->get_cdata()[3*cindex+cc][i])*factor1)*0.25;
         }
     }
     );
+    this->impose_forcing(this->v[2], this->v[0]);
 
     this->omega_nonlin(2);
+    // store old vorticity
+    *this->v[1] = *this->v[0];
     this->kk->CLOOP_K2(
                 [&](ptrdiff_t cindex,
                     ptrdiff_t xindex,
@@ -425,13 +580,10 @@ void vorticity_equation<rnumber, be>::step(double dt)
                         this->v[0]->cval(cindex,cc,i)*factor0 +
                         2*(this->v[2]->cval(cindex,cc,i) +
                            dt*this->u->cval(cindex,cc,i)))*factor0/3;
-                //this->v[3]->get_cdata()[3*cindex+cc][i] = (
-                //        this->v[0]->get_cdata()[3*cindex+cc][i]*factor0 +
-                //        2*(this->v[2]->get_cdata()[3*cindex+cc][i] +
-                //           dt*this->u->get_cdata()[3*cindex+cc][i]))*factor0/3;
         }
     }
     );
+    this->impose_forcing(this->v[0], this->v[1]);
 
     this->kk->template force_divfree<rnumber>(this->cvorticity->get_cdata());
     this->cvorticity->symmetrize();
@@ -456,7 +608,6 @@ void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> *
         //ptrdiff_t tindex = 3*rindex;
         for (int cc=0; cc<3; cc++)
             this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,cc);
-            //this->v[1]->get_rdata()[tindex+cc] = this->u->get_rdata()[tindex+cc]*this->u->get_rdata()[tindex+cc];
         }
         );
     //this->clean_up_real_space(this->rv[1], 3);
@@ -493,7 +644,6 @@ void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> *
         //ptrdiff_t tindex = 3*rindex;
         for (int cc=0; cc<3; cc++)
             this->v[1]->rval(rindex,cc) = this->u->rval(rindex,cc)*this->u->rval(rindex,(cc+1)%3);
-            //this->v[1]->get_rdata()[tindex+cc] = this->u->get_rdata()[tindex+cc]*this->u->get_rdata()[tindex+(cc+1)%3];
     }
     );
     //this->clean_up_real_space(this->rv[1], 3);
@@ -529,14 +679,20 @@ void vorticity_equation<rnumber, be>::compute_pressure(field<rnumber, be, ONE> *
 template <class rnumber,
           field_backend be>
 void vorticity_equation<rnumber, be>::compute_Lagrangian_acceleration(
-        field<rnumber, be, THREE> *acceleration)
+        field<rnumber, be, THREE> *acceleration,
+        field<rnumber, be, ONE> *pressure)
 {
-    field<rnumber, be, ONE> *pressure = new field<rnumber, be, ONE>(
+    bool own_pressure = false;
+    if (pressure == NULL)
+    {
+        pressure = new field<rnumber, be, ONE>(
             this->cvelocity->rlayout->sizes[2],
             this->cvelocity->rlayout->sizes[1],
             this->cvelocity->rlayout->sizes[0],
             this->cvelocity->rlayout->comm,
             this->cvelocity->fftw_plan_rigor);
+        own_pressure = true;
+    }
     this->compute_velocity(this->cvorticity);
     this->cvelocity->ift();
     this->compute_pressure(pressure);
@@ -574,7 +730,8 @@ void vorticity_equation<rnumber, be>::compute_Lagrangian_acceleration(
             acceleration->get_cdata()[tindex+2][1] -= this->kk->kz[zindex]*pressure->get_cdata()[cindex][0];
         }
         });
-    delete pressure;
+    if (own_pressure)
+        delete pressure;
 }
 
 template <class rnumber,
@@ -626,7 +783,6 @@ void vorticity_equation<rnumber, be>::compute_Eulerian_acceleration(
         for (int cc=0; cc<3; cc++)
             this->v[1]->rval(rindex,cc) = \
                 this->cvelocity->rval(rindex,cc)*this->cvelocity->rval(rindex,cc) / this->cvelocity->npoints;
-            //this->v[1]->get_rdata()[tindex+cc] = this->cvelocity->get_rdata()[tindex+cc]*this->cvelocity->get_rdata()[tindex+cc] / this->cvelocity->npoints;
     }
     );
     this->v[1]->dft();
@@ -666,7 +822,6 @@ void vorticity_equation<rnumber, be>::compute_Eulerian_acceleration(
         for (int cc=0; cc<3; cc++)
             this->v[1]->rval(rindex,cc) = \
                 this->cvelocity->rval(rindex,cc)*this->cvelocity->rval(rindex,(cc+1)%3) / this->cvelocity->npoints;
-            //this->v[1]->get_rdata()[tindex+cc] = this->cvelocity->get_rdata()[tindex+cc]*this->cvelocity->get_rdata()[tindex+(cc+1)%3] / this->cvelocity->npoints;
     }
     );
     this->v[1]->dft();
diff --git a/bfps/cpp/vorticity_equation.hpp b/cpp/vorticity_equation.hpp
similarity index 72%
rename from bfps/cpp/vorticity_equation.hpp
rename to cpp/vorticity_equation.hpp
index e8bd1d843f730d39439bc99703956dc623ca4e42..cbff223e2d8119b37ef30b4e6b739aa64eff7039 100644
--- a/bfps/cpp/vorticity_equation.hpp
+++ b/cpp/vorticity_equation.hpp
@@ -28,7 +28,6 @@
 #include <iostream>
 
 #include "field.hpp"
-#include "field_descriptor.hpp"
 
 #ifndef VORTICITY_EQUATION
 
@@ -67,9 +66,12 @@ class vorticity_equation
 
         /* physical parameters */
         double nu;
-        int fmode;         // for Kolmogorov flow
-        double famplitude; // both for Kflow and band forcing
-        double fk0, fk1;   // for band forcing
+        int fmode;                   // for Kolmogorov flow
+        double famplitude;           // both for Kflow and band forcing
+        double fk0, fk1;             // for band forcing
+        double injection_rate;       // for fixed energy injection rate
+        double energy;               // for fixed energy
+        double friction_coefficient; // for Kolmogorov_and_drag
         char forcing_type[128];
 
         /* constructor, destructor */
@@ -88,9 +90,36 @@ class vorticity_equation
         void omega_nonlin(int src);
         void step(double dt);
         void impose_zero_modes(void);
+
+        /** \brief Method that computes force and adds it to the right hand side of the NS equations.
+         *
+         *   If the force has an explicit expression, as for instance in the case of Kolmogorov forcing,
+         *   the term should be added to the nonlinear term for the purposes of time-stepping, since
+         *   otherwise a custom time-stepping scheme would need to be implemented for each forcing type.
+         *
+         */
         void add_forcing(field<rnumber, be, THREE> *dst,
-                         field<rnumber, be, THREE> *src_vorticity,
-                         rnumber factor);
+                         field<rnumber, be, THREE> *src_vorticity);
+
+        void add_Kolmogorov_forcing(field<rnumber, be, THREE> *dst,
+                                    int fmode,
+                                    double famplitude);
+        void add_field_band(
+                field<rnumber, be, THREE> *dst,
+                field<rnumber, be, THREE> *src,
+                double k0, double k1,
+                double prefactor);
+
+        /** \brief Method that imposes action of forcing on new vorticity field.
+         *
+         *   If the force is implicit, in the sense that kinetic energy must be
+         *   preserved or something similar, then the action must be imposed
+         *   after the non-linear term has been added.
+         *
+         */
+        void impose_forcing(
+                field<rnumber, be, THREE> *omega_new,
+                field<rnumber, be, THREE> *omega_old);
         void compute_vorticity(void);
         void compute_velocity(field<rnumber, be, THREE> *vorticity);
 
@@ -124,13 +153,16 @@ class vorticity_equation
                     this->kk->template low_pass<rnumber, THREE>(this->cvorticity->get_cdata(), this->kk->kM);
                     this->kk->template force_divfree<rnumber>(this->cvorticity->get_cdata());
                 #endif
+                this->cvorticity->symmetrize();
             }
         }
 
         /* statistics and general postprocessing */
         void compute_pressure(field<rnumber, be, ONE> *pressure);
         void compute_Eulerian_acceleration(field<rnumber, be, THREE> *acceleration);
-        void compute_Lagrangian_acceleration(field<rnumber, be, THREE> *acceleration);
+        void compute_Lagrangian_acceleration(
+                field<rnumber, be, THREE> *acceleration,
+                field<rnumber, be, ONE> *pressure = NULL);
 };
 
 #endif//VORTICITY_EQUATION
diff --git a/documentation/_static/overview.rst b/documentation/_static/overview.rst
index afe7a753666e6ea5911ce1266d0803aa25ea5c45..58af5653cab860961c71d057d92a21c9b99e6ddc 100644
--- a/documentation/_static/overview.rst
+++ b/documentation/_static/overview.rst
@@ -184,16 +184,17 @@ available, called ``bfps``, that you can execute.
 Just executing it will run a small test DNS on a real space grid of size
 :math:`32 \times 32 \times 32`, in the current
 folder, with the simulation name ``test``.
-So, open a console, and type ``bfps NavierStokes``:
+So, open a console, and type ``bfps DNS NSVE``:
 
 .. code:: bash
 
     # depending on how curious you are, you may have a look at the
     # options first:
     bfps --help
-    bfps NavierStokes --help
+    bfps DNS --help
+    bfps DNS NSVE --help
     # or you may just run it:
-    bfps NavierStokes
+    bfps DNS NSVE
 
 The simulation itself should not take more than a few seconds, since
 this is just a :math:`32^3` simulation run for 8 iterations.
@@ -205,9 +206,9 @@ the following:
 .. code:: python
 
     import numpy as np
-    from bfps import NavierStokes
+    from bfps import DNS
 
-    c = NavierStokes(
+    c = DNS(
             work_dir = '/location/of/simulation/data',
             simname = 'simulation_name_goes_here')
     c.compute_statistics()
@@ -223,7 +224,7 @@ the following:
             data_file['iteration'].value*c.parameters['dt'] / c.statistics['Tint'],
             data_file['iteration'].value*c.parameters['dt'] / c.statistics['tauK']))
 
-:func:`compute_statistics <bfps.NavierStokes.NavierStokes.compute_statistics>`
+:func:`compute_statistics <bfps.DNS.DNS.compute_statistics>`
 will read the data
 file generated by the DNS, compute a bunch of basic statistics, for
 example the Taylor scale Reynolds number :math:`R_\lambda` that we're
@@ -233,7 +234,7 @@ What happens is that the DNS will have generated an ``HDF5`` file
 containing a bunch of specific datasets (spectra, moments of real space
 representations, etc).
 The function
-:func:`compute_statistics <bfps.NavierStokes.NavierStokes.compute_statistics>`
+:func:`compute_statistics <bfps.DNS.DNS.compute_statistics>`
 performs simple postprocessing that may however be expensive, therefore
 it also saves some data into a ``<simname>_postprocess.h5`` file, and
 then it also performs some time averages, yielding the ``statistics``
@@ -242,6 +243,8 @@ dictionary that is used in the above code.
 Behind the scenes
 -----------------
 
+TODO FIXME obsolete documentation
+
 In brief the following takes place:
 
     1. An instance ``c`` of
diff --git a/get_version.py b/get_version.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe545a6796333774366e99f9a1416b5b1c1bc62f
--- /dev/null
+++ b/get_version.py
@@ -0,0 +1,63 @@
+################################################################################
+#                                                                              #
+#  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization      #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
+
+
+
+import datetime
+import subprocess
+
+def main():
+    # get current time
+    now = datetime.datetime.now()
+    # obtain version
+    try:
+        git_branch = subprocess.check_output(['git',
+                                              'rev-parse',
+                                              '--abbrev-ref',
+                                              'HEAD']).strip().split()[-1].decode()
+        git_revision = subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()
+        git_date = datetime.datetime.fromtimestamp(int(subprocess.check_output(['git', 'log', '-1', '--format=%ct']).strip()))
+    except:
+        git_revision = ''
+        git_branch = ''
+        git_date = now
+    if git_branch == '':
+        # there's no git available or something
+        VERSION = '{0:0>4}{1:0>2}{2:0>2}.{3:0>2}{4:0>2}{5:0>2}'.format(
+                    git_date.year, git_date.month, git_date.day,
+                    git_date.hour, git_date.minute, git_date.second)
+    else:
+        VERSION = subprocess.check_output(['git', 'describe', '--tags']).strip().decode().split('-')[0]
+        if (('develop' in git_branch) or
+            ('feature' in git_branch) or
+            ('bugfix'  in git_branch)):
+            VERSION_py = subprocess.check_output(
+                    ['git', 'describe', '--tags', '--dirty']).strip().decode().replace('-g', '+g').replace('-dirty', '.dirty').replace('-', '.post')
+        else:
+            VERSION_py = VERSION
+    print(VERSION)
+    return VERSION_py
+
+if __name__ == '__main__':
+    main()
+
diff --git a/meta/count_nmodes.py b/meta/count_nmodes.py
new file mode 100644
index 0000000000000000000000000000000000000000..19af4ab332067ba72758bbc5244b33c8ea569dc0
--- /dev/null
+++ b/meta/count_nmodes.py
@@ -0,0 +1,34 @@
+import numpy as np
+
+def count_expensive(fk0, fk1):
+    kcomponent = np.arange(-np.floor(fk1)-1, np.floor(fk1)+2, 1).astype(np.float)
+    ksize = (kcomponent[:, None, None]**2 +
+             kcomponent[None, :, None]**2 +
+             kcomponent[None, None, :]**2)**.5
+    #print(ksize[0])
+
+    good_indices = np.where(np.logical_and(
+        ksize >= fk0,
+        ksize <= fk1))
+    #print(ksize[good_indices])
+    #print(good_indices[0].shape)
+    return np.unique(ksize[good_indices].flatten(), return_counts = True)
+
+def main():
+    for ff in [[1, 2],
+               [1.4, 2.3],
+               [1.4, 2.2]]:
+        modes, counts = count_expensive(ff[0], ff[1])
+        nmodes = np.sum(counts)
+        print(1 / ff[1], ff, nmodes)
+        modes_str  = ''
+        counts_str = ''
+        for ii in range(counts.shape[0]):
+            modes_str += '{0:>5g}\t'.format(modes[ii])
+            counts_str += '{0:>5g}\t'.format(counts[ii])
+        print(modes_str + '\n' + counts_str + '\n')
+    return None
+
+if __name__ == '__main__':
+    main()
+
diff --git a/pc_host_info.py b/pc_host_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..dec9db6410b54cd8db31c3bca21843be0edd41b1
--- /dev/null
+++ b/pc_host_info.py
@@ -0,0 +1,51 @@
+################################################################################
+#                                                                              #
+#  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization      #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
+
+
+host_info = {'type' : 'pc'}
+
+
+# info_template = {'type'        : info_template_type,
+#                  'MPI'         : info_template_MPI,
+#                  'environment' : info_template_environment,
+#                  'deltanprocs' : info_template_deltanprocs
+#                  'mail_address': info_template_mail_address}
+
+# info_template_type can be one of:
+# 'pc'            --- jobs run interactively
+# 'cluster'       --- cluster with SGE queueing system
+# 'SLURM'         --- cluster with SLURM queueing system
+# 'IBMLoadLeveler --- cluster with IBM Load Leveler queueing system
+
+# info_template_MPI can be one of:
+# 'openmpi'   --- it means mpirun takes "x" as the parameter to set an environment variable
+# not defined --- use "env" instead of "x"
+
+# info_template_environment, relevant for clusters,
+# is the default queue to which jobs are submitted
+
+# info_template_deltanprocs, relevant for clusters,
+# is the number of cores per node
+
+# info_template_mail_address, relevant for clusters,
+# is the contact e-mail address placed in the job scripts.
diff --git a/setup.py b/setup.py
index 9bba17014aabf36c685395843b806f650604face..0b70e6d14f96d36da0eafd7e5af30e1e93c4aa49 100644
--- a/setup.py
+++ b/setup.py
@@ -1,26 +1,25 @@
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
+################################################################################
+#                                                                              #
+#  Copyright 2015-2019 Max Planck Institute for Dynamics and Self-Organization #
+#                                                                              #
+#  This file is part of bfps.                                                  #
+#                                                                              #
+#  bfps is free software: you can redistribute it and/or modify                #
+#  it under the terms of the GNU General Public License as published           #
+#  by the Free Software Foundation, either version 3 of the License,           #
+#  or (at your option) any later version.                                      #
+#                                                                              #
+#  bfps is distributed in the hope that it will be useful,                     #
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+#  GNU General Public License for more details.                                #
+#                                                                              #
+#  You should have received a copy of the GNU General Public License           #
+#  along with bfps.  If not, see <http://www.gnu.org/licenses/>                #
+#                                                                              #
+# Contact: Cristian.Lalescu@ds.mpg.de                                          #
+#                                                                              #
+################################################################################
 
 
 
@@ -34,147 +33,12 @@ import sys
 import subprocess
 import pickle
 
-
-### compiler configuration
-# check if .config/bfps/machine_settings.py file exists, create it if not
-homefolder = os.path.expanduser('~')
-bfpsfolder = os.path.join(homefolder, '.config', 'bfps')
-if not os.path.exists(os.path.join(bfpsfolder, 'machine_settings.py')):
-    if not os.path.isdir(bfpsfolder):
-        os.mkdir(bfpsfolder)
-    shutil.copyfile('./machine_settings_py.py', os.path.join(bfpsfolder, 'machine_settings.py'))
-# check if .config/bfps/host_information.py file exists, create it if not
-if not os.path.exists(os.path.join(bfpsfolder, 'host_information.py')):
-    if not os.path.isdir(bfpsfolder):
-        os.mkdir(bfpsfolder)
-    open(os.path.join(bfpsfolder, 'host_information.py'),
-         'w').write('host_info = {\'type\' : \'none\'}\n')
-    shutil.copyfile('./machine_settings_py.py', os.path.join(bfpsfolder, 'machine_settings.py'))
-sys.path.insert(0, bfpsfolder)
-# import stuff required for compilation of static library
-from machine_settings import compiler, include_dirs, library_dirs, extra_compile_args, extra_libraries
-
-
 ### package versioning
-# get current time
-now = datetime.datetime.now()
-# obtain version
-try:
-    git_branch = subprocess.check_output(['git',
-                                          'rev-parse',
-                                          '--abbrev-ref',
-                                          'HEAD']).strip().split()[-1].decode()
-    git_revision = subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()
-    git_date = datetime.datetime.fromtimestamp(int(subprocess.check_output(['git', 'log', '-1', '--format=%ct']).strip()))
-except:
-    git_revision = ''
-    git_branch = ''
-    git_date = now
-if git_branch == '':
-    # there's no git available or something
-    VERSION = '{0:0>4}{1:0>2}{2:0>2}.{3:0>2}{4:0>2}{5:0>2}'.format(
-                git_date.year, git_date.month, git_date.day,
-                git_date.hour, git_date.minute, git_date.second)
-else:
-    if (('develop' in git_branch) or
-        ('feature' in git_branch) or
-        ('bugfix'  in git_branch)):
-        VERSION = subprocess.check_output(
-                ['git', 'describe', '--tags', '--dirty']).strip().decode().replace('-g', '+g').replace('-dirty', '.dirty').replace('-', '.post')
-    else:
-        VERSION = subprocess.check_output(['git', 'describe', '--tags']).strip().decode().split('-')[0]
+import get_version
+VERSION = get_version.main()
 print('This is bfps version ' + VERSION)
 
 
-
-### lists of files and MANIFEST.in
-src_file_list = ['full_code/joint_acc_vel_stats',
-                 'full_code/test',
-                 'full_code/filter_test',
-                 'hdf5_tools',
-                 'full_code/get_rfields',
-                 'full_code/NSVE_field_stats',
-                 'full_code/native_binary_to_hdf5',
-                 'full_code/postprocess',
-                 'full_code/code_base',
-                 'full_code/direct_numerical_simulation',
-                 'full_code/NSVE',
-                 'field_binary_IO',
-                 'vorticity_equation',
-                 'field',
-                 'kspace',
-                 'field_layout',
-                 'field_descriptor',
-                 'rFFTW_distributed_particles',
-                 'distributed_particles',
-                 'particles',
-                 'particles_base',
-                 'rFFTW_interpolator',
-                 'interpolator',
-                 'interpolator_base',
-                 'fluid_solver',
-                 'fluid_solver_base',
-                 'fftw_tools',
-                 'spline_n1',
-                 'spline_n2',
-                 'spline_n3',
-                 'spline_n4',
-                 'spline_n5',
-                 'spline_n6',
-                 'spline_n7',
-                 'spline_n8',
-                 'spline_n9',
-                 'spline_n10',
-                 'Lagrange_polys',
-                 'scope_timer',
-                 'full_code/NSVEparticles']
-
-particle_headers = [
-        'cpp/particles/particles_distr_mpi.hpp',
-        'cpp/particles/abstract_particles_input.hpp',
-        'cpp/particles/abstract_particles_output.hpp',
-        'cpp/particles/abstract_particles_system.hpp',
-        'cpp/particles/alltoall_exchanger.hpp',
-        'cpp/particles/particles_adams_bashforth.hpp',
-        'cpp/particles/particles_field_computer.hpp',
-        'cpp/particles/particles_input_hdf5.hpp',
-        'cpp/particles/particles_generic_interp.hpp',
-        'cpp/particles/particles_output_hdf5.hpp',
-        'cpp/particles/particles_output_mpiio.hpp',
-        'cpp/particles/particles_system_builder.hpp',
-        'cpp/particles/particles_system.hpp',
-        'cpp/particles/particles_utils.hpp',
-        'cpp/particles/particles_output_sampling_hdf5.hpp',
-        'cpp/particles/particles_sampling.hpp',
-        'cpp/particles/env_utils.hpp']
-
-full_code_headers = ['cpp/full_code/main_code.hpp',
-                     'cpp/full_code/codes_with_no_output.hpp',
-                     'cpp/full_code/NSVE_no_output.hpp',
-                     'cpp/full_code/NSVEparticles_no_output.hpp']
-
-header_list = (['cpp/base.hpp'] +
-               ['cpp/fftw_interface.hpp'] +
-               ['cpp/bfps_timer.hpp'] +
-               ['cpp/omputils.hpp'] +
-               ['cpp/shared_array.hpp'] +
-               ['cpp/spline.hpp'] +
-               ['cpp/' + fname + '.hpp'
-                for fname in src_file_list] +
-               particle_headers +
-               full_code_headers)
-
-with open('MANIFEST.in', 'w') as manifest_in_file:
-    for fname in (['bfps/cpp/' + ff + '.cpp' for ff in src_file_list] +
-                  ['bfps/' + ff for ff in header_list]):
-        manifest_in_file.write('include {0}\n'.format(fname))
-
-
-
-### libraries
-libraries = extra_libraries
-
-
 import distutils.cmd
 
 class CompileLibCommand(distutils.cmd.Command):
@@ -182,74 +46,25 @@ class CompileLibCommand(distutils.cmd.Command):
     user_options = [
             ('timing-output=', None, 'Toggle timing output.'),
             ('fftw-estimate=', None, 'Use FFTW ESTIMATE.'),
+            ('split-fftw-many=', None, 'Turn on SPLIT_FFTW_MANY.'),
             ('disable-fftw-omp=', None, 'Turn Off FFTW OpenMP.'),
             ]
     def initialize_options(self):
         self.timing_output = 0
         self.fftw_estimate = 0
         self.disable_fftw_omp = 0
+        self.split_fftw_many = 0
         return None
     def finalize_options(self):
         self.timing_output = (int(self.timing_output) == 1)
+        self.split_fftw_many = (int(self.split_fftw_many) == 1)
         self.fftw_estimate = (int(self.fftw_estimate) == 1)
         self.disable_fftw_omp = (int(self.disable_fftw_omp) == 1)
         return None
     def run(self):
-        if not os.path.isdir('obj'):
-            os.makedirs('obj')
-            need_to_compile = True
-        if not os.path.isdir('obj/full_code'):
-            os.makedirs('obj/full_code')
-            need_to_compile = True
-        if not os.path.isfile('bfps/libbfps.a'):
-            need_to_compile = True
-        else:
-            ofile = 'bfps/libbfps.a'
-            libtime = datetime.datetime.fromtimestamp(os.path.getctime(ofile))
-            latest = libtime
-            for fname in header_list:
-                latest = max(latest,
-                             datetime.datetime.fromtimestamp(os.path.getctime('bfps/' + fname)))
-            need_to_compile = (latest > libtime)
-        eca = extra_compile_args
-        eca += ['-fPIC']
-        if self.timing_output:
-            eca += ['-DUSE_TIMINGOUTPUT']
-        if self.fftw_estimate:
-            eca += ['-DUSE_FFTWESTIMATE']
-        if self.disable_fftw_omp:
-            eca += ['-DNO_FFTWOMP']
-        for fname in src_file_list:
-            ifile = 'bfps/cpp/' + fname + '.cpp'
-            ofile = 'obj/' + fname + '.o'
-            if not os.path.exists(ofile):
-                need_to_compile_file = True
-            else:
-                need_to_compile_file = (need_to_compile or
-                                        (datetime.datetime.fromtimestamp(os.path.getctime(ofile)) <
-                                         datetime.datetime.fromtimestamp(os.path.getctime(ifile))))
-            if need_to_compile_file:
-                command_strings = [compiler, '-c']
-                command_strings += ['bfps/cpp/' + fname + '.cpp']
-                command_strings += ['-o', 'obj/' + fname + '.o']
-                command_strings += eca
-                command_strings += ['-I' + idir for idir in include_dirs]
-                command_strings.append('-Ibfps/cpp/')
-                print(' '.join(command_strings))
-                subprocess.check_call(command_strings)
-        command_strings = ['ar', 'rvs', 'bfps/libbfps.a']
-        command_strings += ['obj/' + fname + '.o' for fname in src_file_list]
-        print(' '.join(command_strings))
-        subprocess.check_call(command_strings)
-
         ### save compiling information
         pickle.dump(
-                {'include_dirs' : include_dirs,
-                 'library_dirs' : library_dirs,
-                 'compiler'     : compiler,
-                 'extra_compile_args' : eca,
-                 'libraries' : libraries,
-                 'install_date' : now,
+                {'install_date' : now,
                  'VERSION' : VERSION,
                  'git_revision' : git_revision},
                 open('bfps/install_info.pickle', 'wb'),
@@ -262,23 +77,24 @@ setup(
         name = 'bfps',
         packages = ['bfps', 'bfps/test'],
         install_requires = ['numpy>=1.8', 'h5py>=2.2.1'],
-        cmdclass={'compile_library' : CompileLibCommand},
-        package_data = {'bfps': header_list +
-                                ['libbfps.a',
-                                 'install_info.pickle'] +
-                                ['test/B32p1e4_checkpoint_0.h5']},
+        package_data = {'bfps': ['test/B32p1e4_checkpoint_0.h5']},
         entry_points = {
             'console_scripts': [
                 'bfps = bfps.__main__:main',
                 'bfps1 = bfps.__main__:main',
-                'bfps.test_NSVEparticles = bfps.test.test_bfps_NSVEparticles:main'],
+                'bfps.test_NSVEparticles = bfps.test.test_bfps_NSVEparticles:main',
+                'bfps.test_particles = bfps.test.test_particles:main',
+                'bfps.test_Parseval = bfps.test.test_Parseval:main',
+                'bfps.test_fftw = bfps.test.test_fftw:main'],
             },
         version = VERSION,
 ########################################################################
 # useless stuff folows
+# if anyone knows how to open the README when calling this script from
+# cmake, please let me know.
 ########################################################################
         description = 'Big Fluid and Particle Simulator',
-        long_description = open('README.rst', 'r').read(),
+        #long_description = open('${PROJECT_SOURCE_DIR}/README.rst', 'r').read(),
         author = AUTHOR,
         author_email = AUTHOR_EMAIL,
         license = 'GPL version 3.0')
diff --git a/tests/DNS/test_scaling.py b/tests/DNS/test_scaling.py
index 1d4b12a5e3eb4aa322d68ba276437d1a641f7eae..3ae1d299ae9ab78dffb4d252d142e13f47adade6 100644
--- a/tests/DNS/test_scaling.py
+++ b/tests/DNS/test_scaling.py
@@ -12,7 +12,12 @@ def get_DNS_parameters(
         nprocesses = 1,
         output_on = False,
         cores_per_node = 16,
-        nparticles = int(1e5)):
+        nparticles = int(1e5),
+        environment = 'express',
+        minutes = '29',
+        no_submit = True,
+        src_dirname = '/draco/ptmp/clalescu/scaling'):
+    assert (N in [1024, 2048, 4096])
     simname = (DNS_type + '{0:0>4d}'.format(N))
     if output_on:
         simname = DNS_type + simname
@@ -25,15 +30,13 @@ def get_DNS_parameters(
     work_dir = 'nn{0:0>4d}np{1}'.format(nnodes, nprocesses)
     if not output_on:
         class_name += '_no_output'
-    src_simname = 'N{0:0>4d}_kMeta2'.format(N)
-    src_iteration = -1
-    if N == 512:
-        src_iteration = 3072
+    src_simname = 'fb3_N{0:0>4d}_kMeta1.5'.format(N)
     if N == 1024:
-        src_iteration = 0x4000
+        src_iteration = 32*1024
     if N == 2048:
-        src_iteration = 0x6000
+        src_iteration = 20*1024
     if N == 4096:
+        src_simname = 'fb3_N2048x2_kMeta1.5'
         src_iteration = 0
     DNS_parameters = [
             class_name,
@@ -45,9 +48,8 @@ def get_DNS_parameters(
             '--niter_todo', '12',
             '--niter_out', '12',
             '--niter_stat', '3']
-    if src_iteration >= 0:
-        DNS_parameters += [
-            '--src-wd', 'database',
+    DNS_parameters += [
+            '--src-wd', src_dirname,
             '--src-simname', src_simname,
             '--src-iteration', '{0}'.format(src_iteration)]
     if DNS_type != 'A':
@@ -63,6 +65,10 @@ def get_DNS_parameters(
                 '--tracers0_neighbours', '{0}'.format(nneighbours),
                 '--tracers0_smoothness', '{0}'.format(smoothness),
                 '--particle-rand-seed', '2']
+    if no_submit:
+        DNS_parameters += ['--no-submit']
+    DNS_parameters += ['--environment', environment,
+                       '--minutes', '{0}'.format(minutes)]
     return simname, work_dir, DNS_parameters
 
 def main():
@@ -86,27 +92,50 @@ def main():
     parser.add_argument(
             '--nnodes',
             type = int,
+            help = 'how many nodes to use',
             dest = 'nnodes',
             default = 1)
     parser.add_argument(
             '--nprocesses',
             type = int,
+            help = 'how many MPI processes to use',
             dest = 'nprocesses',
             default = 1)
     parser.add_argument(
             '--ncores',
             type = int,
+            help = 'how many cores there are per node',
             dest = 'ncores',
-            default = 4)
+            default = 40)
     parser.add_argument(
             '--output-on',
             action = 'store_true',
             dest = 'output_on')
+    parser.add_argument(
+            '--submit',
+            action = 'store_true',
+            dest = 'submit')
     parser.add_argument(
             '--nparticles',
             type = int,
             dest = 'nparticles',
             default = int(1e5))
+    parser.add_argument(
+            '--environment',
+            type = str,
+            dest = 'environment',
+            default = 'express')
+    parser.add_argument(
+            '--minutes',
+            type = int,
+            dest = 'minutes',
+            default = 29,
+            help = 'If environment supports it, this is the requested wall-clock-limit.')
+    parser.add_argument(
+            '--src-wd',
+            type = str,
+            dest = 'src_dirname',
+            default = '/draco/ptmp/clalescu/scaling')
     opt = parser.parse_args(sys.argv[1:])
     simname, work_dir, params = get_DNS_parameters(
             DNS_type = opt.DNS_setup,
@@ -115,7 +144,11 @@ def main():
             nprocesses = opt.nprocesses,
             output_on = opt.output_on,
             nparticles = opt.nparticles,
-            cores_per_node = opt.ncores)
+            cores_per_node = opt.ncores,
+            no_submit = not opt.submit,
+            minutes = opt.minutes,
+            environment = opt.environment,
+            src_dirname = opt.src_dirname)
     print(work_dir + '/' + simname)
     print(' '.join(params))
     # these following 2 lines actually launch something
diff --git a/tests/base.py b/tests/base.py
index 1c06974e836d2a348bf1e4b260f2b018ec3ab7af..542679733757b5213193f3b7f6ad02cda7e0617b 100644
--- a/tests/base.py
+++ b/tests/base.py
@@ -33,7 +33,6 @@ import numpy as np
 import matplotlib.pyplot as plt
 
 import bfps
-from bfps import FluidResize
 from bfps.tools import particle_finite_diff_test as acceleration_test
 
 import argparse
@@ -50,6 +49,9 @@ def get_parser(base_class = bfps.NavierStokes,
     parser.add_argument('-n',
             type = int, dest = 'n',
             default = n)
+    parser.add_argument('--np',
+            type = int, dest = 'np',
+            default = ncpu)
     parser.add_argument('--ncpu',
             type = int, dest = 'ncpu',
             default = ncpu)
@@ -89,33 +91,13 @@ parser.add_argument(
         dest = 'kMeta',
         default = 2.0)
 
-def double(opt):
-    old_simname = 'N{0:0>3x}'.format(opt.n)
-    new_simname = 'N{0:0>3x}'.format(opt.n*2)
-    c = FluidResize(fluid_precision = opt.precision)
-    c.launch(
-            args = ['--simname', old_simname + '_double',
-                    '--wd', opt.work_dir,
-                    '--nx', '{0}'.format(opt.n),
-                    '--ny', '{0}'.format(opt.n),
-                    '--nz', '{0}'.format(opt.n),
-                    '--dst_nx', '{0}'.format(2*opt.n),
-                    '--dst_ny', '{0}'.format(2*opt.n),
-                    '--dst_nz', '{0}'.format(2*opt.n),
-                    '--dst_simname', new_simname,
-                    '--src_simname', old_simname,
-                    '--src_iteration', '0',
-                    '--src_wd', './',
-                    '--niter_todo', '0'])
-    return None
-
 def launch(
         opt,
         nu = None,
         dt = None,
         tracer_state_file = None,
         vorticity_field = None,
-        code_class = bfps.NavierStokes,
+        code_class = bfps.DNS,
         particle_class = 'particles',
         interpolator_class = 'rFFTW_interpolator'):
     c = code_class(
diff --git a/tests/ci-scripts/test.sh b/tests/ci-scripts/test.sh
index ddde2489e431412c260752f800640812ead91167..bb6eaa859fa40d8ffa975e693dc6351ebbbd63d5 100644
--- a/tests/ci-scripts/test.sh
+++ b/tests/ci-scripts/test.sh
@@ -5,41 +5,47 @@ set -x
 # stops when fails
 set -e
 
-# Init
-export destdir=$(pwd)"/ci-installdir"
-export pythonbin=/home/ubuntu/anaconda3/bin/python3
-export bfpspythonpath=$destdir/lib/python3.6/site-packages/
-export PYTHONPATH=:$bfpspythonpath$PYTHONPATH
-export PATH=$destdir/bin/:/home/ubuntu/hdf5/install/bin/:$PATH
-export LD_LIBRARY_PATH=/home/ubuntu/hdf5/install/lib/:/home/ubuntu/fftw/install/lib/
-
-echo "destdir = $destdir"
-echo "pythonbin = $pythonbin"
-echo "bfpspythonpath = $bfpspythonpath"
-
-# Remove possible previous installation
-if [[ -d $destdir ]] ; then
-    rm -rf $destdir ;
-fi
-
-# Create install path
-if [[ ! -d $bfpspythonpath ]] ; then
-    mkdir -p $bfpspythonpath ;
-fi
-
-# Build
-$pythonbin setup.py compile_library --timing-output 1
-# Install
-$pythonbin setup.py install --prefix=$destdir
-
-# Test
-ls $destdir
-ls $destdir/bin/
-
-$pythonbin $destdir/bin/bfps.test_NSVEparticles
-
-# Clean
-if [[ -d $destdir ]] ; then
-    rm -rf $destdir ;
-fi
+echo "please check VM before turning tests back on"
+
+## Init
+#export destdir=$(pwd)"/ci-installdir"
+#export pythonbin=/home/ubuntu/anaconda3/bin/python3
+#export bfpspythonpath=$destdir/lib/python3.6/site-packages/
+#export PYTHONPATH=:$bfpspythonpath$PYTHONPATH
+#export PATH=$destdir/bin/:/home/ubuntu/hdf5/install/bin/:$PATH
+#export LD_LIBRARY_PATH=/home/ubuntu/hdf5/install/lib/:/home/ubuntu/fftw/install/lib/
+#
+#echo "destdir = $destdir"
+#echo "pythonbin = $pythonbin"
+#echo "bfpspythonpath = $bfpspythonpath"
+#
+## Remove possible previous installation
+#if [[ -d $destdir ]] ; then
+#    rm -rf $destdir ;
+#fi
+#
+## Create install path
+#if [[ ! -d $bfpspythonpath ]] ; then
+#    mkdir -p $bfpspythonpath ;
+#fi
+#
+## Build
+#$pythonbin setup.py compile_library --timing-output 1
+## Install
+#$pythonbin setup.py install --prefix=$destdir
+#
+## Test
+#ls $destdir
+#ls $destdir/bin/
+#
+#$pythonbin $destdir/bin/bfps.test_fftw
+#
+#$pythonbin $destdir/bin/bfps.test_Parseval
+#
+#$pythonbin $destdir/bin/bfps.test_NSVEparticles
+#
+## Clean
+#if [[ -d $destdir ]] ; then
+#    rm -rf $destdir ;
+#fi
 
diff --git a/tests/misc/makefile b/tests/misc/makefile
new file mode 100644
index 0000000000000000000000000000000000000000..d44b9f04a10bdcb46fcf88ec9c858832c3c6e4df
--- /dev/null
+++ b/tests/misc/makefile
@@ -0,0 +1,15 @@
+test_fftw: test_fftw.c
+	mpicc \
+		-DFFTW_PLAN_RIGOR=FFTW_ESTIMATE \
+		-I/stuff/ext_installs/include \
+		-fopenmp \
+		test_fftw.c \
+		-o test_fftw \
+		-L/stuff/ext_installs/lib \
+		-lfftw3_mpi \
+		-lfftw3 \
+		-lfftw3f_mpi \
+		-lfftw3f \
+		-lfftw3_threads \
+		-lfftw3f_threads \
+		-lm
diff --git a/tests/misc/pow_overflow.cpp b/tests/misc/pow_overflow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..48cc8aaeff5b7cacb9f9175970eeeaf2112d299f
--- /dev/null
+++ b/tests/misc/pow_overflow.cpp
@@ -0,0 +1,30 @@
+#include <cfenv>
+#include <cmath>
+#include <iostream>
+#include <limits>
+
+int main()
+{
+    feenableexcept(FE_ALL_EXCEPT);
+    double p0 = 3.54;
+    double p4 = 122;
+    double p5 = 0.836;
+    double ell = 1.0;
+    double result = 0.;
+    double argument = 0.;
+
+    for (int k = 0; k<128; k++)
+    {
+        argument = p0*k*ell;
+    //    double exponent = p4*pow(ell, p5);
+    //    //if (exponent*log(argument) <2*std::numeric_limits<double>::min())
+    //    //    result = 0.;
+    //    //else
+    //    //{
+    //    //    double result0 = pow(p0*argument, p4*pow(ell, p5));
+    //    //    result = exp(-0.5*result0);
+    //    //}
+    }
+    std::cout << argument << std::endl;
+    return 0;
+}
diff --git a/tests/misc/run.sh b/tests/misc/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ada649cac9355848e903c19778785aaf28a935fd
--- /dev/null
+++ b/tests/misc/run.sh
@@ -0,0 +1,2 @@
+make
+mpirun -np 2 -x OMP_NUM_THREADS=1 test_fftw
diff --git a/tests/misc/test_fftw.c b/tests/misc/test_fftw.c
new file mode 100644
index 0000000000000000000000000000000000000000..af9fef7b6564bdc4b5b3db0908cda43ec3dd9945
--- /dev/null
+++ b/tests/misc/test_fftw.c
@@ -0,0 +1,341 @@
+#include <fftw3-mpi.h>
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+
+#ifndef FFTW_PLAN_RIGOR
+
+#define FFTW_PLAN_RIGOR FFTW_ESTIMATE
+
+#endif
+
+//#define NO_FFTWOMP
+
+#define NX 36
+#define NY 36
+#define NZ 12
+
+const int nx = NX;
+const int ny = NY;
+const int nz = NZ;
+const int npoints = NX*NY*NZ;
+
+const double dkx = 1.0;
+const double dky = 1.0;
+const double dkz = 1.0;
+
+int myrank, nprocs;
+
+int main(
+        int argc,
+        char *argv[])
+{
+    ////////////////////////////////////
+    /* initialize MPI environment */
+#ifdef NO_FFTWOMP
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+    fftw_mpi_init();
+    fftwf_mpi_init();
+    printf("There are %d processes\n", nprocs);
+#else
+    int mpiprovided;
+    MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &mpiprovided);
+    assert(mpiprovided >= MPI_THREAD_FUNNELED);
+    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+    const int nThreads = omp_get_max_threads();
+    printf("Number of threads for the FFTW = %d\n",
+              nThreads);
+    if (nThreads > 1){
+        fftw_init_threads();
+        fftwf_init_threads();
+    }
+    fftw_mpi_init();
+    fftwf_mpi_init();
+    printf("There are %d processes and %d threads\n",
+              nprocs,
+              nThreads);
+    if (nThreads > 1){
+        fftw_plan_with_nthreads(nThreads);
+        fftwf_plan_with_nthreads(nThreads);
+    }
+#endif
+
+    ////////////////////////////////////
+    /* do useful work */
+
+    // declarations
+    ptrdiff_t nfftw[3];
+    ptrdiff_t tmp_local_size;
+    ptrdiff_t local_n0, local_0_start;
+    ptrdiff_t local_n1, local_1_start;
+    ptrdiff_t local_size;
+    ptrdiff_t ix, iy, iz;
+    ptrdiff_t jx, jy, jz;
+    ptrdiff_t rindex, cindex;
+    int cc;
+    float *data0, *data;
+    fftwf_complex *cdata;
+    double L2norm0, L2norm1, L2norm2, L2normk;
+    double local_L2norm0, local_L2norm1;
+    fftwf_plan c2r_plan, r2c_plan;
+    double *kx, *ky, *kz;
+
+    // get sizes
+    nfftw[0] = nz;
+    nfftw[1] = ny;
+    nfftw[2] = nx;
+    tmp_local_size =  fftwf_mpi_local_size_many_transposed(
+            3, nfftw, 3,
+            FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, MPI_COMM_WORLD,
+            &local_n0, &local_0_start,
+            &local_n1, &local_1_start);
+
+    local_size = local_n1 * nz * nx * 3 * 2;
+
+    // allocate field
+    data = fftwf_alloc_real(
+            local_size);
+    data0 = fftwf_alloc_real(
+            local_size);
+    cdata = (fftwf_complex*)(data);
+
+    c2r_plan = fftwf_mpi_plan_many_dft_c2r(
+            3, nfftw, 3,
+            FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
+            cdata,
+            data,
+            MPI_COMM_WORLD,
+            FFTW_PLAN_RIGOR | FFTW_MPI_TRANSPOSED_IN);
+
+    r2c_plan = fftwf_mpi_plan_many_dft_r2c(
+            3, nfftw, 3,
+            FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
+            data,
+            cdata,
+            MPI_COMM_WORLD,
+            FFTW_PLAN_RIGOR | FFTW_MPI_TRANSPOSED_OUT);
+
+    kx = (double*)malloc(sizeof(double)*(nx/2+1));
+    ky = (double*)malloc(sizeof(double)*local_n1);
+    kz = (double*)malloc(sizeof(double)*nz);
+
+    // generate wavenumbers
+    for (jy = 0; jy < local_n1; jy++)
+    {
+        if (jy + local_1_start <= ny/2)
+            ky[jy] = dky*(jy + local_1_start);
+        else
+            ky[jy] = dky*((jy + local_1_start) - ny); }
+    for (jz = 0; jz < nz; jz++)
+    {
+        if (jz <= nz/2)
+            kz[jz] = dkz*jz;
+        else
+            kz[jz] = dkz*(jz - nz);
+    }
+    for (jx = 0; jx < nx/2+1; jx++)
+    {
+        kx[jx] = dkx*jx;
+    }
+
+    // fill field with random numbers
+    // I'm generating cindex the stupid way, but we can also use
+    // cindex = (jy*nz + jz)*(nx/2+1) + jx
+    cindex = 0;
+    for (jy = 0; jy < local_n1; jy++)
+        for (jz = 0; jz < nz; jz++)
+        {
+            for (jx = 0; jx < nx/2+1; jx++)
+            {
+                double k2 = (kx[jx]*kx[jx] +
+                             ky[jy]*ky[jy] +
+                             kz[jz]*kz[jz]);
+                if (jx == 0 && (jy + local_1_start) == 0 && jz == 0)
+                    k2 = dkx*dkx + dky*dky + dkz*dkz;
+                for (cc = 0; cc<3; cc++)
+                {
+                    cdata[cindex*3+cc][0] = (drand48()-0.5) / sqrt(k2);
+                    cdata[cindex*3+cc][1] = (drand48()-0.5) / sqrt(k2);
+                }
+                cindex++;
+            }
+        }
+
+    // go back and forth so that the
+    // Fourier space representation is properly symmetrized
+    fftwf_execute(c2r_plan);
+    fftwf_execute(r2c_plan);
+    // normalize, compute Fourier space L2 norm
+    cindex = 0;
+    local_L2norm0 = 0;
+    for (jy = 0; jy < local_n1; jy++)
+        for (jz = 0; jz < nz; jz++)
+        {
+            for (cc = 0; cc<3; cc++)
+            {
+                cdata[cindex*3+cc][0] /= npoints;
+                cdata[cindex*3+cc][1] /= npoints;
+                local_L2norm0 += (cdata[cindex*3+cc][0]*cdata[cindex*3+cc][0] +
+                                  cdata[cindex*3+cc][1]*cdata[cindex*3+cc][1]);
+            }
+            cindex++;
+            for (jx = 1; jx < nx/2+1; jx++)
+            {
+                for (cc = 0; cc<3; cc++)
+                {
+                    cdata[cindex*3+cc][0] /= npoints;
+                    cdata[cindex*3+cc][1] /= npoints;
+                    local_L2norm0 += 2*(cdata[cindex*3+cc][0]*cdata[cindex*3+cc][0] +
+                                        cdata[cindex*3+cc][1]*cdata[cindex*3+cc][1]);
+                }
+                cindex++;
+            }
+        }
+    MPI_Allreduce(
+            &local_L2norm0,
+            &L2normk,
+            1,
+            MPI_DOUBLE,
+            MPI_SUM,
+            MPI_COMM_WORLD);
+    L2normk = sqrt(L2normk);
+
+    // go to real space
+    fftwf_execute(c2r_plan);
+
+    // rindex = (iz*ny + iy)*(nx+2) + ix
+    rindex = 0;
+    local_L2norm0 = 0;
+    for (iz = 0; iz < local_n0; iz++)
+        for (iy = 0; iy < ny; iy++)
+        {
+            for (ix = 0; ix < nx; ix++)
+            {
+                for (cc = 0; cc<3; cc++)
+                {
+                    local_L2norm0 += data[rindex*3+cc]*data[rindex*3+cc];
+                }
+                rindex++;
+            }
+            for (ix = nx; ix < nx+2; ix++)
+            {
+                rindex++;
+            }
+        }
+    MPI_Allreduce(
+            &local_L2norm0,
+            &L2norm1,
+            1,
+            MPI_DOUBLE,
+            MPI_SUM,
+            MPI_COMM_WORLD);
+    L2norm1 = sqrt(L2norm1 / npoints);
+
+    //fftwf_execute(r2c_plan);
+
+    //cindex = 0;
+    //local_L2norm0 = 0;
+    //for (jy = 0; jy < local_n1; jy++)
+    //    for (jz = 0; jz < nz; jz++)
+    //    {
+    //        for (cc = 0; cc<3; cc++)
+    //        {
+    //            local_L2norm0 += (cdata[cindex*3+cc][0]*cdata[cindex*3+cc][0] +
+    //                              cdata[cindex*3+cc][1]*cdata[cindex*3+cc][1]);
+    //        }
+    //        cindex++;
+    //        // I am not adding the energy from mode nx/2 as a matter of principle.
+    //        for (jx = 1; jx < nx/2+1; jx++)
+    //        {
+    //            for (cc = 0; cc<3; cc++)
+    //            {
+    //                local_L2norm0 += 2*(cdata[cindex*3+cc][0]*cdata[cindex*3+cc][0] +
+    //                                    cdata[cindex*3+cc][1]*cdata[cindex*3+cc][1]);
+    //            }
+    //            cindex++;
+    //        }
+    //    }
+    //MPI_Allreduce(
+    //        &local_L2norm0,
+    //        &L2normk,
+    //        1,
+    //        MPI_DOUBLE,
+    //        MPI_SUM,
+    //        MPI_COMM_WORLD);
+    //L2normk = sqrt(L2normk) / (nx*ny*nz);
+    //fftwf_execute(c2r_plan);
+
+    //// normalize
+    //rindex = 0;
+    //local_L2norm0 = 0;
+    //local_L2norm1 = 0;
+    //for (iz = 0; iz < local_n0; iz++)
+    //    for (iy = 0; iy < ny; iy++)
+    //    {
+    //        for (ix = 0; ix < nx; ix++)
+    //        {
+    //            for (cc = 0; cc<3; cc++)
+    //            {
+    //                data[rindex*3+cc] /= (nx*ny*nz);
+    //                local_L2norm0 += data[rindex*3+cc]*data[rindex*3+cc];
+    //                local_L2norm1 += ((data0[rindex*3+cc] - data[rindex*3+cc])*
+    //                                  (data0[rindex*3+cc] - data[rindex*3+cc]));
+    //            }
+    //            rindex++;
+    //        }
+    //        for (ix = nx; ix < nx+2; ix++)
+    //        {
+    //            rindex++;
+    //        }
+    //    }
+    //MPI_Allreduce(
+    //        &local_L2norm0,
+    //        &L2norm1,
+    //        1,
+    //        MPI_DOUBLE,
+    //        MPI_SUM,
+    //        MPI_COMM_WORLD);
+    //MPI_Allreduce(
+    //        &local_L2norm1,
+    //        &L2norm2,
+    //        1,
+    //        MPI_DOUBLE,
+    //        MPI_SUM,
+    //        MPI_COMM_WORLD);
+    //L2norm1 = sqrt(L2norm1 / (nx*ny*nz));
+    //L2norm2 = sqrt(L2norm2 / (nx*ny*nz));
+
+    printf("FFTW_PLAN_RIGOR=%d\n", FFTW_PLAN_RIGOR);
+    printf("L2normk = %g, L2norm1 = %g, relative error = %g\n",
+            L2normk, L2norm1, fabs(L2normk - L2norm1) / (L2normk));
+
+    // deallocate
+    fftwf_destroy_plan(r2c_plan);
+    fftwf_destroy_plan(c2r_plan);
+    fftwf_free(data);
+    fftwf_free(data0);
+    free(kx);
+    free(ky);
+    free(kz);
+
+    ////////////////////////////////////
+    /* clean up */
+    fftwf_mpi_cleanup();
+    fftw_mpi_cleanup();
+
+#ifndef NO_FFTWOMP
+    if (nThreads > 1){
+        fftw_cleanup_threads();
+        fftwf_cleanup_threads();
+    }
+#endif
+
+    MPI_Finalize();
+    return EXIT_SUCCESS;
+}
+
diff --git a/tests/run_all_tests.sh b/tests/run_all_tests.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7865a8a3d9a3b7d56194b0dcda2bc24925aaeafd
--- /dev/null
+++ b/tests/run_all_tests.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -e
+
+bfps.test_fftw
+bfps.test_Parseval
+bfps.test_NSVEparticles
+
+# test postprocessing
+bfps PP field_single_to_double --simname dns_nsveparticles --iter0 32 --iter1 32
+bfps PP get_rfields --simname dns_nsveparticles --iter0 0 --iter1 64
+bfps PP joint_acc_vel_stats --simname dns_nsveparticles --iter0 0 --iter1 64
+bfps PP resize --simname dns_nsveparticles --new_nx 96 --new_ny 96 --new_nz 96 --new_simname dns_nsveparticles_resized
diff --git a/tests/test_io_03_run.py b/tests/test_io_03_run.py
index a789ac66fd99d8e5525ce69b1e861f609d969212..5b4905ba8973299b44a3dd7ef5f9fa07294e7a8b 100644
--- a/tests/test_io_03_run.py
+++ b/tests/test_io_03_run.py
@@ -35,5 +35,5 @@ if __name__ == '__main__':
     c.write_src()
     c.write_par()
     c.set_host_info(bfps.host_info)
-    c.run()
+    c.run(opt.ncpu, 1)
 
diff --git a/tests/test_plain.py b/tests/test_plain.py
deleted file mode 100644
index ad30224f869fc724758cc95d8b9e10da7b4ca2d4..0000000000000000000000000000000000000000
--- a/tests/test_plain.py
+++ /dev/null
@@ -1,156 +0,0 @@
-#! /usr/bin/env python3
-#######################################################################
-#                                                                     #
-#  Copyright 2015 Max Planck Institute                                #
-#                 for Dynamics and Self-Organization                  #
-#                                                                     #
-#  This file is part of bfps.                                         #
-#                                                                     #
-#  bfps is free software: you can redistribute it and/or modify       #
-#  it under the terms of the GNU General Public License as published  #
-#  by the Free Software Foundation, either version 3 of the License,  #
-#  or (at your option) any later version.                             #
-#                                                                     #
-#  bfps is distributed in the hope that it will be useful,            #
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of     #
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
-#  GNU General Public License for more details.                       #
-#                                                                     #
-#  You should have received a copy of the GNU General Public License  #
-#  along with bfps.  If not, see <http://www.gnu.org/licenses/>       #
-#                                                                     #
-# Contact: Cristian.Lalescu@ds.mpg.de                                 #
-#                                                                     #
-#######################################################################
-
-
-
-#from base import *
-import bfps
-from bfps.tools import particle_finite_diff_test as acceleration_test
-import sys
-
-import numpy as np
-import matplotlib.pyplot as plt
-
-#parser.add_argument('--multiplejob',
-#        dest = 'multiplejob', action = 'store_true')
-#
-#parser.add_argument(
-#        '--particle-class',
-#        default = 'particles',
-#        dest = 'particle_class',
-#        type = str)
-#
-#parser.add_argument(
-#        '--interpolator-class',
-#        default = 'interpolator',
-#        dest = 'interpolator_class',
-#        type = str)
-
-class NSPlain(bfps.NavierStokes):
-    def specific_parser_arguments(
-            self,
-            parser):
-        bfps.NavierStokes.specific_parser_arguments(self, parser)
-        parser.add_argument(
-                '--particle-class',
-                default = 'rFFTW_distributed_particles',
-                dest = 'particle_class',
-                type = str)
-        parser.add_argument(
-                '--interpolator-class',
-                default = 'rFFTW_interpolator',
-                dest = 'interpolator_class',
-                type = str)
-        parser.add_argument('--neighbours',
-                type = int,
-                dest = 'neighbours',
-                default = 3)
-        parser.add_argument('--smoothness',
-                type = int,
-                dest = 'smoothness',
-                default = 2)
-        return None
-    def launch(
-            self,
-            args = [],
-            **kwargs):
-        opt = self.prepare_launch(args = args)
-        self.fill_up_fluid_code()
-        if type(opt.nparticles) == int:
-            if opt.nparticles > 0:
-                self.add_3D_rFFTW_field(
-                        name = 'rFFTW_acc')
-                self.add_interpolator(
-                        name = 'spline',
-                        neighbours = opt.neighbours,
-                        smoothness = opt.smoothness,
-                        class_name =  opt.interpolator_class)
-                self.add_particles(
-                        kcut = ['fs->kM/2', 'fs->kM/3'],
-                        integration_steps = 3,
-                        interpolator = 'spline',
-                        class_name = opt.particle_class)
-                self.add_particles(
-                        integration_steps = [2, 3, 4, 6],
-                        interpolator = 'spline',
-                        acc_name = 'rFFTW_acc',
-                        class_name = opt.particle_class)
-        self.finalize_code()
-        self.launch_jobs(opt = opt)
-        return None
-
-def plain(args):
-    wd = opt.work_dir
-    opt.work_dir = wd + '/N{0:0>3x}_1'.format(opt.n)
-    c0 = launch(opt, dt = 0.2/opt.n,
-            particle_class = opt.particle_class,
-            interpolator_class = opt.interpolator_class)
-    c0.compute_statistics()
-    print ('Re = {0:.0f}'.format(c0.statistics['Re']))
-    print ('Rlambda = {0:.0f}'.format(c0.statistics['Rlambda']))
-    print ('Lint = {0:.4e}, etaK = {1:.4e}'.format(c0.statistics['Lint'], c0.statistics['etaK']))
-    print ('Tint = {0:.4e}, tauK = {1:.4e}'.format(c0.statistics['Tint'], c0.statistics['tauK']))
-    print ('kMetaK = {0:.4e}'.format(c0.statistics['kMeta']))
-    for s in range(c0.particle_species):
-        acceleration_test(c0, species = s, m = 1)
-    if not opt.multiplejob:
-        return None
-    assert(opt.niter_todo % 3 == 0)
-    opt.work_dir = wd + '/N{0:0>3x}_2'.format(opt.n)
-    opt.njobs *= 2
-    opt.niter_todo = opt.niter_todo//2
-    c1 = launch(opt, dt = c0.parameters['dt'],
-            particle_class = opt.particle_class,
-            interpolator_class = opt.interpolator_class)
-    c1.compute_statistics()
-    opt.work_dir = wd + '/N{0:0>3x}_3'.format(opt.n)
-    opt.njobs = 3*opt.njobs//2
-    opt.niter_todo = 2*opt.niter_todo//3
-    c2 = launch(opt, dt = c0.parameters['dt'],
-            particle_class = opt.particle_class,
-            interpolator_class = opt.interpolator_class)
-    c2.compute_statistics()
-    compare_stats(opt, c0, c1)
-    compare_stats(opt, c0, c2)
-    return None
-
-if __name__ == '__main__':
-    c0 = NSPlain()
-    c0.launch(
-            ['-n', '32',
-             '--ncpu', '4',
-             '--nparticles', '1000',
-             '--niter_todo', '48',
-             '--wd', 'data/single'] +
-            sys.argv[1:])
-    c0.compute_statistics()
-    print ('Re = {0:.0f}'.format(c0.statistics['Re']))
-    print ('Rlambda = {0:.0f}'.format(c0.statistics['Rlambda']))
-    print ('Lint = {0:.4e}, etaK = {1:.4e}'.format(c0.statistics['Lint'], c0.statistics['etaK']))
-    print ('Tint = {0:.4e}, tauK = {1:.4e}'.format(c0.statistics['Tint'], c0.statistics['tauK']))
-    print ('kMetaK = {0:.4e}'.format(c0.statistics['kMeta']))
-    for s in range(c0.particle_species):
-        acceleration_test(c0, species = s, m = 1)
-
diff --git a/tests/test_vorticity_equation.py b/tests/test_vorticity_equation.py
index dfaccb8bf352bdd252e5edf29f6e7d711689f7dc..e492bfa5c75d0f2f2b9989cccef49964b8bc90b4 100644
--- a/tests/test_vorticity_equation.py
+++ b/tests/test_vorticity_equation.py
@@ -273,12 +273,13 @@ def main():
         particle_initial_condition[..., 2] = yvals[None, :, None]
         particle_initial_condition = particle_initial_condition.reshape(-1, 3)
         nparticles = nparticles**2
-    c = bfps.NavierStokes(simname = 'fluid_solver')
+    c = bfps.DNS(simname = 'fluid_solver')
     if run_NS:
         run_NSVE = True
         subprocess.call('rm *fluid_solver* NavierStokes*', shell = True)
         c.launch(
-                ['-n', '32',
+                ['NSVE',
+                 '-n', '32',
                  '--simname', 'fluid_solver',
                  '--ncpu', '4',
                  '--niter_todo', '{0}'.format(niterations),
@@ -298,9 +299,10 @@ def main():
         f = h5py.File('vorticity_equation_checkpoint_0.h5', 'w')
         f['vorticity/complex/0'] = data
         f.close()
-        c = bfps.NSVorticityEquation()
+        c = bfps.DNS()
         c.launch(
-                ['-n', '32',
+                ['NSVEparticles',
+                 '-n', '32',
                  '--simname', 'vorticity_equation',
                  '--np', '4',
                  '--ntpp', '1',