diff --git a/README.rst b/README.rst
index ddb9f2447db919248100368a9a08b13297d5e3a4..5418d711b7846e2ebd2ca68270a95baf1f9e35b3 100644
--- a/README.rst
+++ b/README.rst
@@ -60,7 +60,8 @@ Use a console; navigate to the ``bfps`` folder, and type:
 If you want to run simulations on the machine where you're installing,
 you will need to call `compile_library` before installing.
 Your machine needs to have an MPI compiler installed, the HDF5 C library
-and FFTW >= 3.4.
+and FFTW >= 3.4 --- a detailed prerequisite installation list is
+included at the end of this document.
 The file `machine_settings_py.py` should be modified
 appropriately for your machine (otherwise the `compile_library` command will most
 likely fail).
@@ -102,7 +103,63 @@ Comments
 * particles: initialization of multistep solvers is done with lower
   order methods, so direct convergence tests will fail.
 
+* Code is only known to work with HDF5 1.8.x.
+
 * Code is used mainly with Python 3.4 and 3.5.
   In principle it should be easy to maintain compatibility with Python
   2.7.x, but as of `bfps 1.8` this is no longer a main concern.
 
+-------------------------------
+Installation with prerequisites
+-------------------------------
+
+These installation steps assume that you have a working MPI compiler,
+properly configured on your system (i.e. the various configure scripts
+are able to find it).
+If this is not the case, please consult the FFTW and HDF5 compilation
+instructions for detailed instructions (`./configure --help` should be
+enough).
+
+1. Make directory PREFIX on local fast partition.
+
+2. Download, compile, install FFTW (latest version 3.x from http://www.fftw.org/).
+   Execute the following commands in order, feel free to customize
+   optimisation flags for your own computer:
+
+.. code:: bash
+    ./configure --prefix=PREFIX --enable-single --enable-sse --enable-mpi --enable-openmp --enable-threads
+    make
+    make install
+    ./configure --prefix=PREFIX --enable-sse --enable-sse2 --enable-mpi --enable-openmp --enable-threads
+    make
+    make install
+
+3. Download, compile, install HDF5 (version 1.8.x, currently available
+   at https://support.hdfgroup.org/HDF5/release/obtainsrc518.html.
+   We are using parallel I/O, therefore we use the plain C interface of HDF5:
+
+.. code:: bash
+    ./configure --prefix=PREFIX --enable-parallel
+    make
+    make install
+
+3. This step may be ommited.
+   I recommend the creation of a virtual python3 environment (also under PREFIX) that will be used for installing bfps and dependencies.
+   Please see https://docs.python-guide.org/dev/virtualenvs/.
+
+4. Clone bfps repository.
+
+.. code:: bash
+    git clone git@gitlab.mpcdf.mpg.de:clalescu/bfps.git
+
+5. Tweak host_information.py and machine_settings.py for your user and your machine and place under ~/.config/bfps.
+
+6. Activate virtual environment.
+
+7. Go into bfps repository, execute
+
+.. code:: bash
+
+    python setup.py compile_library
+    python setup.py install
+
diff --git a/bfps/cpp/full_code/NSVEcomplex_particles.cpp b/bfps/cpp/full_code/NSVEcomplex_particles.cpp
index 4ffa148af2ba33fa6c412349d323efafe6035cae..9b910e9bb7a5aaf6b36d884858a095ff9971dffa 100644
--- a/bfps/cpp/full_code/NSVEcomplex_particles.cpp
+++ b/bfps/cpp/full_code/NSVEcomplex_particles.cpp
@@ -44,6 +44,7 @@ int NSVEcomplex_particles<rnumber>::initialize(void)
     particles_inner_computer<double, long long int> current_particles_inner_computer(inner_v0);
     current_particles_inner_computer.setEnable(enable_inner);
 
+
     this->ps = particles_system_builder_with_p2p(
                 this->fs->cvelocity,                                                    // (field object)
                 this->fs->kk,                                                           // (kspace object, contains dkx, dky, dkz)
@@ -92,7 +93,11 @@ int NSVEcomplex_particles<rnumber>::step(void)
     if(this->enable_vorticity_omega){
         *this->tmp_vec_field = this->fs->cvorticity->get_cdata();
         this->tmp_vec_field->ift();
-        this->ps->completeLoopWithVorticity(this->dt, *this->tmp_vec_field);
+        std::unique_ptr<double[]> sampled_vorticity(new double[3*this->ps->getLocalNbParticles()]);
+        std::fill_n(sampled_vorticity.get(), 3*this->ps->getLocalNbParticles(), 0);
+        this->ps->sample_compute_field(*this->tmp_vec_field, sampled_vorticity.get());
+        DEBUG_MSG("sampled vorticity is %g\n", sampled_vorticity[0]);
+        this->ps->completeLoopWithVorticity(this->dt, sampled_vorticity.get());
     }
     else{
         this->ps->completeLoop(this->dt);
@@ -145,6 +150,7 @@ int NSVEcomplex_particles<rnumber>::do_stats()
         return EXIT_SUCCESS;
 
     /// allocate temporary data array
+    /// initialize pdata0 with the positions, and pdata1 with the orientations
     std::unique_ptr<double[]> pdata0 = this->ps->extractParticlesState(0, 3);
     std::unique_ptr<double[]> pdata1 = this->ps->extractParticlesState(3, 6);
     std::unique_ptr<double[]> pdata2(new double[9*this->ps->getLocalNbParticles()]);
@@ -170,6 +176,8 @@ int NSVEcomplex_particles<rnumber>::do_stats()
             this->ps->get_step_idx()-1);
 
     /// sample velocity
+    /// from now on, we need to clean up data arrays before interpolation
+    std::fill_n(pdata1.get(), 3*this->ps->getLocalNbParticles(), 0);
     this->ps->sample_compute_field(*this->tmp_vec_field, pdata1.get());
     this->particles_sample_writer_mpi->template save_dataset<3>(
             "tracers0",
@@ -188,6 +196,7 @@ int NSVEcomplex_particles<rnumber>::do_stats()
             this->fs->cvelocity,
             this->nabla_u);
     this->nabla_u->ift();
+    std::fill_n(pdata2.get(), 9*this->ps->getLocalNbParticles(), 0);
     this->ps->sample_compute_field(*this->nabla_u, pdata2.get());
     this->particles_sample_writer_mpi->template save_dataset<9>(
             "tracers0",
@@ -201,6 +210,7 @@ int NSVEcomplex_particles<rnumber>::do_stats()
     /// compute acceleration and sample it
     this->fs->compute_Lagrangian_acceleration(this->tmp_vec_field);
     this->tmp_vec_field->ift();
+    std::fill_n(pdata1.get(), 3*this->ps->getLocalNbParticles(), 0);
     this->ps->sample_compute_field(*this->tmp_vec_field, pdata1.get());
     this->particles_sample_writer_mpi->template save_dataset<3>(
             "tracers0",
@@ -231,7 +241,10 @@ int NSVEcomplex_particles<rnumber>::read_parameters(void)
     this->tracers0_smoothness = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_smoothness");
     this->enable_p2p = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_p2p");
     this->enable_inner = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_inner");
-    this->enable_vorticity_omega = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_vorticity_omega");
+    int tval = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_enable_vorticity_omega");
+    this->enable_vorticity_omega = tval;
+    DEBUG_MSG("tracers0_enable_vorticity_omega = %d, this->enable_vorticity_omega = %d\n",
+              tval, this->enable_vorticity_omega);
     this->cutoff = hdf5_tools::read_value<double>(parameter_file, "parameters/tracers0_cutoff");
     this->inner_v0 = hdf5_tools::read_value<double>(parameter_file, "parameters/tracers0_inner_v0");
     H5Fclose(parameter_file);
diff --git a/bfps/cpp/particles/abstract_particles_system.hpp b/bfps/cpp/particles/abstract_particles_system.hpp
index 871f045953bdb35cd7c21c84e2a024a9d85869ff..ffa89892d1d6e34b7bcfe1b43b44e02c20471779 100644
--- a/bfps/cpp/particles/abstract_particles_system.hpp
+++ b/bfps/cpp/particles/abstract_particles_system.hpp
@@ -75,6 +75,7 @@ public:
                                    const field<rnumber, be, fc>& in_field) {
         static_assert(fc == THREE, "only THREE is supported for now");
         std::unique_ptr<real_number[]> extra_rhs(new real_number[getLocalNbParticles()*3]());
+        std::fill_n(extra_rhs.get(), 3*getLocalNbParticles(), 0);
         sample_compute_field(in_field, extra_rhs.get());
         completeLoopWithVorticity(dt, extra_rhs.get());
     }
diff --git a/bfps/cpp/particles/particles_inner_computer.hpp b/bfps/cpp/particles/particles_inner_computer.hpp
index 5c855cf5d4f9c9ce69200212e4929420a0c37228..f1fe322a3c30d87c6e3244c6c85f1ff3002f9a00 100644
--- a/bfps/cpp/particles/particles_inner_computer.hpp
+++ b/bfps/cpp/particles/particles_inner_computer.hpp
@@ -98,12 +98,12 @@ public:
         #pragma omp parallel for
         for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){
             // Cross product vorticity/orientation
-            rhs_part[idx_part*size_particle_rhs + 3+IDX_X] += (rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_Z] -
-                                                               rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_Y]);
-            rhs_part[idx_part*size_particle_rhs + 3+IDX_Y] += (rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_X] -
-                                                               rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Z]);
-            rhs_part[idx_part*size_particle_rhs + 3+IDX_Z] += (rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Y] -
-                                                               rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_X]);
+            rhs_part[idx_part*size_particle_rhs + 3+IDX_X] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_Z] -
+                                                                   rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_Y]);
+            rhs_part[idx_part*size_particle_rhs + 3+IDX_Y] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Z]*pos_part[idx_part*size_particle_positions + 3+IDX_X] -
+                                                                   rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Z]);
+            rhs_part[idx_part*size_particle_rhs + 3+IDX_Z] += 0.5*(rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_X]*pos_part[idx_part*size_particle_positions + 3+IDX_Y] -
+                                                                   rhs_part_extra[idx_part*size_particle_rhs_extra + IDX_Y]*pos_part[idx_part*size_particle_positions + 3+IDX_X]);
         }
     }
 
diff --git a/bfps/cpp/particles/particles_system.hpp b/bfps/cpp/particles/particles_system.hpp
index db2f33fa3716a8b22c96817a8c2724434c17834a..e451f56a56b00a050d9d940085ad50ccf564ea30 100644
--- a/bfps/cpp/particles/particles_system.hpp
+++ b/bfps/cpp/particles/particles_system.hpp
@@ -278,7 +278,7 @@ public:
 
     void completeLoopWithVorticity(const real_number dt,
                                    const real_number particle_extra_rhs[]) final {
-        TIMEZONE("particles_system::completeLoop");
+        TIMEZONE("particles_system::completeLoopWithVorticity");
         compute();
         compute_p2p();
         compute_particles_inner(particle_extra_rhs);