diff --git a/bfps/cpp/distributed_particles.cpp b/bfps/cpp/distributed_particles.cpp
index b0a445af03ab51d79dd1aded6ba230fc4b80adbf..3773b5f4e0c84811f456a7cef0cac7b750847381 100644
--- a/bfps/cpp/distributed_particles.cpp
+++ b/bfps/cpp/distributed_particles.cpp
@@ -39,7 +39,7 @@
 
 extern int myrank, nprocs;
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_particles(
         const char *NAME,
         const hid_t data_file_id,
@@ -56,21 +56,25 @@ distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_pa
     this->vel = FIELD;
     this->rhs.resize(INTEGRATION_STEPS);
     this->integration_steps = INTEGRATION_STEPS;
+    this->state.reserve(2*this->nparticles / this->nprocs);
+    for (unsigned int i=0; i<this->rhs.size(); i++)
+        this->rhs[i].reserve(2*this->nparticles / this->nprocs);
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 distributed_particles<particle_type, rnumber, interp_neighbours>::~distributed_particles()
 {
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
         interpolator<rnumber, interp_neighbours> *field,
+        const std::unordered_map<int, single_particle_state<particle_type>> &x,
         std::unordered_map<int, single_particle_state<POINT3D>> &y)
 {
     double *yy = new double[3];
     y.clear();
-    for (auto &pp: this->state)
+    for (auto &pp: x)
     {
         (*field)(pp.second.data, yy);
         y[pp.first] = yy;
@@ -78,39 +82,41 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
     delete[] yy;
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs(
         const std::unordered_map<int, single_particle_state<particle_type>> &x,
         std::unordered_map<int, single_particle_state<particle_type>> &y)
 {
-    double *yy = new double[this->ncomponents];
-    y.clear();
-    for (auto &pp: x)
+    std::unordered_map<int, single_particle_state<POINT3D>> yy;
+    switch(particle_type)
     {
-        (*this->vel)(pp.second.data, yy);
-        y[pp.first] = yy;
+        case VELOCITY_TRACER:
+            this->sample(this->vel, this->state, yy);
+            y.clear();
+            for (auto &pp: x)
+                y[pp.first] = yy[pp.first].data;
+            break;
     }
-    delete[] yy;
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
         interpolator<rnumber, interp_neighbours> *field,
         const char *dset_name)
 {
     std::unordered_map<int, single_particle_state<POINT3D>> y;
-    this->sample(field, y);
+    this->sample(field, this->state, y);
     this->write(dset_name, y);
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
 {
     for (int i=this->integration_steps-2; i>=0; i--)
         rhs[i+1] = rhs[i];
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute(
         std::unordered_map<int, single_particle_state<particle_type>> &x,
         std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals)
@@ -168,7 +174,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib
     buffer_size = (buffer_size > npr[0])? buffer_size : npr[0];
     buffer_size = (buffer_size > npr[1])? buffer_size : npr[1];
     //DEBUG_MSG("buffer size is %d\n", buffer_size);
-    double *buffer = new double[buffer_size*this->ncomponents*(1+vals.size())];
+    double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())];
     for (rsrc = 0; rsrc<this->nprocs; rsrc++)
         for (int i=0; i<2; i++)
         {
@@ -186,21 +192,21 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib
                 for (int p: ps[i])
                 {
                     std::copy(x[p].data,
-                              x[p].data + this->ncomponents,
-                              buffer + pcounter*(1+vals.size())*this->ncomponents);
+                              x[p].data + state_dimension(particle_type),
+                              buffer + pcounter*(1+vals.size())*state_dimension(particle_type));
                     x.erase(p);
                     for (int tindex=0; tindex<vals.size(); tindex++)
                     {
                         std::copy(vals[tindex][p].data,
-                                  vals[tindex][p].data + this->ncomponents,
-                                  buffer + (pcounter*(1+vals.size()) + tindex+1)*this->ncomponents);
+                                  vals[tindex][p].data + state_dimension(particle_type),
+                                  buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type));
                         vals[tindex].erase(p);
                     }
                     pcounter++;
                 }
                 MPI_Send(
                         buffer,
-                        nps[i]*(1+vals.size())*this->ncomponents,
+                        nps[i]*(1+vals.size())*state_dimension(particle_type),
                         MPI_DOUBLE,
                         rdst,
                         2*(rsrc*this->nprocs + rdst)+1,
@@ -218,7 +224,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib
                         MPI_STATUS_IGNORE);
                 MPI_Recv(
                         buffer,
-                        npr[1-i]*(1+vals.size())*this->ncomponents,
+                        npr[1-i]*(1+vals.size())*state_dimension(particle_type),
                         MPI_DOUBLE,
                         rsrc,
                         2*(rsrc*this->nprocs + rdst)+1,
@@ -227,10 +233,10 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib
                 int pcounter = 0;
                 for (int p: pr[1-i])
                 {
-                    x[p] = buffer + (pcounter*(1+vals.size()))*this->ncomponents;
+                    x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type);
                     for (int tindex=0; tindex<vals.size(); tindex++)
                     {
-                        vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*this->ncomponents;
+                        vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type);
                     }
                     pcounter++;
                 }
@@ -255,13 +261,13 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib
 
 
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
         const int nsteps)
 {
     this->get_rhs(this->state, this->rhs[0]);
     for (auto &pp: this->state)
-        for (int i=0; i<this->ncomponents; i++)
+        for (int i=0; i<state_dimension(particle_type); i++)
             switch(nsteps)
             {
                 case 1:
@@ -303,7 +309,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBash
 }
 
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void distributed_particles<particle_type, rnumber, interp_neighbours>::step()
 {
     this->AdamsBashforth((this->iteration < this->integration_steps) ?
@@ -313,10 +319,10 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::step()
 }
 
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void distributed_particles<particle_type, rnumber, interp_neighbours>::read()
 {
-    double *temp = new double[this->chunk_size*this->ncomponents];
+    double *temp = new double[this->chunk_size*state_dimension(particle_type)];
     for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
     {
         //read state
@@ -324,14 +330,14 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::read()
             this->read_state_chunk(cindex, temp);
         MPI_Bcast(
                 temp,
-                this->chunk_size*this->ncomponents,
+                this->chunk_size*state_dimension(particle_type),
                 MPI_DOUBLE,
                 0,
                 this->comm);
         for (int p=0; p<this->chunk_size; p++)
         {
-            if (this->vel->get_rank(temp[this->ncomponents*p+2]) == this->myrank)
-                this->state[p+cindex*this->chunk_size] = temp + this->ncomponents*p;
+            if (this->vel->get_rank(temp[state_dimension(particle_type)*p+2]) == this->myrank)
+                this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
         }
         //read rhs
         if (this->iteration > 0)
@@ -341,7 +347,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::read()
                     this->read_rhs_chunk(cindex, i, temp);
                 MPI_Bcast(
                         temp,
-                        this->chunk_size*this->ncomponents,
+                        this->chunk_size*state_dimension(particle_type),
                         MPI_DOUBLE,
                         0,
                         this->comm);
@@ -349,7 +355,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::read()
                 {
                     auto pp = this->state.find(p+cindex*this->chunk_size);
                     if (pp != this->state.end())
-                        this->rhs[i][p+cindex*this->chunk_size] = temp + this->ncomponents*p;
+                        this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
                 }
             }
     }
@@ -357,7 +363,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::read()
     delete[] temp;
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void distributed_particles<particle_type, rnumber, interp_neighbours>::write(
         const char *dset_name,
         std::unordered_map<int, single_particle_state<POINT3D>> &y)
@@ -389,28 +395,28 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::write(
     delete[] data;
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void distributed_particles<particle_type, rnumber, interp_neighbours>::write(
         const bool write_rhs)
 {
-    double *temp0 = new double[this->chunk_size*this->ncomponents];
-    double *temp1 = new double[this->chunk_size*this->ncomponents];
+    double *temp0 = new double[this->chunk_size*state_dimension(particle_type)];
+    double *temp1 = new double[this->chunk_size*state_dimension(particle_type)];
     for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
     {
         //write state
-        std::fill_n(temp0, this->ncomponents*this->chunk_size, 0);
+        std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
         for (int p=0; p<this->chunk_size; p++)
         {
             auto pp = this->state.find(p + cindex*this->chunk_size);
             if (pp != this->state.end())
                 std::copy(pp->second.data,
-                          pp->second.data + this->ncomponents,
-                          temp0 + pp->first*this->ncomponents);
+                          pp->second.data + state_dimension(particle_type),
+                          temp0 + pp->first*state_dimension(particle_type));
         }
         MPI_Allreduce(
                 temp0,
                 temp1,
-                this->ncomponents*this->chunk_size,
+                state_dimension(particle_type)*this->chunk_size,
                 MPI_DOUBLE,
                 MPI_SUM,
                 this->comm);
@@ -420,19 +426,19 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::write(
         if (write_rhs)
             for (int i=0; i<this->integration_steps; i++)
             {
-                std::fill_n(temp0, this->ncomponents*this->chunk_size, 0);
+                std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
                 for (int p=0; p<this->chunk_size; p++)
                 {
                     auto pp = this->rhs[i].find(p + cindex*this->chunk_size);
                     if (pp != this->rhs[i].end())
                         std::copy(pp->second.data,
-                                  pp->second.data + this->ncomponents,
-                                  temp0 + pp->first*this->ncomponents);
+                                  pp->second.data + state_dimension(particle_type),
+                                  temp0 + pp->first*state_dimension(particle_type));
                 }
                 MPI_Allreduce(
                         temp0,
                         temp1,
-                        this->ncomponents*this->chunk_size,
+                        state_dimension(particle_type)*this->chunk_size,
                         MPI_DOUBLE,
                         MPI_SUM,
                         this->comm);
diff --git a/bfps/cpp/distributed_particles.hpp b/bfps/cpp/distributed_particles.hpp
index e972f69943814823e8b5527422b9727ab7331a8d..cf6e124a7744c049b6fcf0c84c1618a0a214c30e 100644
--- a/bfps/cpp/distributed_particles.hpp
+++ b/bfps/cpp/distributed_particles.hpp
@@ -39,7 +39,7 @@
 
 #define DISTRIBUTED_PARTICLES
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 class distributed_particles: public particles_io_base<particle_type>
 {
     private:
@@ -74,6 +74,7 @@ class distributed_particles: public particles_io_base<particle_type>
                 const char *dset_name);
         void sample(
                 interpolator<rnumber, interp_neighbours> *field,
+                const std::unordered_map<int, single_particle_state<particle_type>> &x,
                 std::unordered_map<int, single_particle_state<POINT3D>> &y);
         void get_rhs(
                 const std::unordered_map<int, single_particle_state<particle_type>> &x,
diff --git a/bfps/cpp/interpolator.hpp b/bfps/cpp/interpolator.hpp
index d20ee6ee1b5023d6a5e177d15e8ddc92a339d7ca..7e56ebe159fd24ed7cf623f0a869e1d262d4aadb 100644
--- a/bfps/cpp/interpolator.hpp
+++ b/bfps/cpp/interpolator.hpp
@@ -42,6 +42,7 @@ class interpolator:public interpolator_base<rnumber, interp_neighbours>
         rnumber *field;
 
     public:
+        using interpolator_base<rnumber, interp_neighbours>::operator();
         ptrdiff_t buffer_size;
 
         /* descriptor for buffered field */
@@ -67,17 +68,6 @@ class interpolator:public interpolator_base<rnumber, interp_neighbours>
                 const double *__restrict__ x,
                 double *__restrict__ y,
                 const int *deriv = NULL);
-        /* interpolate 1 point */
-        inline void operator()(
-                const double *__restrict__ x,
-                double *__restrict__ dest,
-                const int *deriv = NULL)
-        {
-            int xg[3];
-            double xx[3];
-            this->get_grid_coordinates(x, xg, xx);
-            (*this)(xg, xx, dest, deriv);
-        }
         void operator()(
                 const int *__restrict__ xg,
                 const double *__restrict__ xx,
diff --git a/bfps/cpp/interpolator_base.hpp b/bfps/cpp/interpolator_base.hpp
index 1f29a5c05e2afa2dda45f45e5f87a71a63d2b25d..7dda7fb08319bf2a044bcc220e204b748d6336d6 100644
--- a/bfps/cpp/interpolator_base.hpp
+++ b/bfps/cpp/interpolator_base.hpp
@@ -87,6 +87,18 @@ class interpolator_base
                 const double *__restrict__ xx,
                 double *__restrict__ dest,
                 const int *deriv = NULL) = 0;
+
+        /* interpolate 1 point */
+        inline void operator()(
+                const double *__restrict__ x,
+                double *__restrict__ dest,
+                const int *deriv = NULL)
+        {
+            int xg[3];
+            double xx[3];
+            this->get_grid_coordinates(x, xg, xx);
+            (*this)(xg, xx, dest, deriv);
+        }
 };
 
 #endif//INTERPOLATOR_BASE
diff --git a/bfps/cpp/particles.cpp b/bfps/cpp/particles.cpp
index 2182e769191d587aea2f5b174993ccae407bc109..459ea7a37847a6ce4f217bc937130f9304466ed1 100644
--- a/bfps/cpp/particles.cpp
+++ b/bfps/cpp/particles.cpp
@@ -39,7 +39,7 @@
 
 extern int myrank, nprocs;
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 particles<particle_type, rnumber, interp_neighbours>::particles(
         const char *NAME,
         const hid_t data_file_id,
@@ -55,7 +55,7 @@ particles<particle_type, rnumber, interp_neighbours>::particles(
            (INTEGRATION_STEPS >= 1));
     this->vel = FIELD;
     this->integration_steps = INTEGRATION_STEPS;
-    this->array_size = this->nparticles * this->ncomponents;
+    this->array_size = this->nparticles * state_dimension(particle_type);
     this->state = new double[this->array_size];
     std::fill_n(this->state, this->array_size, 0.0);
     for (int i=0; i < this->integration_steps; i++)
@@ -65,7 +65,7 @@ particles<particle_type, rnumber, interp_neighbours>::particles(
     }
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 particles<particle_type, rnumber, interp_neighbours>::~particles()
 {
     delete[] this->state;
@@ -75,18 +75,18 @@ particles<particle_type, rnumber, interp_neighbours>::~particles()
     }
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void particles<particle_type, rnumber, interp_neighbours>::get_rhs(double *x, double *y)
 {
     switch(particle_type)
     {
         case VELOCITY_TRACER:
-            this->vel->sample(this->nparticles, this->ncomponents, x, y);
+            this->vel->sample(this->nparticles, state_dimension(particle_type), x, y);
             break;
     }
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
 {
     for (int i=this->integration_steps-2; i>=0; i--)
@@ -97,7 +97,7 @@ void particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
 
 
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
         const int nsteps)
 {
@@ -107,26 +107,26 @@ void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
     {
         case 1:
             for (int p=0; p<this->nparticles; p++)
-                for (int i=0; i<this->ncomponents; i++)
+                for (int i=0; i<state_dimension(particle_type); i++)
                 {
-                    ii = p*this->ncomponents+i;
+                    ii = p*state_dimension(particle_type)+i;
                     this->state[ii] += this->dt*this->rhs[0][ii];
                 }
             break;
         case 2:
             for (int p=0; p<this->nparticles; p++)
-                for (int i=0; i<this->ncomponents; i++)
+                for (int i=0; i<state_dimension(particle_type); i++)
                 {
-                    ii = p*this->ncomponents+i;
+                    ii = p*state_dimension(particle_type)+i;
                     this->state[ii] += this->dt*(3*this->rhs[0][ii]
                                                -   this->rhs[1][ii])/2;
                 }
             break;
         case 3:
             for (int p=0; p<this->nparticles; p++)
-                for (int i=0; i<this->ncomponents; i++)
+                for (int i=0; i<state_dimension(particle_type); i++)
                 {
-                    ii = p*this->ncomponents+i;
+                    ii = p*state_dimension(particle_type)+i;
                     this->state[ii] += this->dt*(23*this->rhs[0][ii]
                                                - 16*this->rhs[1][ii]
                                                +  5*this->rhs[2][ii])/12;
@@ -134,9 +134,9 @@ void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
             break;
         case 4:
             for (int p=0; p<this->nparticles; p++)
-                for (int i=0; i<this->ncomponents; i++)
+                for (int i=0; i<state_dimension(particle_type); i++)
                 {
-                    ii = p*this->ncomponents+i;
+                    ii = p*state_dimension(particle_type)+i;
                     this->state[ii] += this->dt*(55*this->rhs[0][ii]
                                                - 59*this->rhs[1][ii]
                                                + 37*this->rhs[2][ii]
@@ -145,9 +145,9 @@ void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
             break;
         case 5:
             for (int p=0; p<this->nparticles; p++)
-                for (int i=0; i<this->ncomponents; i++)
+                for (int i=0; i<state_dimension(particle_type); i++)
                 {
-                    ii = p*this->ncomponents+i;
+                    ii = p*state_dimension(particle_type)+i;
                     this->state[ii] += this->dt*(1901*this->rhs[0][ii]
                                                - 2774*this->rhs[1][ii]
                                                + 2616*this->rhs[2][ii]
@@ -157,9 +157,9 @@ void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
             break;
         case 6:
             for (int p=0; p<this->nparticles; p++)
-                for (int i=0; i<this->ncomponents; i++)
+                for (int i=0; i<state_dimension(particle_type); i++)
                 {
-                    ii = p*this->ncomponents+i;
+                    ii = p*state_dimension(particle_type)+i;
                     this->state[ii] += this->dt*(4277*this->rhs[0][ii]
                                                - 7923*this->rhs[1][ii]
                                                + 9982*this->rhs[2][ii]
@@ -173,7 +173,7 @@ void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
 }
 
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void particles<particle_type, rnumber, interp_neighbours>::step()
 {
     this->AdamsBashforth((this->iteration < this->integration_steps) ?
@@ -183,16 +183,16 @@ void particles<particle_type, rnumber, interp_neighbours>::step()
 }
 
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void particles<particle_type, rnumber, interp_neighbours>::read()
 {
     if (this->myrank == 0)
         for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
         {
-            this->read_state_chunk(cindex, this->state+cindex*this->chunk_size*this->ncomponents);
+            this->read_state_chunk(cindex, this->state+cindex*this->chunk_size*state_dimension(particle_type));
             if (this->iteration > 0)
                 for (int i=0; i<this->integration_steps; i++)
-                    this->read_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*this->ncomponents);
+                    this->read_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*state_dimension(particle_type));
         }
     MPI_Bcast(
             this->state,
@@ -210,27 +210,27 @@ void particles<particle_type, rnumber, interp_neighbours>::read()
                     this->comm);
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void particles<particle_type, rnumber, interp_neighbours>::write(
         const bool write_rhs)
 {
     if (this->myrank == 0)
         for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
         {
-            this->write_state_chunk(cindex, this->state+cindex*this->chunk_size*this->ncomponents);
+            this->write_state_chunk(cindex, this->state+cindex*this->chunk_size*state_dimension(particle_type));
             if (write_rhs)
                 for (int i=0; i<this->integration_steps; i++)
-                    this->write_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*this->ncomponents);
+                    this->write_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*state_dimension(particle_type));
         }
 }
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 void particles<particle_type, rnumber, interp_neighbours>::sample(
         interpolator_base<rnumber, interp_neighbours> *field,
         const char *dset_name)
 {
     double *y = new double[this->nparticles*3];
-    field->sample(this->nparticles, this->ncomponents, this->state, y);
+    field->sample(this->nparticles, state_dimension(particle_type), this->state, y);
     if (this->myrank == 0)
         for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
             this->write_point3D_chunk(dset_name, cindex, y+cindex*this->chunk_size*3);
diff --git a/bfps/cpp/particles.hpp b/bfps/cpp/particles.hpp
index cf67b1fc32509b16d7a0774c0fc40a74759e39e0..03daf3e3fc866ac485b3649a28dfb13cf1b50ff1 100644
--- a/bfps/cpp/particles.hpp
+++ b/bfps/cpp/particles.hpp
@@ -37,7 +37,7 @@
 
 #define PARTICLES
 
-template <int particle_type, class rnumber, int interp_neighbours>
+template <particle_types particle_type, class rnumber, int interp_neighbours>
 class particles: public particles_io_base<particle_type>
 {
     private:
@@ -75,7 +75,7 @@ class particles: public particles_io_base<particle_type>
                 interpolator_base<rnumber, interp_neighbours> *field,
                 double *y)
         {
-            field->sample(this->nparticles, this->ncomponents, this->state, y);
+            field->sample(this->nparticles, state_dimension(particle_type), this->state, y);
         }
 
         void get_rhs(
diff --git a/bfps/cpp/particles_base.cpp b/bfps/cpp/particles_base.cpp
index c0fc631799cf55e1fbd9b8c93db945f47aa9e980..59d6d078dd63a319f34d3ae03acccc1f259714be 100644
--- a/bfps/cpp/particles_base.cpp
+++ b/bfps/cpp/particles_base.cpp
@@ -30,78 +30,56 @@
 #include <cassert>
 #include "particles_base.hpp"
 
-template <int particle_type>
+template <particle_types particle_type>
 single_particle_state<particle_type>::single_particle_state()
 {
-    switch(particle_type)
-    {
-        default:
-            this->data = new double[3];
-            std::fill_n(this->data, 3, 0);
-            break;
-    }
+    std::fill_n(this->data, state_dimension(particle_type), 0);
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 single_particle_state<particle_type>::single_particle_state(
         const single_particle_state<particle_type> &src)
 {
-    switch(particle_type)
-    {
-        default:
-            this->data = new double[3];
-            std::copy(src.data, src.data + 3, this->data);
-            break;
-    }
+    std::copy(
+            src.data,
+            src.data + state_dimension(particle_type),
+            this->data);
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 single_particle_state<particle_type>::single_particle_state(
         const double *src)
 {
-    switch(particle_type)
-    {
-        default:
-            this->data = new double[3];
-            std::copy(src, src + 3, this->data);
-            break;
-    }
+    std::copy(
+            src,
+            src + state_dimension(particle_type),
+            this->data);
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 single_particle_state<particle_type>::~single_particle_state()
 {
-    switch(particle_type)
-    {
-        default:
-            delete[] this->data;
-            break;
-    }
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 single_particle_state<particle_type> &single_particle_state<particle_type>::operator=(
         const single_particle_state &src)
 {
-    switch(particle_type)
-    {
-        default:
-            std::copy(src.data, src.data + 3, this->data);
-            break;
-    }
+    std::copy(
+            src.data,
+            src.data + state_dimension(particle_type),
+            this->data);
     return *this;
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 single_particle_state<particle_type> &single_particle_state<particle_type>::operator=(
         const double *src)
 {
-    switch(particle_type)
-    {
-        default:
-            std::copy(src, src + 3, this->data);
-            break;
-    }
+    std::copy(
+            src,
+            src + state_dimension(particle_type),
+            this->data);
     return *this;
 }
 
@@ -134,19 +112,13 @@ int get_chunk_offsets(
     return EXIT_SUCCESS;
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 particles_io_base<particle_type>::particles_io_base(
         const char *NAME,
         const int TRAJ_SKIP,
         const hid_t data_file_id,
         MPI_Comm COMM)
 {
-    switch(particle_type)
-    {
-        default:
-            this->ncomponents = 3;
-            break;
-    }
     this->name = std::string(NAME);
     this->traj_skip = TRAJ_SKIP;
     this->comm = COMM;
@@ -161,7 +133,7 @@ particles_io_base<particle_type>::particles_io_base(
         dspace = H5Dget_space(dset);
         this->hdf5_state_dims.resize(H5Sget_simple_extent_ndims(dspace));
         H5Sget_simple_extent_dims(dspace, &this->hdf5_state_dims.front(), NULL);
-        assert(this->hdf5_state_dims[this->hdf5_state_dims.size()-1] == this->ncomponents);
+        assert(this->hdf5_state_dims[this->hdf5_state_dims.size()-1] == state_dimension(particle_type));
         this->nparticles = 1;
         for (int i=1; i<this->hdf5_state_dims.size()-1; i++)
             this->nparticles *= this->hdf5_state_dims[i];
@@ -235,14 +207,14 @@ particles_io_base<particle_type>::particles_io_base(
     DEBUG_MSG("exiting particles_io_base constructor\n");
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 particles_io_base<particle_type>::~particles_io_base()
 {
     if(this->myrank == 0)
         H5Gclose(this->hdf5_group_id);
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 void particles_io_base<particle_type>::read_state_chunk(
         const int cindex,
         double *data)
@@ -276,7 +248,7 @@ void particles_io_base<particle_type>::read_state_chunk(
     DEBUG_MSG("exiting read_state_chunk\n");
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 void particles_io_base<particle_type>::write_state_chunk(
         const int cindex,
         const double *data)
@@ -308,7 +280,7 @@ void particles_io_base<particle_type>::write_state_chunk(
     delete[] offset;
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 void particles_io_base<particle_type>::read_rhs_chunk(
         const int cindex,
         const int rhsindex,
@@ -350,7 +322,7 @@ void particles_io_base<particle_type>::read_rhs_chunk(
     //DEBUG_MSG("exiting read_rhs_chunk\n");
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 void particles_io_base<particle_type>::write_rhs_chunk(
         const int cindex,
         const int rhsindex,
@@ -387,7 +359,7 @@ void particles_io_base<particle_type>::write_rhs_chunk(
     delete[] offset;
 }
 
-template <int particle_type>
+template <particle_types particle_type>
 void particles_io_base<particle_type>::write_point3D_chunk(
         const std::string dset_name,
         const int cindex,
diff --git a/bfps/cpp/particles_base.hpp b/bfps/cpp/particles_base.hpp
index 923c29253ba1ddabff1791ca40d03f79109eaf20..114661fcc4c8f8e7d1a38bbb6d646e5745b9415f 100644
--- a/bfps/cpp/particles_base.hpp
+++ b/bfps/cpp/particles_base.hpp
@@ -26,6 +26,7 @@
 
 #include <vector>
 #include <hdf5.h>
+#include <unordered_map>
 #include "interpolator_base.hpp"
 
 #ifndef PARTICLES_BASE
@@ -35,13 +36,21 @@
 /* particle types */
 enum particle_types {POINT3D, VELOCITY_TRACER};
 
+/* space dimension */
+constexpr unsigned int state_dimension(particle_types particle_type)
+{
+    return ((particle_type == POINT3D) ? 3 : (
+            (particle_type == VELOCITY_TRACER) ? 3 :
+            3));
+}
+
 /* 1 particle state type */
 
-template <int particle_type>
+template <particle_types particle_type>
 class single_particle_state
 {
     public:
-        double *data;
+        double data[state_dimension(particle_type)];
 
         single_particle_state();
         single_particle_state(const single_particle_state &src);
@@ -61,7 +70,7 @@ std::vector<std::vector<hsize_t>> get_chunk_offsets(
         std::vector<hsize_t> data_dims,
         std::vector<hsize_t> chnk_dims);
 
-template <int particle_type>
+template <particle_types particle_type>
 class particles_io_base
 {
     protected:
@@ -69,7 +78,6 @@ class particles_io_base
         MPI_Comm comm;
 
         int nparticles;
-        int ncomponents;
 
         std::string name;
         int chunk_size;
diff --git a/bfps/cpp/rFFTW_distributed_particles.cpp b/bfps/cpp/rFFTW_distributed_particles.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..412671b89ea000efadd713e9cf67221009b0d2cb
--- /dev/null
+++ b/bfps/cpp/rFFTW_distributed_particles.cpp
@@ -0,0 +1,692 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+//#define NDEBUG
+
+#include <cmath>
+#include <cassert>
+#include <cstring>
+#include <string>
+#include <sstream>
+#include <set>
+
+#include "base.hpp"
+#include "rFFTW_distributed_particles.hpp"
+#include "fftw_tools.hpp"
+
+
+extern int myrank, nprocs;
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_distributed_particles(
+        const char *NAME,
+        const hid_t data_file_id,
+        rFFTW_interpolator<rnumber, interp_neighbours> *FIELD,
+        const int TRAJ_SKIP,
+        const int INTEGRATION_STEPS) : particles_io_base<particle_type>(
+            NAME,
+            TRAJ_SKIP,
+            data_file_id,
+            FIELD->descriptor->comm)
+{
+    /* check that integration_steps has a valid value.
+     * If NDEBUG is defined, "assert" doesn't do anything.
+     * With NDEBUG defined, and an invalid INTEGRATION_STEPS,
+     * the particles will simply sit still.
+     * */
+    assert((INTEGRATION_STEPS <= 6) &&
+           (INTEGRATION_STEPS >= 1));
+    /* check that the field layout is compatible with this class.
+     * if it's not, the code will fail in bad ways, most likely ending up
+     * with various CPUs locked in some MPI send/receive.
+     * therefore I prefer to just kill the code at this point,
+     * no matter whether or not NDEBUG is present.
+     * */
+    if (interp_neighbours*2+2 > FIELD->descriptor->subsizes[0])
+    {
+        DEBUG_MSG("parameters incompatible with rFFTW_distributed_particles.\n"
+                  "interp kernel size is %d, local_z_size is %d\n",
+                  interp_neighbours*2+2, FIELD->descriptor->subsizes[0]);
+        if (FIELD->descriptor->myrank == 0)
+            std::cerr << "parameters incompatible with rFFTW_distributed_particles." << std::endl;
+        exit(0);
+    }
+    this->vel = FIELD;
+    this->rhs.resize(INTEGRATION_STEPS);
+    this->integration_steps = INTEGRATION_STEPS;
+    this->state.reserve(2*this->nparticles / this->nprocs);
+    for (unsigned int i=0; i<this->rhs.size(); i++)
+        this->rhs[i].reserve(2*this->nparticles / this->nprocs);
+
+    /* build communicators and stuff for interpolation */
+
+    /* number of processors per domain */
+    this->domain_nprocs[-1] = 2; // domain in common with lower z CPU
+    this->domain_nprocs[ 0] = 1; // local domain
+    this->domain_nprocs[ 1] = 2; // domain in common with higher z CPU
+
+    /* initialize domain bins */
+    this->domain_particles[-1] = std::unordered_set<int>();
+    this->domain_particles[ 0] = std::unordered_set<int>();
+    this->domain_particles[ 1] = std::unordered_set<int>();
+    this->domain_particles[-1].reserve(unsigned(
+                1.5*(interp_neighbours*2+2)*
+                float(this->nparticles) /
+                this->nprocs));
+    this->domain_particles[ 1].reserve(unsigned(
+                1.5*(interp_neighbours*2+2)*
+                float(this->nparticles) /
+                this->nprocs));
+    this->domain_particles[ 0].reserve(unsigned(
+                1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)*
+                float(this->nparticles) /
+                this->nprocs));
+
+    int rmaxz, rminz;
+    int color, key;
+    MPI_Comm tmpcomm;
+    for (int rank=0; rank<this->nprocs; rank++)
+    {
+        color = MPI_UNDEFINED;
+        key = MPI_UNDEFINED;
+        if (this->myrank == rank)
+        {
+            color = rank;
+            key = 0;
+        }
+        if (this->myrank == MOD(rank + 1, this->nprocs))
+        {
+            color = rank;
+            key = 1;
+        }
+        MPI_Comm_split(this->comm, color, key, &tmpcomm);
+        if (this->myrank == rank)
+            this->domain_comm[ 1] = tmpcomm;
+        if (this->myrank == MOD(rank+1, this->nprocs))
+            this->domain_comm[-1] = tmpcomm;
+
+    }
+
+    /* following code may be useful in the future for the general case */
+    //this->interp_comm.resize(this->vel->descriptor->sizes[0]);
+    //this->interp_nprocs.resize(this->vel->descriptor->sizes[0]);
+    //for (int zg=0; zg<this->vel->descriptor->sizes[0]; zg++)
+    //{
+    //    color = (this->vel->get_rank_info(
+    //                (zg+.5)*this->vel->dz, rminz, rmaxz) ? zg : MPI_UNDEFINED);
+    //    key = zg - this->vel->descriptor->starts[0] + interp_neighbours;
+    //    MPI_Comm_split(this->comm, color, key, &this->interp_comm[zg]);
+    //    if (this->interp_comm[zg] != MPI_COMM_NULL)
+    //        MPI_Comm_size(this->interp_comm[zg], &this->interp_nprocs[zg]);
+    //    else
+    //        this->interp_nprocs[zg] = 0;
+    //}
+}
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::~rFFTW_distributed_particles()
+{
+}
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
+        rFFTW_interpolator<rnumber, interp_neighbours> *field,
+        const std::unordered_map<int, single_particle_state<particle_type>> &x,
+        const std::unordered_map<int, std::unordered_set<int>> &dp,
+        std::unordered_map<int, single_particle_state<POINT3D>> &y)
+{
+    double *yyy;
+    double *yy;
+    y.clear();
+    /* local z domain */
+    yy = new double[3];
+    for (auto p: dp.at(0))
+    {
+        (*field)(x.find(p)->second.data, yy);
+        y[p] = yy;
+    }
+    delete[] yy;
+    /* boundary z domains */
+    int domain_index;
+    for (int rankpair = 0; rankpair < this->nprocs; rankpair++)
+    {
+        if (this->myrank == rankpair)
+            domain_index = 1;
+        if (this->myrank == MOD(rankpair+1, this->nprocs))
+            domain_index = -1;
+        if (this->myrank == rankpair ||
+            this->myrank == MOD(rankpair+1, this->nprocs))
+        {
+            yy = new double[3*dp.at(domain_index).size()];
+            yyy = new double[3*dp.at(domain_index).size()];
+            int tindex;
+            tindex = 0;
+            // can this sorting be done more efficiently?
+            std::set<int> ordered_dp;
+            for (auto p: dp.at(domain_index))
+                ordered_dp.insert(p);
+
+            for (auto p: ordered_dp)
+            {
+                (*field)(x.at(p).data, yy + tindex*3);
+                tindex++;
+            }
+            MPI_Allreduce(
+                    yy,
+                    yyy,
+                    3*dp.at(domain_index).size(),
+                    MPI_DOUBLE,
+                    MPI_SUM,
+                    this->domain_comm[domain_index]);
+            tindex = 0;
+            for (auto p: ordered_dp)
+            {
+                y[p] = yyy + tindex*3;
+                tindex++;
+            }
+            delete[] yy;
+            delete[] yyy;
+        }
+    }
+}
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs(
+        const std::unordered_map<int, single_particle_state<particle_type>> &x,
+        const std::unordered_map<int, std::unordered_set<int>> &dp,
+        std::unordered_map<int, single_particle_state<particle_type>> &y)
+{
+    std::unordered_map<int, single_particle_state<POINT3D>> yy;
+    switch(particle_type)
+    {
+        case VELOCITY_TRACER:
+            this->sample(this->vel, x, dp, yy);
+            y.clear();
+            for (auto &pp: x)
+                y[pp.first] = yy[pp.first].data;
+            break;
+    }
+}
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample(
+        rFFTW_interpolator<rnumber, interp_neighbours> *field,
+        const char *dset_name)
+{
+    std::unordered_map<int, single_particle_state<POINT3D>> y;
+    this->sample(field, this->state, this->domain_particles, y);
+    this->write(dset_name, y);
+}
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs()
+{
+    for (int i=this->integration_steps-2; i>=0; i--)
+        rhs[i+1] = rhs[i];
+}
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute(
+        std::unordered_map<int, single_particle_state<particle_type>> &x,
+        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals,
+        std::unordered_map<int, std::unordered_set<int>> &dp)
+{
+    //DEBUG_MSG("entered redistribute\n");
+    /* get new distribution of particles */
+    std::unordered_map<int, std::unordered_set<int>> newdp;
+    this->sort_into_domains(x, newdp);
+    /* take care of particles that are leaving the shared domains */
+    int dindex[2] = {-1, 1};
+    // for each D of the 2 shared domains
+    for (int di=0; di<2; di++)
+        // for all particles previously in D
+        for (auto p: dp[dindex[di]])
+        {
+            // if the particle is no longer in D
+            if (newdp[dindex[di]].find(p) == newdp[dindex[di]].end())
+            {
+                // and the particle is not in the local domain
+                if (newdp[0].find(p) == newdp[0].end())
+                {
+                    // remove the particle from the local list
+                    x.erase(p);
+                    for (int i=0; i<vals.size(); i++)
+                        vals[i].erase(p);
+                }
+                // if the particle is in the local domain, do nothing
+            }
+        }
+    /* take care of particles that are entering the shared domains */
+    /* neighbouring rank offsets */
+    int ro[2];
+    ro[0] = -1;
+    ro[1] = 1;
+    /* neighbouring ranks */
+    int nr[2];
+    nr[0] = MOD(this->myrank+ro[0], this->nprocs);
+    nr[1] = MOD(this->myrank+ro[1], this->nprocs);
+    /* particles to send, particles to receive */
+    std::vector<int> ps[2], pr[2];
+    /* number of particles to send, number of particles to receive */
+    int nps[2], npr[2];
+    int rsrc, rdst;
+    /* get list of id-s to send */
+    for (auto &p: dp[0])
+    {
+        for (int di=0; di<2; di++)
+        {
+            if (newdp[dindex[di]].find(p) != newdp[dindex[di]].end())
+                ps[di].push_back(p);
+        }
+    }
+    /* prepare data for send recv */
+    for (int i=0; i<2; i++)
+        nps[i] = ps[i].size();
+    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
+        for (int i=0; i<2; i++)
+        {
+            rdst = MOD(rsrc+ro[i], this->nprocs);
+            if (this->myrank == rsrc)
+                MPI_Send(
+                        nps+i,
+                        1,
+                        MPI_INTEGER,
+                        rdst,
+                        2*(rsrc*this->nprocs + rdst)+i,
+                        this->comm);
+            if (this->myrank == rdst)
+                MPI_Recv(
+                        npr+1-i,
+                        1,
+                        MPI_INTEGER,
+                        rsrc,
+                        2*(rsrc*this->nprocs + rdst)+i,
+                        this->comm,
+                        MPI_STATUS_IGNORE);
+        }
+    //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]);
+    //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]);
+    for (int i=0; i<2; i++)
+        pr[i].resize(npr[i]);
+
+    int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1];
+    buffer_size = (buffer_size > npr[0])? buffer_size : npr[0];
+    buffer_size = (buffer_size > npr[1])? buffer_size : npr[1];
+    //DEBUG_MSG("buffer size is %d\n", buffer_size);
+    double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())];
+    for (rsrc = 0; rsrc<this->nprocs; rsrc++)
+        for (int i=0; i<2; i++)
+        {
+            rdst = MOD(rsrc+ro[i], this->nprocs);
+            if (this->myrank == rsrc && nps[i] > 0)
+            {
+                MPI_Send(
+                        &ps[i].front(),
+                        nps[i],
+                        MPI_INTEGER,
+                        rdst,
+                        2*(rsrc*this->nprocs + rdst),
+                        this->comm);
+                int pcounter = 0;
+                for (int p: ps[i])
+                {
+                    std::copy(x[p].data,
+                              x[p].data + state_dimension(particle_type),
+                              buffer + pcounter*(1+vals.size())*state_dimension(particle_type));
+                    for (int tindex=0; tindex<vals.size(); tindex++)
+                    {
+                        std::copy(vals[tindex][p].data,
+                                  vals[tindex][p].data + state_dimension(particle_type),
+                                  buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type));
+                    }
+                    pcounter++;
+                }
+                MPI_Send(
+                        buffer,
+                        nps[i]*(1+vals.size())*state_dimension(particle_type),
+                        MPI_DOUBLE,
+                        rdst,
+                        2*(rsrc*this->nprocs + rdst)+1,
+                        this->comm);
+            }
+            if (this->myrank == rdst && npr[1-i] > 0)
+            {
+                MPI_Recv(
+                        &pr[1-i].front(),
+                        npr[1-i],
+                        MPI_INTEGER,
+                        rsrc,
+                        2*(rsrc*this->nprocs + rdst),
+                        this->comm,
+                        MPI_STATUS_IGNORE);
+                MPI_Recv(
+                        buffer,
+                        npr[1-i]*(1+vals.size())*state_dimension(particle_type),
+                        MPI_DOUBLE,
+                        rsrc,
+                        2*(rsrc*this->nprocs + rdst)+1,
+                        this->comm,
+                        MPI_STATUS_IGNORE);
+                int pcounter = 0;
+                for (int p: pr[1-i])
+                {
+                    x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type);
+                    newdp[1-i].insert(p);
+                    for (int tindex=0; tindex<vals.size(); tindex++)
+                    {
+                        vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type);
+                    }
+                    pcounter++;
+                }
+            }
+        }
+    delete[] buffer;
+    // x has been changed, so newdp is obsolete
+    // we need to sort into domains again
+    this->sort_into_domains(x, dp);
+
+
+#ifndef NDEBUG
+    /* check that all particles at x are local */
+    //for (auto &pp: x)
+    //    if (this->vel->get_rank(pp.second.data[2]) != this->myrank)
+    //    {
+    //        DEBUG_MSG("found particle %d with rank %d\n",
+    //                pp.first,
+    //                this->vel->get_rank(pp.second.data[2]));
+    //        assert(false);
+    //    }
+#endif
+    //DEBUG_MSG("exiting redistribute\n");
+}
+
+
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth(
+        const int nsteps)
+{
+    this->get_rhs(this->state, this->domain_particles, this->rhs[0]);
+    for (auto &pp: this->state)
+        for (int i=0; i<state_dimension(particle_type); i++)
+            switch(nsteps)
+            {
+                case 1:
+                    pp.second[i] += this->dt*this->rhs[0][pp.first][i];
+                    break;
+                case 2:
+                    pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i]
+                                            -   this->rhs[1][pp.first][i])/2;
+                    break;
+                case 3:
+                    pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i]
+                                            - 16*this->rhs[1][pp.first][i]
+                                            +  5*this->rhs[2][pp.first][i])/12;
+                    break;
+                case 4:
+                    pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i]
+                                            - 59*this->rhs[1][pp.first][i]
+                                            + 37*this->rhs[2][pp.first][i]
+                                            -  9*this->rhs[3][pp.first][i])/24;
+                    break;
+                case 5:
+                    pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i]
+                                            - 2774*this->rhs[1][pp.first][i]
+                                            + 2616*this->rhs[2][pp.first][i]
+                                            - 1274*this->rhs[3][pp.first][i]
+                                            +  251*this->rhs[4][pp.first][i])/720;
+                    break;
+                case 6:
+                    pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i]
+                                            - 7923*this->rhs[1][pp.first][i]
+                                            + 9982*this->rhs[2][pp.first][i]
+                                            - 7298*this->rhs[3][pp.first][i]
+                                            + 2877*this->rhs[4][pp.first][i]
+                                            -  475*this->rhs[5][pp.first][i])/1440;
+                    break;
+            }
+    this->redistribute(this->state, this->rhs, this->domain_particles);
+    this->roll_rhs();
+}
+
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::step()
+{
+    this->AdamsBashforth((this->iteration < this->integration_steps) ?
+                          this->iteration+1 :
+                          this->integration_steps);
+    this->iteration++;
+}
+
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sort_into_domains(
+        const std::unordered_map<int, single_particle_state<particle_type>> &x,
+        std::unordered_map<int, std::unordered_set<int>> &dp)
+{
+    int tmpint1, tmpint2;
+    dp.clear();
+    dp[-1] = std::unordered_set<int>();
+    dp[ 0] = std::unordered_set<int>();
+    dp[ 1] = std::unordered_set<int>();
+    dp[-1].reserve(unsigned(
+                1.5*(interp_neighbours*2+2)*
+                float(this->nparticles) /
+                this->nprocs));
+    dp[ 1].reserve(unsigned(
+                1.5*(interp_neighbours*2+2)*
+                float(this->nparticles) /
+                this->nprocs));
+    dp[ 0].reserve(unsigned(
+                1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)*
+                float(this->nparticles) /
+                this->nprocs));
+    for (auto &xx: x)
+    {
+        if (this->vel->get_rank_info(xx.second.data[2], tmpint1, tmpint2))
+        {
+            if (tmpint1 == tmpint2)
+                dp[0].insert(xx.first);
+            else
+            {
+                if (this->myrank == tmpint1)
+                    dp[-1].insert(xx.first);
+                else
+                    dp[ 1].insert(xx.first);
+            }
+        }
+    }
+}
+
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::read()
+{
+    double *temp = new double[this->chunk_size*state_dimension(particle_type)];
+    int tmpint1, tmpint2;
+    for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
+    {
+        //read state
+        if (this->myrank == 0)
+            this->read_state_chunk(cindex, temp);
+        MPI_Bcast(
+                temp,
+                this->chunk_size*state_dimension(particle_type),
+                MPI_DOUBLE,
+                0,
+                this->comm);
+        for (int p=0; p<this->chunk_size; p++)
+        {
+            if (this->vel->get_rank_info(temp[state_dimension(particle_type)*p+2], tmpint1, tmpint2))
+            {
+                this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
+            }
+        }
+        //read rhs
+        if (this->iteration > 0)
+            for (int i=0; i<this->integration_steps; i++)
+            {
+                if (this->myrank == 0)
+                    this->read_rhs_chunk(cindex, i, temp);
+                MPI_Bcast(
+                        temp,
+                        this->chunk_size*state_dimension(particle_type),
+                        MPI_DOUBLE,
+                        0,
+                        this->comm);
+                for (int p=0; p<this->chunk_size; p++)
+                {
+                    auto pp = this->state.find(p+cindex*this->chunk_size);
+                    if (pp != this->state.end())
+                        this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p;
+                }
+            }
+    }
+    this->sort_into_domains(this->state, this->domain_particles);
+    DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size());
+    for (int domain=-1; domain<=1; domain++)
+    {
+        DEBUG_MSG("domain %d nparticles = %ld\n", domain, this->domain_particles[domain].size());
+    }
+    delete[] temp;
+}
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write(
+        const char *dset_name,
+        std::unordered_map<int, single_particle_state<POINT3D>> &y)
+{
+    double *data = new double[this->nparticles*3];
+    double *yy = new double[this->nparticles*3];
+    int zmin_rank, zmax_rank;
+    int pindex = 0;
+    for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
+    {
+        std::fill_n(yy, this->chunk_size*3, 0);
+        for (int p=0; p<this->chunk_size; p++, pindex++)
+        {
+            if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
+                this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
+            {
+                std::copy(y[pindex].data,
+                          y[pindex].data + 3,
+                          yy + p*3);
+            }
+        }
+        MPI_Allreduce(
+                yy,
+                data,
+                3*this->chunk_size,
+                MPI_DOUBLE,
+                MPI_SUM,
+                this->comm);
+        if (this->myrank == 0)
+            this->write_point3D_chunk(dset_name, cindex, data);
+    }
+    delete[] yy;
+    delete[] data;
+}
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write(
+        const bool write_rhs)
+{
+    double *temp0 = new double[this->chunk_size*state_dimension(particle_type)];
+    double *temp1 = new double[this->chunk_size*state_dimension(particle_type)];
+    int zmin_rank, zmax_rank;
+    int pindex = 0;
+    for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++)
+    {
+        //write state
+        std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
+        pindex = cindex*this->chunk_size;
+        for (int p=0; p<this->chunk_size; p++, pindex++)
+        {
+            if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
+                this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
+            {
+                std::copy(this->state[pindex].data,
+                          this->state[pindex].data + state_dimension(particle_type),
+                          temp0 + p*state_dimension(particle_type));
+            }
+        }
+        MPI_Allreduce(
+                temp0,
+                temp1,
+                state_dimension(particle_type)*this->chunk_size,
+                MPI_DOUBLE,
+                MPI_SUM,
+                this->comm);
+        if (this->myrank == 0)
+            this->write_state_chunk(cindex, temp1);
+        //write rhs
+        if (write_rhs)
+            for (int i=0; i<this->integration_steps; i++)
+            {
+                std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0);
+                pindex = cindex*this->chunk_size;
+                for (int p=0; p<this->chunk_size; p++, pindex++)
+                {
+                    if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() ||
+                        this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end())
+                    {
+                        std::copy(this->rhs[i][pindex].data,
+                                  this->rhs[i][pindex].data + state_dimension(particle_type),
+                                  temp0 + p*state_dimension(particle_type));
+                    }
+                }
+                MPI_Allreduce(
+                        temp0,
+                        temp1,
+                        state_dimension(particle_type)*this->chunk_size,
+                        MPI_DOUBLE,
+                        MPI_SUM,
+                        this->comm);
+                if (this->myrank == 0)
+                    this->write_rhs_chunk(cindex, i, temp1);
+            }
+    }
+    delete[] temp0;
+    delete[] temp1;
+}
+
+
+/*****************************************************************************/
+template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 1>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 2>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 3>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 4>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 5>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 6>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 1>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 2>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 3>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 4>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 5>;
+template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 6>;
+/*****************************************************************************/
+
diff --git a/bfps/cpp/rFFTW_distributed_particles.hpp b/bfps/cpp/rFFTW_distributed_particles.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e271bbfae56c0d49bf66cebcb5e8e8158f81940b
--- /dev/null
+++ b/bfps/cpp/rFFTW_distributed_particles.hpp
@@ -0,0 +1,116 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2015 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <iostream>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include <hdf5.h>
+#include "base.hpp"
+#include "particles_base.hpp"
+#include "fluid_solver_base.hpp"
+#include "rFFTW_interpolator.hpp"
+
+#ifndef RFFTW_DISTRIBUTED_PARTICLES
+
+#define RFFTW_DISTRIBUTED_PARTICLES
+
+template <particle_types particle_type, class rnumber, int interp_neighbours>
+class rFFTW_distributed_particles: public particles_io_base<particle_type>
+{
+    private:
+        std::unordered_map<int, single_particle_state<particle_type>> state;
+        std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs;
+        std::unordered_map<int, int> domain_nprocs;
+        std::unordered_map<int, MPI_Comm> domain_comm;
+        std::unordered_map<int, std::unordered_set<int>> domain_particles;
+
+    public:
+        int integration_steps;
+        // this class only works with rFFTW interpolator
+        rFFTW_interpolator<rnumber, interp_neighbours> *vel;
+
+        /* simulation parameters */
+        double dt;
+
+        /* methods */
+
+        /* constructor and destructor.
+         * allocate and deallocate:
+         *  this->state
+         *  this->rhs
+         * */
+        rFFTW_distributed_particles(
+                const char *NAME,
+                const hid_t data_file_id,
+                rFFTW_interpolator<rnumber, interp_neighbours> *FIELD,
+                const int TRAJ_SKIP,
+                const int INTEGRATION_STEPS = 2);
+        ~rFFTW_distributed_particles();
+
+        void sample(
+                rFFTW_interpolator<rnumber, interp_neighbours> *field,
+                const char *dset_name);
+        void sample(
+                rFFTW_interpolator<rnumber, interp_neighbours> *field,
+                const std::unordered_map<int, single_particle_state<particle_type>> &x,
+                const std::unordered_map<int, std::unordered_set<int>> &dp,
+                std::unordered_map<int, single_particle_state<POINT3D>> &y);
+        void get_rhs(
+                const std::unordered_map<int, single_particle_state<particle_type>> &x,
+                const std::unordered_map<int, std::unordered_set<int>> &dp,
+                std::unordered_map<int, single_particle_state<particle_type>> &y);
+
+
+        void sort_into_domains(
+                const std::unordered_map<int, single_particle_state<particle_type>> &x,
+                std::unordered_map<int, std::unordered_set<int>> &dp);
+        void redistribute(
+                std::unordered_map<int, single_particle_state<particle_type>> &x,
+                std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals,
+                std::unordered_map<int, std::unordered_set<int>> &dp);
+
+
+        /* input/output */
+        void read();
+        void write(
+                const char *dset_name,
+                std::unordered_map<int, single_particle_state<POINT3D>> &y);
+        void write(
+                const char *dset_name,
+                std::unordered_map<int, single_particle_state<particle_type>> &y);
+        void write(const bool write_rhs = true);
+
+        /* solvers */
+        void step();
+        void roll_rhs();
+        void AdamsBashforth(const int nsteps);
+};
+
+#endif//RFFTW_DISTRIBUTED_PARTICLES
+
diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp
index facc09c42164d44478ffa01b47354a05b0decce0..bffae44f5986f9873a231442e92cba6cf005d3a4 100644
--- a/bfps/cpp/rFFTW_interpolator.cpp
+++ b/bfps/cpp/rFFTW_interpolator.cpp
@@ -54,6 +54,24 @@ rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator()
     delete[] this->compute;
 }
 
+template <class rnumber, int interp_neighbours>
+bool rFFTW_interpolator<rnumber, interp_neighbours>::get_rank_info(double z, int &maxz_rank, int &minz_rank)
+{
+    int zg = int(floor(z/this->dz));
+    minz_rank = this->descriptor->rank[MOD(
+             zg - interp_neighbours,
+            this->descriptor->sizes[0])];
+    maxz_rank = this->descriptor->rank[MOD(
+            zg + 1 + interp_neighbours,
+            this->descriptor->sizes[0])];
+    bool is_here = false;
+    for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++)
+        is_here = (is_here ||
+                   (this->descriptor->myrank ==
+                    this->descriptor->rank[MOD(zg+iz, this->descriptor->sizes[0])]));
+    return is_here;
+}
+
 template <class rnumber, int interp_neighbours>
 void rFFTW_interpolator<rnumber, interp_neighbours>::sample(
         const int nparticles,
diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp
index fb96963eee8dd1361a59edf00b38b6460a27d9e9..795257d2744e432d9c346b93848cadfbd8cc85dc 100644
--- a/bfps/cpp/rFFTW_interpolator.hpp
+++ b/bfps/cpp/rFFTW_interpolator.hpp
@@ -37,6 +37,7 @@ template <class rnumber, int interp_neighbours>
 class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours>
 {
     public:
+        using interpolator_base<rnumber, interp_neighbours>::operator();
         /* size of field to interpolate */
         ptrdiff_t field_size;
 
@@ -62,6 +63,8 @@ class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours>
             return EXIT_SUCCESS;
         }
 
+        bool get_rank_info(double z, int &maxz_rank, int &minz_rank);
+
         /* interpolate field at an array of locations */
         void sample(
                 const int nparticles,
diff --git a/bfps/tools.py b/bfps/tools.py
index 0f94a98ace7b849ae5f112ca7bd517829cdf4eca..fefb7819cf96b4f9b178252dd444ff5eabe01238 100644
--- a/bfps/tools.py
+++ b/bfps/tools.py
@@ -232,13 +232,16 @@ def particle_finite_diff_test(
     if interp_name not in pars.keys():
         # old format
         interp_name = 'tracers{0}_field'.format(species)
+    interp_name = pars[interp_name].value
+    if type(interp_name) == bytes:
+        interp_name = str(interp_name, 'ASCII')
     to_print = (
             'steps={0}, interp={1}, neighbours={2}, '.format(
                 pars['tracers{0}_integration_steps'.format(species)].value,
-                pars[str(pars[interp_name].value, 'ASCII') + '_type'].value,
-                pars[str(pars[interp_name].value, 'ASCII') + '_neighbours'].value))
-    if 'spline' in str(pars[interp_name].value, 'ASCII'):
-        to_print += 'smoothness = {0}, '.format(pars[str(pars[interp_name].value, 'ASCII') + '_smoothness'].value)
+                pars[interp_name + '_type'].value,
+                pars[interp_name + '_neighbours'].value))
+    if 'spline' in interp_name:
+        to_print += 'smoothness = {0}, '.format(pars[interp_name + '_smoothness'].value)
     to_print += (
             'SNR d1p-vel={0:.3f}'.format(np.mean(snr_vel1)))
     if acc_on:
diff --git a/setup.py b/setup.py
index 8f982144e9188dbc74eea1bb4d6ef1f32508b45c..8cb0e225fb8518645fb6bddef753ae8973f0d43d 100644
--- a/setup.py
+++ b/setup.py
@@ -92,6 +92,7 @@ print('This is bfps version ' + VERSION)
 ### lists of files and MANIFEST.in
 src_file_list = ['field_descriptor',
                  'interpolator_base',
+                 'rFFTW_distributed_particles',
                  'distributed_particles',
                  'particles_base',
                  'interpolator',
diff --git a/tests/base.py b/tests/base.py
index 2c616aeb1bbfbdc1b346ae365836429daafa93b0..b438874d79d837b71a5f286bf1005f3fd10ecf39 100644
--- a/tests/base.py
+++ b/tests/base.py
@@ -195,12 +195,14 @@ def compare_stats(
             key,
             np.max(np.abs(c0.statistics[key + '(t)'] - c0.statistics[key + '(t)']))))
     for i in range(c0.particle_species):
-        print('maximum traj difference species {0} is {1} {2}'.format(
+        print('species={0} differences: max pos(t) {1:.1e} min vel(0) {2:.1e}, max vel(0) {3:.1e}'.format(
             i,
             np.max(np.abs(c0.get_particle_file()['tracers{0}/state'.format(i)][:] -
                           c1.get_particle_file()['tracers{0}/state'.format(i)][:])),
-            np.nanmax(np.abs(c0.get_particle_file()['tracers{0}/state'.format(i)][:] -
-                             c1.get_particle_file()['tracers{0}/state'.format(i)][:]))))
+            np.min(np.abs(c0.get_particle_file()['tracers{0}/velocity'.format(i)][0] -
+                          c1.get_particle_file()['tracers{0}/velocity'.format(i)][0])),
+            np.max(np.abs(c0.get_particle_file()['tracers{0}/velocity'.format(i)][0] -
+                          c1.get_particle_file()['tracers{0}/velocity'.format(i)][0]))))
     if plots_on:
         # plot energy and enstrophy
         fig = plt.figure(figsize = (12, 12))
diff --git a/tests/test_particle_classes.py b/tests/test_particle_classes.py
index 6f38859a22c4f2e5a0ae71627899cd580af740d4..c14145c72fc57a886368e1a0a8cf9744486bf0ef 100644
--- a/tests/test_particle_classes.py
+++ b/tests/test_particle_classes.py
@@ -47,7 +47,7 @@ def scaling(opt):
     c1.compute_statistics()
     compare_stats(opt, c0, c1)
     opt.work_dir = wd + '/N{0:0>3x}_3'.format(opt.n)
-    c2 = launch(opt, dt = c0.parameters['dt'], particle_class = 'particles', interpolator_class = 'interpolator')
+    c2 = launch(opt, dt = c0.parameters['dt'], particle_class = 'rFFTW_distributed_particles')
     c2.compute_statistics()
     compare_stats(opt, c0, c2)
     compare_stats(opt, c1, c2)