diff --git a/bfps/cpp/particles.cpp b/bfps/cpp/particles.cpp
index 4c0592f32820e05534f392c2628047632c806012..765abf2a90a5be792d6e6f549a0f1322ec5f9f45 100644
--- a/bfps/cpp/particles.cpp
+++ b/bfps/cpp/particles.cpp
@@ -69,10 +69,7 @@ particles<particle_type, rnumber, multistep, interp_neighbours>::particles(
     this->traj_skip = TRAJ_SKIP;
     this->myrank = this->vel->descriptor->myrank;
     this->nprocs = this->vel->descriptor->nprocs;
-    // in principle only the buffer width at the top needs the +1,
-    // but things are simpler if buffer_width is the same
-    this->buffer_width = interp_neighbours+1;
-    this->buffer_size = this->buffer_width*this->fs->rd->slice_size;
+    this->comm = this->vel->descriptor->comm;
     this->array_size = this->nparticles * this->ncomponents;
     this->state = fftw_alloc_real(this->array_size);
     std::fill_n(this->state, this->array_size, 0.0);
@@ -85,16 +82,6 @@ particles<particle_type, rnumber, multistep, interp_neighbours>::particles(
     std::fill_n(this->watching, this->fs->rd->nprocs*this->nparticles, false);
     this->computing = new int[nparticles];
 
-    int tdims[4];
-    tdims[0] = this->buffer_width*2*this->fs->rd->nprocs + this->fs->rd->sizes[0];
-    tdims[1] = this->fs->rd->sizes[1];
-    tdims[2] = this->fs->rd->sizes[2];
-    tdims[3] = this->fs->rd->sizes[3];
-    this->buffered_field_descriptor = new field_descriptor<rnumber>(
-            4, tdims,
-            this->fs->rd->mpi_dtype,
-            this->fs->rd->comm);
-
     // compute dx, dy, dz;
     this->dx = 4*acos(0) / (this->fs->dkx*this->fs->rd->sizes[2]);
     this->dy = 4*acos(0) / (this->fs->dky*this->fs->rd->sizes[1]);
@@ -112,7 +99,7 @@ particles<particle_type, rnumber, multistep, interp_neighbours>::particles(
             nprocs,
             MPI_DOUBLE,
             MPI_SUM,
-            this->fs->rd->comm);
+            this->comm);
     std::fill_n(tbound, nprocs, 0.0);
     tbound[this->myrank] = (this->fs->rd->starts[0] + this->fs->rd->subsizes[0])*this->dz;
     MPI_Allreduce(
@@ -121,7 +108,7 @@ particles<particle_type, rnumber, multistep, interp_neighbours>::particles(
             nprocs,
             MPI_DOUBLE,
             MPI_SUM,
-            this->fs->rd->comm);
+            this->comm);
     delete[] tbound;
     //for (int r = 0; r<nprocs; r++)
     //    DEBUG_MSG(
@@ -143,7 +130,64 @@ particles<particle_type, rnumber, multistep, interp_neighbours>::~particles()
     }
     delete[] this->lbound;
     delete[] this->ubound;
-    delete this->buffered_field_descriptor;
+}
+
+template <int particle_type, class rnumber, bool multistep, int interp_neighbours>
+void particles<particle_type, rnumber, multistep, interp_neighbours>::sample_vec_field(
+        interpolator<rnumber, interp_neighbours> *vec,
+        double *x,
+        double *y,
+        const bool synch,
+        int *deriv)
+{
+    /* get grid coordinates */
+    int *xg = new int[3*this->nparticles];
+    double *xx = new double[3*this->nparticles];
+    this->get_grid_coordinates(x, xg, xx);
+    /* perform interpolation */
+    std::fill_n(y, 3*this->nparticles, 0.0);
+    if (multistep)
+    {
+        for (int p=0; p<this->nparticles; p++)
+        {
+            if (this->myrank == this->computing[p])
+               (*vec)(xg + p*3, xx + p*3, y + p*3, deriv);
+        }
+    }
+    else
+    {
+        for (int p=0; p<this->nparticles; p++)
+        {
+            if (this->watching[this->myrank*this->nparticles+p])
+            {
+                int crank = this->get_rank(x[p*3 + 2]);
+                if (this->myrank == crank)
+                    (*vec)(xg + p*3, xx + p*3, y + p*3, deriv);
+                if (crank != this->computing[p])
+                    this->synchronize_single_particle_state(p, y, crank);
+            }
+        }
+    }
+    if (synch)
+    {
+        double *yy =  new double[3*this->nparticles];
+        std::fill_n(yy, 3*this->nparticles, 0.0);
+        for (int p=0; p<this->nparticles; p++)
+        {
+            if (this->myrank == this->computing[p])
+               std::copy(y+3*p, y+3*(p+1), yy+3*p);
+        }
+        MPI_Allreduce(
+                yy,
+                y,
+                3*this->nparticles,
+                MPI_DOUBLE,
+                MPI_SUM,
+                this->comm);
+        delete[] yy;
+    }
+    delete[] xg;
+    delete[] xx;
 }
 
 template <int particle_type, class rnumber, bool multistep, int interp_neighbours>
@@ -240,7 +284,7 @@ void particles<particle_type, rnumber, multistep, interp_neighbours>::synchroniz
                             MPI_DOUBLE,
                             r,
                             p+this->computing[p]*this->nparticles,
-                            this->fs->rd->comm);
+                            this->comm);
                 if (this->myrank == r)
                     MPI_Recv(
                             x+p*this->ncomponents,
@@ -248,7 +292,7 @@ void particles<particle_type, rnumber, multistep, interp_neighbours>::synchroniz
                             MPI_DOUBLE,
                             source,
                             p+this->computing[p]*this->nparticles,
-                            this->fs->rd->comm,
+                            this->comm,
                             MPI_STATUS_IGNORE);
             }
     }
@@ -280,7 +324,7 @@ void particles<particle_type, rnumber, multistep, interp_neighbours>::synchroniz
             this->array_size,
             MPI_DOUBLE,
             MPI_SUM,
-            this->fs->rd->comm);
+            this->comm);
     if (this->integration_steps >= 1)
     {
         for (int i=0; i<this->integration_steps; i++)
@@ -297,7 +341,7 @@ void particles<particle_type, rnumber, multistep, interp_neighbours>::synchroniz
                     this->array_size,
                     MPI_DOUBLE,
                     MPI_SUM,
-                    this->fs->rd->comm);
+                    this->comm);
         }
     }
     fftw_free(tstate);
@@ -305,7 +349,7 @@ void particles<particle_type, rnumber, multistep, interp_neighbours>::synchroniz
     for (int p=0; p<this->nparticles; p++)
     {
         this->computing[p] = this->get_rank(this->state[p*this->ncomponents + 2]);
-        for (int r=0; r<this->buffered_field_descriptor->nprocs; r++)
+        for (int r=0; r<this->nprocs; r++)
             this->watching[r*this->nparticles+p] = (r == this->computing[p]);
         //DEBUG_MSG("synchronizing particles, particle %d computing is %d\n", p, this->computing[p]);
     }
@@ -330,7 +374,7 @@ void particles<particle_type, rnumber, multistep, interp_neighbours>::synchroniz
                 this->nparticles*this->fs->rd->nprocs,
                 MPI_C_BOOL,
                 MPI_LOR,
-                this->fs->rd->comm);
+                this->comm);
         delete[] local_watching;
     }
 }
@@ -604,7 +648,7 @@ void particles<particle_type, rnumber, multistep, interp_neighbours>::read(hid_t
             this->array_size,
             MPI_DOUBLE,
             0,
-            this->fs->rd->comm);
+            this->comm);
     for (int i = 0; i<this->integration_steps; i++)
     {
         MPI_Bcast(
@@ -612,7 +656,7 @@ void particles<particle_type, rnumber, multistep, interp_neighbours>::read(hid_t
                 this->array_size,
                 MPI_DOUBLE,
                 0,
-                this->fs->rd->comm);
+                this->comm);
     }
     // initial assignment of particles
     for (int p=0; p<this->nparticles; p++)
@@ -674,38 +718,6 @@ void particles<particle_type, rnumber, multistep, interp_neighbours>::write(hid_
     }
 }
 
-template <int particle_type, class rnumber, bool multistep, int interp_neighbours>
-void particles<particle_type, rnumber, multistep, interp_neighbours>::sample_vec_field(
-        interpolator<rnumber, interp_neighbours> *field,
-        double *vec_values)
-{
-    double *vec_local =  new double[3*this->nparticles];
-    std::fill_n(vec_local, 3*this->nparticles, 0.0);
-    int deriv[] = {0, 0, 0};
-    /* get grid coordinates */
-    int *xg = new int[3*this->nparticles];
-    double *xx = new double[3*this->nparticles];
-    this->get_grid_coordinates(this->state, xg, xx);
-    /* perform interpolation */
-    for (int p=0; p<this->nparticles; p++)
-        if (this->myrank == this->computing[p])
-            (*field)(
-                    xg + p*3,
-                    xx + p*3,
-                    vec_local + p*3,
-                    deriv);
-    MPI_Allreduce(
-            vec_local,
-            vec_values,
-            3*this->nparticles,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->fs->rd->comm);
-    delete[] xg;
-    delete[] xx;
-    delete[] vec_local;
-}
-
 
 /*****************************************************************************/
 template class particles<VELOCITY_TRACER, float, true, 1>;
diff --git a/bfps/cpp/particles.hpp b/bfps/cpp/particles.hpp
index fd2572caa10e2b0f17f26422620adea3d033f5f5..b42a4f16de8a6a83214009e947d1e7c58f9b7e53 100644
--- a/bfps/cpp/particles.hpp
+++ b/bfps/cpp/particles.hpp
@@ -44,8 +44,8 @@ class particles
 {
     public:
         int myrank, nprocs;
+        MPI_Comm comm;
         fluid_solver_base<rnumber> *fs;
-        field_descriptor<rnumber> *buffered_field_descriptor;
         rnumber *data;
 
         /* watching is an array of shape [nparticles], with
@@ -114,7 +114,16 @@ class particles
         void synchronize();
         void synchronize_single_particle_state(int p, double *x, int source_id = -1);
         void get_grid_coordinates(double *x, int *xg, double *xx);
-        void sample_vec_field(interpolator<rnumber, interp_neighbours> *field, double *vec_values);
+        void sample_vec_field(
+            interpolator<rnumber, interp_neighbours> *vec,
+            double *x,
+            double *y,
+            const bool synch = false,
+            int *deriv = NULL);
+        inline void sample_vec_field(interpolator<rnumber, interp_neighbours> *field, double *vec_values)
+        {
+            this->sample_vec_field(field, this->state, vec_values, true, NULL);
+        }
 
         /* input/output */
         void read(hid_t data_file_id);