diff --git a/bfps/cpp/particles/p2p_computer.hpp b/bfps/cpp/particles/p2p_computer.hpp
index efe0e5e321f190cf26a66d68c35e828de6ffddb1..eb77729fd0b9d6cdb0ad2586df3bf78696c1ef59 100644
--- a/bfps/cpp/particles/p2p_computer.hpp
+++ b/bfps/cpp/particles/p2p_computer.hpp
@@ -8,7 +8,7 @@ class p2p_computer{
 public:
     template <int size_particle_rhs>
     void init_result_array(real_number rhs[], const partsize_t nbParticles) const{
-        memset(rhs, 0, sizeof(real_number)*nbParticles);
+        memset(rhs, 0, sizeof(real_number)*nbParticles*size_particle_rhs);
     }
 
     template <int size_particle_rhs>
@@ -23,7 +23,8 @@ public:
     template <int size_particle_positions, int size_particle_rhs>
     void compute_interaction(const real_number pos_part1[], real_number rhs_part1[],
                              const real_number pos_part2[], real_number rhs_part2[],
-                             const real_number dist_pow2) const{
+                             const real_number dist_pow2,
+                             const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const{
         rhs_part1[0] += 1;
         rhs_part2[0] += 1;
     }
diff --git a/bfps/cpp/particles/p2p_distr_mpi.hpp b/bfps/cpp/particles/p2p_distr_mpi.hpp
index 1709dc7284742616eb66d029c5512cc2ec53f9da..aacd5a2f79920502613ab394063a2af226a8c254 100644
--- a/bfps/cpp/particles/p2p_distr_mpi.hpp
+++ b/bfps/cpp/particles/p2p_distr_mpi.hpp
@@ -210,21 +210,16 @@ public:
     }
 
     real_number compute_distance_r2(const real_number x1, const real_number y1, const real_number z1,
-                                    const real_number x2, const real_number y2, const real_number z2) const {
-        real_number diff_x = std::abs(x1-x2);
-        while(diff_x > spatial_box_width[IDX_X]/2){
-            diff_x = std::abs(diff_x - spatial_box_width[IDX_X]);
-        }
+                                    const real_number x2, const real_number y2, const real_number z2,
+                                    const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const {
+        real_number diff_x = std::abs(x1-x2+xshift_coef*spatial_box_width[IDX_X]);
+        assert(diff_x <= 2*cutoff_radius);
 
-        real_number diff_y = std::abs(y1-y2);
-        while(diff_y > spatial_box_width[IDX_Y]/2){
-            diff_y = std::abs(diff_y - spatial_box_width[IDX_Y]);
-        }
+        real_number diff_y = std::abs(y1-y2+yshift_coef*spatial_box_width[IDX_Y]);
+        assert(diff_y <= 2*cutoff_radius);
 
-        real_number diff_z = std::abs(z1-z2);
-        while(diff_z > spatial_box_width[IDX_Z]/2){
-            diff_z = std::abs(diff_z - spatial_box_width[IDX_Z]);
-        }
+        real_number diff_z = std::abs(z1-z2+zshift_coef*spatial_box_width[IDX_Z]);
+        assert(diff_z <= 2*cutoff_radius);
 
         return (diff_x*diff_x) + (diff_y*diff_y) + (diff_z*diff_z);
     }
@@ -401,8 +396,8 @@ public:
 
         // Find process with at least one neighbor
         {
-//            std::cout << my_rank << " my_top_z_cell_level " << my_top_z_cell_level << std::endl;
-//            std::cout << my_rank << " my_down_z_cell_level " << my_down_z_cell_level << std::endl;
+//            std::cout << my_rank << ">>  my_top_z_cell_level " << my_top_z_cell_level << std::endl;
+//            std::cout << my_rank << ">>  my_down_z_cell_level " << my_down_z_cell_level << std::endl;
 //            std::cout.flush();// TODO
 
             int dest_proc = (my_rank+1)%nb_processes_involved;
@@ -417,8 +412,8 @@ public:
                 }
 
 //                std::cout << my_rank << " dest_proc " << dest_proc << std::endl;
-//                std::cout << my_rank << " first_cell_level_proc(dest_proc) " << first_cell_level_proc(dest_proc) << std::endl;
-//                std::cout << my_rank << " last_cell_level_proc(dest_proc) " << last_cell_level_proc(dest_proc) << std::endl;
+//                std::cout << my_rank << ">> first_cell_level_proc(dest_proc) " << first_cell_level_proc(dest_proc) << std::endl;
+//                std::cout << my_rank << ">> last_cell_level_proc(dest_proc) " << last_cell_level_proc(dest_proc) << std::endl;
 //                std::cout.flush();// TODO
 
                 NeighborDescriptor descriptor;
@@ -427,12 +422,12 @@ public:
                 descriptor.nbParticlesToExchange = particles_offset_layers[my_nb_cell_levels] - particles_offset_layers[my_nb_cell_levels-nb_levels_to_send];
                 descriptor.isRecv = false;
 
-//                std::cout << my_rank << "SEND" << std::endl;
-//                std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
-//                std::cout << "descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl;
-//                std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
-//                std::cout << "descriptor.isRecv " << descriptor.isRecv << std::endl;
-//                std::cout << "neigDescriptors.size() " << neigDescriptors.size() << std::endl;
+//                std::cout << my_rank << " SEND" << std::endl;
+//                std::cout << ">> descriptor.destProc " << descriptor.destProc << std::endl;
+//                std::cout << ">> descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl;
+//                std::cout << ">> descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
+//                std::cout << ">> descriptor.isRecv " << descriptor.isRecv << std::endl;
+//                std::cout << ">> neigDescriptors.size() " << neigDescriptors.size() << std::endl;
 //                std::cout.flush();// TODO
 
                 neigDescriptors.emplace_back(std::move(descriptor));
@@ -467,12 +462,12 @@ public:
                 neigDescriptors.emplace_back(std::move(descriptor));
 
 //                std::cout << my_rank << "] RECV" << std::endl;
-//                std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
-//                std::cout << "descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl;
-//                std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
-//                std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
-//                std::cout << "descriptor.isRecv " << descriptor.isRecv << std::endl;
-//                std::cout << "neigDescriptors.size() " << neigDescriptors.size() << std::endl;
+//                std::cout << ">> descriptor.destProc " << descriptor.destProc << std::endl;
+//                std::cout << ">> descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl;
+//                std::cout << ">> descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
+//                std::cout << ">> descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
+//                std::cout << ">> descriptor.isRecv " << descriptor.isRecv << std::endl;
+//                std::cout << ">> neigDescriptors.size() " << neigDescriptors.size() << std::endl;
 //                std::cout.flush();// TODO
 
                 src_proc = (src_proc-1+nb_processes_involved)%nb_processes_involved;
@@ -544,6 +539,7 @@ public:
             #pragma omp master
             {
                 while(mpiRequests.size()){
+                    TIMEZONE("wait-loop");
                     assert(mpiRequests.size() == whatNext.size());
 
                     int idxDone = int(mpiRequests.size());
@@ -561,6 +557,7 @@ public:
                     /// Data to exchange particles
                     //////////////////////////////////////////////////////////////////////
                     if(releasedAction.first == RECV_PARTICLES){
+                        TIMEZONE("post-recv-particles");
                         NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
                         assert(descriptor.isRecv);
                         const int destProc = descriptor.destProc;
@@ -589,6 +586,7 @@ public:
                     /// Computation
                     //////////////////////////////////////////////////////////////////////
                     if(releasedAction.first == COMPUTE_PARTICLES){
+                        TIMEZONE("compute-particles");
                         NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
                         assert(descriptor.isRecv);
                         const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange;
@@ -613,7 +611,8 @@ public:
 
                             const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
                             long int neighbors_indexes[27];
-                            const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, true);
+                            std::array<real_number,3> shift[27];
+                            const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, shift, true);
 
 //                            for(int idx_test = 0 ; idx_test < nb_parts_in_cell ; ++idx_test){ // TODO
 //                                real_number totest[3] = {8.570442e-01, 7.173084e-02, 8.279754e-03};
@@ -638,14 +637,15 @@ public:
                                                                                             descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z],
                                                                                             particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
                                                                                             particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
-                                                                                            particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
+                                                                                            particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z],
+                                                                                            shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]);
                                             if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                                                 in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
                                                                     &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions],
                                                                     &descriptor.results[(idxPart+idx_p1)*size_particle_rhs],
                                                                     &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
                                                                     &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
-                                                                    dist_r2);
+                                                                    dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]);
                                             }
 
 //                                            if(inout_index_particles[(*neighbors[idx_neighbor])[idx_2].first+idx_p2] == 356){// TODO
@@ -693,6 +693,7 @@ public:
                     /// Merge
                     //////////////////////////////////////////////////////////////////////
                     if(releasedAction.first == MERGE_PARTICLES){
+                        TIMEZONE("merge");
                         NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
                         assert(descriptor.isRecv == false);
                         assert(descriptor.toRecvAndMerge != nullptr);
@@ -709,6 +710,7 @@ public:
 
         // Compute self data
         for(const auto& iter_cell : my_tree){
+            TIMEZONE("proceed-leaf");
             const std::vector<std::pair<partsize_t,partsize_t>>& intervals = iter_cell.second;
 
             for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
@@ -742,14 +744,15 @@ public:
                                                                         particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
                                                                         particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X],
                                                                         particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y],
-                                                                        particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z]);
+                                                                        particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z],
+                                                                        0, 0, 0);
                         if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                             in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
                                                 &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
                                                 &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
                                                 &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions],
                                                 &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs],
-                                                dist_r2);
+                                                dist_r2, 0, 0, 0);
                         }
 
 //                        if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356)
@@ -784,14 +787,15 @@ public:
                                                                             particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
                                                                             particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
                                                                             particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
-                                                                            particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
+                                                                            particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z],
+                                                                            0, 0, 0);
                             if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                                 in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
                                                     &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
                                                     &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
                                                     &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions],
                                                     &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs],
-                                                    dist_r2);
+                                                    dist_r2, 0, 0, 0);
                             }
 
 //                            if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356)
@@ -823,7 +827,8 @@ public:
             const long int currenct_cell_idx = iter_cell.first;
             const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
             long int neighbors_indexes[27];
-            const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, false);
+            std::array<real_number,3> shift[27];
+            const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false);
 
 //            if(((currenct_cell_idx == 785))){// TODO
 //                printf("box %ld:\n", iter_cell.first);
@@ -843,14 +848,15 @@ public:
                                                                                     particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z],
                                                                                     particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
                                                                                     particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
-                                                                                    particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
+                                                                                    particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z],
+                                                                                    shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]);
                                     if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                                         in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
                                                             &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
                                                             &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
                                                             &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
                                                             &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
-                                                            dist_r2);
+                                                            dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]);
                                     }
 
 //                                    if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356)
diff --git a/bfps/cpp/particles/p2p_tree.hpp b/bfps/cpp/particles/p2p_tree.hpp
index 9f146644120af7c38b65e24dc110545c311f516a..52cb2f2d967ebc3533c8e3c98455734d26c450cb 100644
--- a/bfps/cpp/particles/p2p_tree.hpp
+++ b/bfps/cpp/particles/p2p_tree.hpp
@@ -46,7 +46,9 @@ public:
         return emptyCell;
     }
 
-    int getNeighbors(const long int idx, const CellClass* output[27], long int output_indexes[27], const bool include_target) const{
+    template <class ShiftType>
+    int getNeighbors(const long int idx, const CellClass* output[27], long int output_indexes[27],
+                     std::array<ShiftType,3> shift[27], const bool include_target) const{
         int nbNeighbors = 0;
 
         std::fill_n(output, 27, nullptr);
@@ -57,29 +59,38 @@ public:
 
         for(long int neigh_x = -1 ; neigh_x <= 1 ; ++neigh_x){
             long int neigh_x_pbc = neigh_x+idx_x;
+            ShiftType shift_x = 0;
             if(neigh_x_pbc < 0){
                 neigh_x_pbc += nb_cell_levels[IDX_X];
+                shift_x = 1;
             }
             else if(nb_cell_levels[IDX_X] <= neigh_x_pbc){
                 neigh_x_pbc -= nb_cell_levels[IDX_X];
+                shift_x = -1;
             }
 
             for(long int neigh_y = -1 ; neigh_y <= 1 ; ++neigh_y){
                 long int neigh_y_pbc = neigh_y+idx_y;
+                ShiftType shift_y = 0;
                 if(neigh_y_pbc < 0){
                     neigh_y_pbc += nb_cell_levels[IDX_Y];
+                    shift_y = 1;
                 }
                 else if(nb_cell_levels[IDX_Y] <= neigh_y_pbc){
                     neigh_y_pbc -= nb_cell_levels[IDX_Y];
+                    shift_y = -1;
                 }
 
                 for(long int neigh_z = -1 ; neigh_z <= 1 ; ++neigh_z){
                     long int neigh_z_pbc = neigh_z+idx_z;
+                    ShiftType shift_z = 0;
                     if(neigh_z_pbc < 0){
                         neigh_z_pbc += nb_cell_levels[IDX_Z];
+                        shift_z = 1;
                     }
                     else if(nb_cell_levels[IDX_Z] <= neigh_z_pbc){
                         neigh_z_pbc -= nb_cell_levels[IDX_Z];
+                        shift_z = -1;
                     }
 
                     if(include_target || neigh_x_pbc != idx_x || neigh_y_pbc != idx_y || neigh_z_pbc != idx_z){
@@ -90,6 +101,11 @@ public:
                         if(iter != data.end()){
                             output[nbNeighbors] = &(iter->second);
                             output_indexes[nbNeighbors] = idx_neigh;
+
+                            shift[nbNeighbors][IDX_X] = shift_x;
+                            shift[nbNeighbors][IDX_Y] = shift_y;
+                            shift[nbNeighbors][IDX_Z] = shift_z;
+
                             nbNeighbors += 1;
                         }
                     }