From 69934557735f0e06488641caa4fea78f3c20631f Mon Sep 17 00:00:00 2001
From: Berenger Bramas <berenger.bramas@mpcdf.mpg.de>
Date: Thu, 21 Sep 2017 14:11:36 +0200
Subject: [PATCH] Make it work (when there are more than one cell)

---
 bfps/cpp/particles/p2p_distr_mpi.hpp | 439 +++++++++++++++++----------
 1 file changed, 272 insertions(+), 167 deletions(-)

diff --git a/bfps/cpp/particles/p2p_distr_mpi.hpp b/bfps/cpp/particles/p2p_distr_mpi.hpp
index 45f8bba3..1709dc72 100644
--- a/bfps/cpp/particles/p2p_distr_mpi.hpp
+++ b/bfps/cpp/particles/p2p_distr_mpi.hpp
@@ -15,60 +15,6 @@
 #include "particles_utils.hpp"
 #include "p2p_tree.hpp"
 
-/*
-- method to reorder each particle section following the cutoff grid (will permite index too)
-- exchange particles (with upper only) and receive particle (from lower only)
-- 1 message at a time! so need the offset of each cell of the cutoff grid
-- iterate on what has been received with my own particles, fill both rhs
-- send back the rhs
-- merge rhs
-- update particles property
-  */
-
-
-
-template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs>
-struct ParticleView{
-    partsize_t p_index;
-    real_number* ptr_particles_positions;
-    real_number* ptr_particles_current_rhs;
-    partsize_t* ptr_global_idx;
-    long int* ptr_cell_idx;
-
-    ParticleView()
-        : p_index(-1), ptr_particles_positions(nullptr),
-          ptr_particles_current_rhs(nullptr), ptr_global_idx(nullptr),
-          ptr_cell_idx(nullptr){}
-};
-
-template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs>
-void swap(ParticleView<partsize_t, real_number, size_particle_positions,size_particle_rhs>& p1,
-          ParticleView<partsize_t, real_number, size_particle_positions,size_particle_rhs>& p2){
-    if(p1.p_index != -1 && p2.p_index != -1){
-        for(int idx_pos = 0 ; idx_pos < size_particle_positions ; ++idx_pos){
-            std::swap(p1.ptr_particles_positions[p1.p_index*size_particle_positions+idx_pos],
-                      p2.ptr_particles_positions[p2.p_index*size_particle_positions+idx_pos]);
-        }
-        for(int idx_rhs = 0 ; idx_rhs < size_particle_rhs ; ++idx_rhs){
-            std::swap(p1.ptr_particles_current_rhs[p1.p_index*size_particle_rhs+idx_rhs],
-                      p2.ptr_particles_current_rhs[p2.p_index*size_particle_rhs+idx_rhs]);
-        }
-        std::swap(p1.ptr_cell_idx[p1.p_index],p2.ptr_cell_idx[p2.p_index]);
-        std::swap(p1.ptr_global_idx[p1.p_index],p2.ptr_global_idx[p2.p_index]);
-        std::swap(p1.p_index,p2.p_index);
-    }
-    else if(p1.p_index != -1){
-        p2 = p1;
-        p1 = ParticleView<partsize_t, real_number, size_particle_positions,size_particle_rhs>();
-    }
-    else if(p2.p_index != -1){
-        p1 = p2;
-        p2 = ParticleView<partsize_t, real_number, size_particle_positions,size_particle_rhs>();
-    }
-}
-
-
-
 template <class partsize_t, class real_number>
 class p2p_distr_mpi {
 protected:
@@ -126,9 +72,46 @@ protected:
     std::array<real_number,3> spatial_box_width;
     std::array<real_number,3> spatial_box_offset;
 
+    const real_number cutoff_radius_compute;
     const real_number cutoff_radius;
     std::array<long int,3> nb_cell_levels;
 
+    template <class DataType, int sizeElement>
+    static void permute_copy(const partsize_t offsetIdx, const partsize_t nbElements,
+                             const std::pair<long int,partsize_t> permutation[],
+                             DataType data[], std::vector<unsigned char>* buffer){
+        buffer->resize(nbElements*sizeof(DataType)*sizeElement);
+        DataType* dataBuffer = reinterpret_cast<DataType*>(buffer->data());
+
+        // Permute
+        for(partsize_t idxPart = 0 ; idxPart < nbElements ; ++idxPart){
+            const partsize_t srcData = permutation[idxPart].second;
+            const partsize_t destData = idxPart;
+            for(int idxVal = 0 ; idxVal < sizeElement ; ++idxVal){
+                dataBuffer[destData*sizeElement + idxVal]
+                        = data[srcData*sizeElement + idxVal];
+            }
+        }
+
+        // Copy back
+        for(partsize_t idxPart = 0 ; idxPart < nbElements ; ++idxPart){
+            const partsize_t srcData = idxPart;
+            const partsize_t destData = idxPart+offsetIdx;
+            for(int idxVal = 0 ; idxVal < sizeElement ; ++idxVal){
+                data[destData*sizeElement + idxVal]
+                        = dataBuffer[srcData*sizeElement + idxVal];
+            }
+        }
+    }
+
+    static real_number getGridCutoff(const real_number in_cutoff_radius, const std::array<real_number,3>& in_spatial_box_width){
+        int idx_factor = 1;
+        while(in_cutoff_radius <= in_spatial_box_width[IDX_Z]/real_number(idx_factor+1)){
+            idx_factor += 1;
+        }
+        return in_spatial_box_width[IDX_Z]/real_number(idx_factor);
+    }
+
 public:
     ////////////////////////////////////////////////////////////////////////////
 
@@ -144,7 +127,8 @@ public:
             current_partition_size(current_partition_interval.second-current_partition_interval.first),
             field_grid_dim(in_field_grid_dim),
             spatial_box_width(in_spatial_box_width), spatial_box_offset(in_spatial_box_offset),
-            cutoff_radius(in_cutoff_radius){
+            cutoff_radius_compute(in_cutoff_radius),
+            cutoff_radius(getGridCutoff(in_cutoff_radius, in_spatial_box_width)){
 
         AssertMpi(MPI_Comm_rank(current_com, &my_rank));
         AssertMpi(MPI_Comm_size(current_com, &nb_processes));
@@ -281,18 +265,20 @@ public:
                                                                               particles_positions[(idxPart)*size_particle_positions + IDX_Z]);
                     assert(my_down_z_cell_level <= get_cell_coord_z_from_index(particles_coord[idxPart]));
                     assert(get_cell_coord_z_from_index(particles_coord[idxPart]) <= my_top_z_cell_level);
-                    if(inout_index_particles[idxPart] == 58576 || inout_index_particles[idxPart] == 0){// TODO
-                        printf("Coord index %ld - %ld (tree index %ld)\n", idxPart, inout_index_particles[idxPart],particles_coord[idxPart]);
-                        printf(">> Box index %ld - %ld - %ld\n", get_cell_coord_x_from_index(particles_coord[idxPart]),
-                               get_cell_coord_y_from_index(particles_coord[idxPart]),
-                               get_cell_coord_z_from_index(particles_coord[idxPart]));
-                        printf("idxPartition %d\n", idxPartition);
-                    }
+//                    if(inout_index_particles[idxPart] == 547){// TODO
+//                        printf("Coord index %ld - %ld (tree index %ld)\n", idxPart, inout_index_particles[idxPart],particles_coord[idxPart]);
+//                        printf(">> Box index %ld - %ld - %ld\n", get_cell_coord_x_from_index(particles_coord[idxPart]),
+//                               get_cell_coord_y_from_index(particles_coord[idxPart]),
+//                               get_cell_coord_z_from_index(particles_coord[idxPart]));
+//                        printf(">> idxPartition %d\n", idxPartition);
+//                        printf(">> position %e %e %e\n", particles_positions[(idxPart)*size_particle_positions + IDX_X],
+//                                particles_positions[(idxPart)*size_particle_positions + IDX_Y],
+//                                particles_positions[(idxPart)*size_particle_positions + IDX_Z]);
+//                    }
                 }
             }
 
-            using ParticleView_t = ParticleView<partsize_t, real_number, size_particle_positions,size_particle_rhs>;
-            std::vector<ParticleView_t> part_to_sort;
+            std::vector<std::pair<long int,partsize_t>> part_to_sort;
 
             // Sort each partition in cells
             for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){
@@ -300,22 +286,36 @@ public:
 
                 for(partsize_t idxPart = current_offset_particles_for_partition[idxPartition] ; idxPart < current_offset_particles_for_partition[idxPartition+1] ; ++idxPart ){
                     part_to_sort.emplace_back();
-                    part_to_sort.back().p_index = idxPart;
-                    part_to_sort.back().ptr_particles_positions = particles_positions;
-                    part_to_sort.back().ptr_particles_current_rhs = particles_current_rhs;
-                    part_to_sort.back().ptr_global_idx = inout_index_particles;
-                    part_to_sort.back().ptr_cell_idx = particles_coord.get();
+                    part_to_sort.back().first = particles_coord[idxPart];
+                    part_to_sort.back().second = idxPart;
                 }
 
-                assert(part_to_sort.size() == (current_offset_particles_for_partition[idxPartition+1]-current_offset_particles_for_partition[idxPartition]));
+                assert(part_to_sort.size() == (current_my_nb_particles_per_partition[idxPartition]));
 
                 std::sort(part_to_sort.begin(), part_to_sort.end(),
-                          [](const ParticleView_t& p1,
-                             const ParticleView_t& p2){
-                    assert(p1.p_index != -1 && p1.ptr_cell_idx);
-                    assert(p2.p_index != -1 && p2.ptr_cell_idx);
-                    return p1.ptr_cell_idx[p1.p_index] < p2.ptr_cell_idx[p2.p_index];
+                          [](const std::pair<long int,partsize_t>& p1,
+                             const std::pair<long int,partsize_t>& p2){
+                    return p1.first < p2.first;
                 });
+
+//                for(partsize_t idxPart = 1 ; idxPart < (long int)part_to_sort.size() ; ++idxPart){// TODO
+//                    assert(part_to_sort[idxPart-1].first <= part_to_sort[idxPart].first);
+//                }
+
+                // Permute array using buffer
+                std::vector<unsigned char> buffer;
+                permute_copy<real_number, size_particle_positions>(current_offset_particles_for_partition[idxPartition],
+                                                                   current_my_nb_particles_per_partition[idxPartition],
+                                                                   part_to_sort.data(), particles_positions, &buffer);
+                permute_copy<real_number, size_particle_rhs>(current_offset_particles_for_partition[idxPartition],
+                                                             current_my_nb_particles_per_partition[idxPartition],
+                                                             part_to_sort.data(), particles_current_rhs, &buffer);
+                permute_copy<partsize_t, 1>(current_offset_particles_for_partition[idxPartition],
+                                            current_my_nb_particles_per_partition[idxPartition],
+                                            part_to_sort.data(), inout_index_particles, &buffer);
+                permute_copy<long int, 1>(current_offset_particles_for_partition[idxPartition],
+                                            current_my_nb_particles_per_partition[idxPartition],
+                                            part_to_sort.data(), particles_coord.get(), &buffer);
             }
         }
 
@@ -336,13 +336,31 @@ public:
                     current_cell_idx = particles_coord[idx_part];
                     current_nb_particles_in_cell = 1;
                     current_cell_offset = idx_part;
-                    if(inout_index_particles[idx_part] == 58576 || inout_index_particles[idx_part] == 0){// TODO
-                        printf("Coord index %ld - %ld (tree index %ld)\n", idx_part, inout_index_particles[idx_part],particles_coord[idx_part]);
-                        printf(">> Box index %ld - %ld - %ld\n", get_cell_coord_x_from_index(particles_coord[idx_part]),
-                               get_cell_coord_y_from_index(particles_coord[idx_part]),
-                               get_cell_coord_z_from_index(particles_coord[idx_part]));
-                        printf("current_cell_offset %ld current_nb_particles_in_cell %ld\n", current_cell_offset, current_nb_particles_in_cell);
-                    }
+//                    if(inout_index_particles[idx_part] == 547){// TODO
+//                        printf("idxPartition %d\n", idxPartition);
+//                        printf(">> Coord index %ld - %ld (tree index %ld)\n", idx_part, inout_index_particles[idx_part],particles_coord[idx_part]);
+//                        printf(">> Box index %ld - %ld - %ld\n", get_cell_coord_x_from_index(particles_coord[idx_part]),
+//                               get_cell_coord_y_from_index(particles_coord[idx_part]),
+//                               get_cell_coord_z_from_index(particles_coord[idx_part]));
+//                        printf(">> current_cell_offset %ld current_nb_particles_in_cell %ld\n", current_cell_offset, current_nb_particles_in_cell);
+//                        printf(">> Position %e %e %e\n", particles_positions[idx_part*size_particle_positions + IDX_X],
+//                                particles_positions[idx_part*size_particle_positions + IDX_Y],
+//                                particles_positions[idx_part*size_particle_positions + IDX_Z]);
+//                    }
+//                    if(inout_index_particles[idx_part] == 356){// TODO
+//                        printf("idxPartition %d\n", idxPartition);
+//                        printf(">> Coord index %ld - %ld (tree index %ld)\n", idx_part, inout_index_particles[idx_part],particles_coord[idx_part]);
+//                        printf(">> Box index %ld - %ld - %ld\n", get_cell_coord_x_from_index(particles_coord[idx_part]),
+//                               get_cell_coord_y_from_index(particles_coord[idx_part]),
+//                               get_cell_coord_z_from_index(particles_coord[idx_part]));
+//                        printf(">> current_cell_offset %ld current_nb_particles_in_cell %ld\n", current_cell_offset, current_nb_particles_in_cell);
+//                        printf(">> Position %e %e %e\n", particles_positions[idx_part*size_particle_positions + IDX_X],
+//                                particles_positions[idx_part*size_particle_positions + IDX_Y],
+//                                particles_positions[idx_part*size_particle_positions + IDX_Z]);
+//                    }
+                }
+                else{
+                    current_nb_particles_in_cell += 1;
                 }
             }
             if(current_nb_particles_in_cell){
@@ -351,9 +369,9 @@ public:
             }
         }
 
-        printf("[%d] go from cutoff level %ld to %ld\n",
-               my_rank, my_down_z_cell_level, my_top_z_cell_level); // TODO remove
-        fflush(stdout); // TODO
+//        printf("[%d] go from cutoff level %ld to %ld\n",
+//               my_rank, my_down_z_cell_level, my_top_z_cell_level); // TODO remove
+//        fflush(stdout); // TODO
 
         // Offset per cell layers
         long int previous_index = 0;
@@ -369,10 +387,10 @@ public:
                 previous_index = part_box_z_index;
             }
         }
-        for(size_t idx_layer = 0 ; idx_layer < my_nb_cell_levels ; ++idx_layer){
-            printf("[%d] nb particles in cutoff level %llu are %ld\n",
-                   my_rank, idx_layer, particles_offset_layers[idx_layer+1]); // TODO remove
-            fflush(stdout); // TODO
+        for(long int idx_layer = 0 ; idx_layer < my_nb_cell_levels ; ++idx_layer){
+//            printf("[%d] nb particles in cutoff level %ld are %ld\n",
+//                   my_rank, idx_layer, particles_offset_layers[idx_layer+1]); // TODO remove
+//            fflush(stdout); // TODO
             particles_offset_layers[idx_layer+1] += particles_offset_layers[idx_layer];
         }
 
@@ -383,9 +401,9 @@ public:
 
         // Find process with at least one neighbor
         {
-            std::cout << my_rank << " my_top_z_cell_level " << my_top_z_cell_level << std::endl;
-            std::cout << my_rank << " my_down_z_cell_level " << my_down_z_cell_level << std::endl;
-            std::cout.flush();// TODO
+//            std::cout << my_rank << " my_top_z_cell_level " << my_top_z_cell_level << std::endl;
+//            std::cout << my_rank << " my_down_z_cell_level " << my_down_z_cell_level << std::endl;
+//            std::cout.flush();// TODO
 
             int dest_proc = (my_rank+1)%nb_processes_involved;
             while(dest_proc != my_rank
@@ -398,10 +416,10 @@ public:
                     nb_levels_to_send += 1;
                 }
 
-                std::cout << my_rank << " dest_proc " << dest_proc << std::endl;
-                std::cout << my_rank << " first_cell_level_proc(dest_proc) " << first_cell_level_proc(dest_proc) << std::endl;
-                std::cout << my_rank << " last_cell_level_proc(dest_proc) " << last_cell_level_proc(dest_proc) << std::endl;
-                std::cout.flush();// TODO
+//                std::cout << my_rank << " dest_proc " << dest_proc << std::endl;
+//                std::cout << my_rank << " first_cell_level_proc(dest_proc) " << first_cell_level_proc(dest_proc) << std::endl;
+//                std::cout << my_rank << " last_cell_level_proc(dest_proc) " << last_cell_level_proc(dest_proc) << std::endl;
+//                std::cout.flush();// TODO
 
                 NeighborDescriptor descriptor;
                 descriptor.destProc = dest_proc;
@@ -409,21 +427,21 @@ public:
                 descriptor.nbParticlesToExchange = particles_offset_layers[my_nb_cell_levels] - particles_offset_layers[my_nb_cell_levels-nb_levels_to_send];
                 descriptor.isRecv = false;
 
-                std::cout << my_rank << "SEND" << std::endl;
-                std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
-                std::cout << "descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl;
-                std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
-                std::cout << "descriptor.isRecv " << descriptor.isRecv << std::endl;
-                std::cout << "neigDescriptors.size() " << neigDescriptors.size() << std::endl;
-                std::cout.flush();// TODO
+//                std::cout << my_rank << "SEND" << std::endl;
+//                std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
+//                std::cout << "descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl;
+//                std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
+//                std::cout << "descriptor.isRecv " << descriptor.isRecv << std::endl;
+//                std::cout << "neigDescriptors.size() " << neigDescriptors.size() << std::endl;
+//                std::cout.flush();// TODO
 
                 neigDescriptors.emplace_back(std::move(descriptor));
 
                 dest_proc = (dest_proc+1)%nb_processes_involved;
             }
-            std::cout << my_rank << " NO dest_proc " << dest_proc << std::endl;
-            std::cout << my_rank << " NO first_cell_level_proc(dest_proc) " << first_cell_level_proc(dest_proc) << std::endl;
-            std::cout.flush();// TODO
+//            std::cout << my_rank << " NO dest_proc " << dest_proc << std::endl;
+//            std::cout << my_rank << " NO first_cell_level_proc(dest_proc) " << first_cell_level_proc(dest_proc) << std::endl;
+//            std::cout.flush();// TODO
 
             int src_proc = (my_rank-1+nb_processes_involved)%nb_processes_involved;
             while(src_proc != my_rank
@@ -436,9 +454,9 @@ public:
                     nb_levels_to_recv += 1;
                 }
 
-                std::cout << my_rank << " src_proc " << src_proc << std::endl;
-                std::cout << my_rank << " first_cell_level_proc(src_proc) " << first_cell_level_proc(src_proc) << std::endl;
-                std::cout.flush();// TODO
+//                std::cout << my_rank << " src_proc " << src_proc << std::endl;
+//                std::cout << my_rank << " first_cell_level_proc(src_proc) " << first_cell_level_proc(src_proc) << std::endl;
+//                std::cout.flush();// TODO
 
                 NeighborDescriptor descriptor;
                 descriptor.destProc = src_proc;
@@ -448,20 +466,20 @@ public:
 
                 neigDescriptors.emplace_back(std::move(descriptor));
 
-                std::cout << my_rank << "] RECV" << std::endl;
-                std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
-                std::cout << "descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl;
-                std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
-                std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
-                std::cout << "descriptor.isRecv " << descriptor.isRecv << std::endl;
-                std::cout << "neigDescriptors.size() " << neigDescriptors.size() << std::endl;
-                std::cout.flush();// TODO
+//                std::cout << my_rank << "] RECV" << std::endl;
+//                std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
+//                std::cout << "descriptor.nbLevelsToExchange " << descriptor.nbLevelsToExchange << std::endl;
+//                std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
+//                std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
+//                std::cout << "descriptor.isRecv " << descriptor.isRecv << std::endl;
+//                std::cout << "neigDescriptors.size() " << neigDescriptors.size() << std::endl;
+//                std::cout.flush();// TODO
 
                 src_proc = (src_proc-1+nb_processes_involved)%nb_processes_involved;
             }
-            std::cout << my_rank << " NO src_proc " << src_proc << std::endl;
-            std::cout << my_rank << " NO first_cell_level_proc(src_proc) " << first_cell_level_proc(src_proc) << std::endl;
-            std::cout.flush();// TODO
+//            std::cout << my_rank << " NO src_proc " << src_proc << std::endl;
+//            std::cout << my_rank << " NO first_cell_level_proc(src_proc) " << first_cell_level_proc(src_proc) << std::endl;
+//            std::cout.flush();// TODO
         }
 
         //////////////////////////////////////////////////////////////////////
@@ -485,11 +503,11 @@ public:
                                     current_com, &mpiRequests.back()));
 
                 if(descriptor.nbParticlesToExchange){
-                    std::cout << my_rank << "] SEND_PARTICLES" << std::endl;
-                    std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
-                    std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
-                    std::cout << "idxDescr " << idxDescr << std::endl;
-                    std::cout << "send from part " << particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange] << std::endl;
+//                    std::cout << my_rank << "] SEND_PARTICLES" << std::endl;
+//                    std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
+//                    std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
+//                    std::cout << "idxDescr " << idxDescr << std::endl;
+//                    std::cout << "send from part " << particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange] << std::endl;
 
                     whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1});
                     mpiRequests.emplace_back();
@@ -510,8 +528,8 @@ public:
                 }
             }
             else{
-                std::cout << "RECV_PARTICLES " << RECV_PARTICLES << std::endl;
-                std::cout << "idxDescr " << idxDescr << std::endl;
+//                std::cout << "RECV_PARTICLES " << RECV_PARTICLES << std::endl;
+//                std::cout << "idxDescr " << idxDescr << std::endl;
                 whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr});
                 mpiRequests.emplace_back();
                 AssertMpi(MPI_Irecv(&descriptor.nbParticlesToExchange,
@@ -550,13 +568,13 @@ public:
                         assert(NbParticlesToReceive != -1);
                         assert(descriptor.toCompute == nullptr);
 
-                        std::cout << my_rank << "] RECV_PARTICLES" << std::endl;
-                        std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
-                        std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
-                        std::cout << "releasedAction.second " << releasedAction.second << std::endl;
+//                        std::cout << my_rank << "] RECV_PARTICLES" << std::endl;
+//                        std::cout << "descriptor.nbParticlesToExchange " << descriptor.nbParticlesToExchange << std::endl;
+//                        std::cout << "descriptor.destProc " << descriptor.destProc << std::endl;
+//                        std::cout << "releasedAction.second " << releasedAction.second << std::endl;
 
                         if(NbParticlesToReceive){
-                            std::cout << "MPI_Irecv " << std::endl;
+//                            std::cout << "MPI_Irecv " << std::endl;
                             descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]);
                             whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second});
                             mpiRequests.emplace_back();
@@ -597,22 +615,18 @@ public:
                             long int neighbors_indexes[27];
                             const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, true);
 
-                            for(int idx_test = 0 ; idx_test < nb_parts_in_cell ; ++idx_test){ // TODO
-                                if(int(descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_X]*1000) == int(1.685800e-01*1000)
-                                        && int(descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_Y]*1000) == int(7.524981e-01*1000)
-                                        && int(descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_Z]*1000) == int(9.999596e-01*1000)){
-                                    printf("Found a pos %ld\n", idxPart+idx_test);
-                                    printf("pos %e %e %e\n",
-                                           descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_X],
-                                            descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_Y],
-                                            descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_Z]);
-                                }
-                            }
-                            printf("Remote part from %ld for %ld at idx %ld\n", idxPart, nb_parts_in_cell, current_cell_idx); // TODO
-                            printf("pos of first %e %e %e\n", descriptor.toCompute[idxPart*size_particle_positions + IDX_X],
-                                    descriptor.toCompute[idxPart*size_particle_positions + IDX_Y],
-                                    descriptor.toCompute[idxPart*size_particle_positions + IDX_Z]); // TODO
-                            printf("nbNeighbors %d\n", nbNeighbors); // TODO
+//                            for(int idx_test = 0 ; idx_test < nb_parts_in_cell ; ++idx_test){ // TODO
+//                                real_number totest[3] = {8.570442e-01, 7.173084e-02, 8.279754e-03};
+//                                if(int(descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_X]*1000) == int(totest[0]*1000)
+//                                        && int(descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_Y]*1000) == int(totest[1]*1000)
+//                                        && int(descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_Z]*1000) == int(totest[2]*1000)){
+//                                    printf("Found a pos %ld\n", idxPart+idx_test);
+//                                    printf("pos %e %e %e\n",
+//                                           descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_X],
+//                                            descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_Y],
+//                                            descriptor.toCompute[(idxPart+idx_test)*size_particle_positions + IDX_Z]);
+//                                }
+//                            }
 
                             // with other interval
                             for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
@@ -625,7 +639,7 @@ public:
                                                                                             particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
                                                                                             particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
                                                                                             particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
-                                            if(dist_r2 < cutoff_radius*cutoff_radius){
+                                            if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                                                 in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
                                                                     &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions],
                                                                     &descriptor.results[(idxPart+idx_p1)*size_particle_rhs],
@@ -634,21 +648,21 @@ public:
                                                                     dist_r2);
                                             }
 
-                                            if(inout_index_particles[(*neighbors[idx_neighbor])[idx_2].first+idx_p2] == 132){// TODO
-                                                printf("test interaction between :\n");
-                                                printf("index %ld (%ld) pos %e %e %e\n",
-                                                       (idxPart+idx_p1), -1,
-                                                       descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X],
-                                                       descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y],
-                                                       descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z]);
-                                                printf("index %ld (%ld) pos %e %e %e\n",
-                                                       ((*neighbors[idx_neighbor])[idx_2].first+idx_p2),
-                                                       inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)],
-                                                       particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
-                                                       particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
-                                                       particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
-                                                printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
-                                            }
+//                                            if(inout_index_particles[(*neighbors[idx_neighbor])[idx_2].first+idx_p2] == 356){// TODO
+//                                                printf("test interaction between :\n");
+//                                                printf("index %ld (%ld) pos %e %e %e\n",
+//                                                       (idxPart+idx_p1), -1L,
+//                                                       descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_X],
+//                                                       descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Y],
+//                                                       descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDX_Z]);
+//                                                printf("index %ld (%ld) pos %e %e %e\n",
+//                                                       ((*neighbors[idx_neighbor])[idx_2].first+idx_p2),
+//                                                       inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)],
+//                                                       particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
+//                                                       particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
+//                                                       particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
+//                                                printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
+//                                            }
                                         }
                                     }
                                 }
@@ -700,6 +714,28 @@ public:
             for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
                 // self interval
                 for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
+//                    if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356))){// TODO
+//                        printf("box %ld:\n", iter_cell.first);
+//                        printf("intervals.size() %lu:\n", intervals.size());
+//                        printf("intervals[idx_1].second %ld:\n", intervals[idx_1].second);
+//                        printf("index %ld (%ld) pos %e %e %e\n",
+//                               (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
+//                               particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
+//                               particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
+//                               particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
+//                    }
+//                    if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547))){// TODO
+//                        printf("box %ld:\n", iter_cell.first);
+//                        printf("intervals.size() %lu:\n", intervals.size());
+//                        printf("intervals[idx_1].second %ld:\n", intervals[idx_1].second);
+//                        printf("index %ld (%ld) pos %e %e %e\n",
+//                               (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
+//                               particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
+//                               particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
+//                               particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
+//                    }
+
+
                     for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){
                         const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
                                                                         particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
@@ -707,7 +743,7 @@ public:
                                                                         particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X],
                                                                         particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y],
                                                                         particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z]);
-                        if(dist_r2 < cutoff_radius*cutoff_radius){
+                        if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                             in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
                                                 &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
                                                 &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
@@ -715,6 +751,27 @@ public:
                                                 &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs],
                                                 dist_r2);
                         }
+
+//                        if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356)
+//                                || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 356)/*
+//                                && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832)
+//                                    || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 1832)
+//                                && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547)
+//                                    || inout_index_particles[(intervals[idx_1].first+idx_p2)] == 547)*/){// TODO
+//                            printf("print between :\n");
+//                            printf("index %ld (%ld) pos %e %e %e\n",
+//                                   (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
+//                                   particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
+//                                   particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
+//                                   particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
+//                            printf("index %ld (%ld) pos %e %e %e\n",
+//                                   (intervals[idx_1].first+idx_p2),
+//                                   inout_index_particles[(intervals[idx_1].first+idx_p2)],
+//                                   particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_X],
+//                                   particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Y],
+//                                   particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z]);
+//                            printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
+//                        }
                     }
                 }
 
@@ -728,7 +785,7 @@ public:
                                                                             particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
                                                                             particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
                                                                             particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
-                            if(dist_r2 < cutoff_radius*cutoff_radius){
+                            if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                                 in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
                                                     &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
                                                     &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
@@ -736,6 +793,27 @@ public:
                                                     &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs],
                                                     dist_r2);
                             }
+
+//                            if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356)
+//                                    || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 356)/*
+//                                    && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547)
+//                                        || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 547)
+//                                    && ((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832)
+//                                        || inout_index_particles[(intervals[idx_2].first+idx_p2)] == 1832)*/){// TODO
+//                                printf("print between :\n");
+//                                printf("index %ld (%ld) pos %e %e %e\n",
+//                                       (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
+//                                       particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
+//                                       particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
+//                                       particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
+//                                printf("index %ld (%ld) pos %e %e %e\n",
+//                                       (intervals[idx_2].first+idx_p2),
+//                                       inout_index_particles[(intervals[idx_2].first+idx_p2)],
+//                                       particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
+//                                       particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
+//                                       particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
+//                                printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
+//                            }
                         }
                     }
                 }
@@ -747,6 +825,12 @@ public:
             long int neighbors_indexes[27];
             const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, false);
 
+//            if(((currenct_cell_idx == 785))){// TODO
+//                printf("box %ld:\n", iter_cell.first);
+//                printf("intervals.size() %lu:\n", intervals.size());
+//                printf("nbNeighbors %d\n",nbNeighbors);
+//            }
+
             for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
                 // with other interval
                 for(size_t idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
@@ -760,7 +844,7 @@ public:
                                                                                     particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
                                                                                     particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
                                                                                     particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
-                                    if(dist_r2 < cutoff_radius*cutoff_radius){
+                                    if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
                                         in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
                                                             &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
                                                             &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
@@ -768,6 +852,27 @@ public:
                                                             &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
                                                             dist_r2);
                                     }
+
+//                                    if(((inout_index_particles[(intervals[idx_1].first+idx_p1)] == 356)
+//                                            || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 356)/*
+//                                        && (inout_index_particles[(intervals[idx_1].first+idx_p1)] == 547)
+//                                            || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 547
+//                                        && (inout_index_particles[(intervals[idx_1].first+idx_p1)] == 1832)
+//                                            || inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)] == 1832*/){// TODO
+//                                        printf("print between :\n");
+//                                        printf("index %ld (%ld) pos %e %e %e\n",
+//                                               (intervals[idx_1].first+idx_p1), inout_index_particles[(intervals[idx_1].first+idx_p1)],
+//                                               particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_X],
+//                                               particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Y],
+//                                               particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDX_Z]);
+//                                        printf("index %ld (%ld) pos %e %e %e\n",
+//                                               ((*neighbors[idx_neighbor])[idx_2].first+idx_p2),
+//                                               inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)],
+//                                               particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_X],
+//                                               particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Y],
+//                                               particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z]);
+//                                        printf("Radius = %e (%e)\n", sqrt(dist_r2), dist_r2);
+//                                    }
                                 }
                             }
                         }
-- 
GitLab