From 78d0d11e152978355238f8d65628624cfa115921 Mon Sep 17 00:00:00 2001
From: Berenger Bramas <Berenger.Bramas@inria.fr>
Date: Mon, 14 Oct 2019 12:53:57 +0200
Subject: [PATCH] Add first version of the P2P collision counter

---
 .../static_field_with_ghost_collisions.cpp    | 253 ++++++++++++++++++
 .../static_field_with_ghost_collisions.hpp    |  87 ++++++
 .../abstract_particles_system_with_p2p.hpp    |  13 +
 cpp/particles/p2p_computer.hpp                |   3 +
 cpp/particles/p2p_computer_empty.hpp          |   2 +
 cpp/particles/p2p_cylinder_collisions.hpp     |  75 ++++++
 cpp/particles/p2p_distr_mpi.hpp               | 165 ++++++------
 cpp/particles/p2p_ghost_collisions.hpp        |  72 +++++
 cpp/particles/particles_system.hpp            |  11 +-
 cpp/particles/particles_system_builder.hpp    |   8 +-
 10 files changed, 608 insertions(+), 81 deletions(-)
 create mode 100644 cpp/full_code/static_field_with_ghost_collisions.cpp
 create mode 100644 cpp/full_code/static_field_with_ghost_collisions.hpp
 create mode 100644 cpp/particles/abstract_particles_system_with_p2p.hpp
 create mode 100644 cpp/particles/p2p_cylinder_collisions.hpp
 create mode 100644 cpp/particles/p2p_ghost_collisions.hpp

diff --git a/cpp/full_code/static_field_with_ghost_collisions.cpp b/cpp/full_code/static_field_with_ghost_collisions.cpp
new file mode 100644
index 00000000..6a93d142
--- /dev/null
+++ b/cpp/full_code/static_field_with_ghost_collisions.cpp
@@ -0,0 +1,253 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+
+
+#include <string>
+#include <cmath>
+#include "static_field_with_ghost_collisions.hpp"
+#include "scope_timer.hpp"
+#include "fftw_tools.hpp"
+
+
+template <typename rnumber>
+int static_field<rnumber>::initialize(void)
+{
+    TIMEZONE("static_file::initialize");
+    this->read_iteration();
+    this->read_parameters();
+    if (this->myrank == 0)
+    {
+        // set caching parameters
+        hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
+        herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0);
+        variable_used_only_in_assert(cache_err);
+        DEBUG_MSG("when setting stat_file cache I got %d\n", cache_err);
+        this->stat_file = H5Fopen(
+                (this->simname + ".h5").c_str(),
+                H5F_ACC_RDWR,
+                fapl);
+    }
+    int data_file_problem;
+    if (this->myrank == 0)
+        data_file_problem = this->grow_file_datasets();
+    MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, this->comm);
+    if (data_file_problem > 0)
+    {
+        std::cerr <<
+            data_file_problem <<
+            " problems growing file datasets.\ntrying to exit now." <<
+            std::endl;
+        return EXIT_FAILURE;
+    }
+
+    this->vorticity = new field<rnumber, FFTW, THREE>(
+            nx, ny, nz,
+            this->comm,
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
+    this->vorticity->real_space_representation = false;
+
+    this->velocity = new field<rnumber, FFTW, THREE>(
+            nx, ny, nz,
+            this->comm,
+            fftw_planner_string_to_flag[this->fftw_plan_rigor]);
+    this->velocity->real_space_representation = false;
+    //read vorticity field
+    this->vorticity->io(
+            this->get_current_fname(),
+            "vorticity",
+            this->iteration,
+            true);
+    this->kk = new kspace<FFTW, SMOOTH>(
+            this->vorticity->clayout, this->dkx, this->dky, this->dkz);
+
+    // compute the velocity field and store
+    invert_curl(
+        this->kk,
+        this->vorticity,
+        velocity);
+    // transform velocity and vorticity fields to real space
+    this->velocity->ift();
+    this->vorticity->ift();
+
+    p2p_ghost_collisions<double, long long int> p2p_ghost_collisions;
+    p2p_ghost_collisions.setEnable(this->enable_p2p);
+
+    this->ps = Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<long long int, double>>,
+                       int, 1, 11, 1, // interpolation_size
+                       int, 0, 3, 1, // spline_mode
+                       particles_system_build_container<long long int, rnumber,FFTW,THREE,double,
+                                                        collision_detector<double, long long int>,
+                                                        particles_inner_computer_empty<double,long long int>,
+                                                        3,3>>(
+                           this->tracers0_neighbours, // template iterator 1
+                           this->tracers0_smoothness, // template iterator 2
+                           this->velocity,
+                           this->kk,
+                           tracers0_integration_steps,
+                           (long long int)nparticles,
+                           this->get_current_fname(),
+                           std::string("/tracers0/state/") + std::to_string(this->iteration),
+                           std::string("/tracers0/rhs/") + std::to_string(this->iteration),
+                           this->comm,
+                           this->iteration+1,
+                           std::move(cd),
+                           std::move(p2p_ghost_collisions),
+                           this->tracers0_cutoff);
+
+    // initialize output objects
+    this->particles_output_writer_mpi = new particles_output_hdf5<
+        long long int, double, 3>(
+                MPI_COMM_WORLD,
+                "tracers0",
+                nparticles,
+                tracers0_integration_steps);
+    this->particles_output_writer_mpi->setParticleFileLayout(this->ps->getParticleFileLayout());
+    this->particles_sample_writer_mpi = new particles_output_sampling_hdf5<
+        long long int, double, 3>(
+                MPI_COMM_WORLD,
+                this->ps->getGlobalNbParticles(),
+                (this->simname + "_particles.h5"),
+                "tracers0",
+                "position/0");
+    this->particles_sample_writer_mpi->setParticleFileLayout(this->ps->getParticleFileLayout());
+
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int static_field<rnumber>::step(void)
+{
+    TIMEZONE("static_file::step");
+    this->ps->getP2PComputer().reset_collision_counter(); // TODO should it be here ?
+    this->ps->completeLoop(this->dt);
+    const long int nbCollision = this->ps->getP2PComputer().get_collision_counter(); // TODO should it be here ?
+    this->iteration++;
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int static_field<rnumber>::write_checkpoint(void)
+{
+    TIMEZONE("static_file::write_checkpoint");
+    this->particles_output_writer_mpi->open_file(this->get_current_fname());
+    this->particles_output_writer_mpi->template save<3>(
+            this->ps->getParticlesState(),
+            this->ps->getParticlesRhs(),
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->iteration);
+    this->particles_output_writer_mpi->close_file();
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int static_field<rnumber>::finalize(void)
+{
+    TIMEZONE("static_field::finalize");
+    if (this->myrank == 0)
+        H5Fclose(this->stat_file);
+    // good practice rule number n+1: always delete in inverse order of allocation
+    delete this->ps.release();
+    delete this->particles_output_writer_mpi;
+    delete this->particles_sample_writer_mpi;
+    delete this->kk;
+    delete this->velocity;
+    delete this->vorticity;
+    return EXIT_SUCCESS;
+}
+
+/** \brief Compute statistics.
+ *
+ */
+
+template <typename rnumber>
+int static_field<rnumber>::do_stats()
+{
+    TIMEZONE("static_field::do_stats");
+    /// either one of two conditions suffices to compute statistics:
+    /// 1) current iteration is a multiple of niter_part
+    /// 2) we are within niter_part_fine_duration/2 of a multiple of niter_part_fine_period
+    if (!(this->iteration % this->niter_part == 0 ||
+          ((this->iteration + this->niter_part_fine_duration/2) % this->niter_part_fine_period <=
+           this->niter_part_fine_duration)))
+        return EXIT_SUCCESS;
+
+    // allocate temporary data array
+    std::unique_ptr<double[]> pdata(new double[3*this->ps->getLocalNbParticles()]);
+
+    /// copy position data
+
+    /// sample position
+    std::copy(this->ps->getParticlesState(),
+              this->ps->getParticlesState()+3*this->ps->getLocalNbParticles(),
+              pdata.get());
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "position",
+            this->ps->getParticlesState(),
+            &pdata,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    /// sample velocity
+    std::fill_n(pdata.get(), 3*this->ps->getLocalNbParticles(), 0);
+    this->ps->sample_compute_field(*this->velocity, pdata.get());
+    this->particles_sample_writer_mpi->template save_dataset<3>(
+            "tracers0",
+            "velocity",
+            this->ps->getParticlesState(),
+            &pdata,
+            this->ps->getParticlesIndexes(),
+            this->ps->getLocalNbParticles(),
+            this->ps->get_step_idx()-1);
+
+    // deallocate temporary data array
+    delete[] pdata.release();
+
+    return EXIT_SUCCESS;
+}
+
+template <typename rnumber>
+int static_field<rnumber>::read_parameters(void)
+{
+    TIMEZONE("static_field::read_parameters");
+    this->direct_numerical_simulation::read_parameters();
+    hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+    this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor");
+    this->dt = hdf5_tools::read_value<double>(parameter_file, "parameters/dt");
+    this->niter_part = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part");
+    this->niter_part_fine_period = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part_fine_period");
+    this->niter_part_fine_duration = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part_fine_duration");
+    this->nparticles = hdf5_tools::read_value<int>(parameter_file, "parameters/nparticles");
+    this->tracers0_integration_steps = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_integration_steps");
+    this->tracers0_neighbours = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_neighbours");
+    this->tracers0_smoothness = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_smoothness");
+    H5Fclose(parameter_file);
+    return EXIT_SUCCESS;
+}
+
+template class static_field<float>;
+template class static_field<double>;
+
+
diff --git a/cpp/full_code/static_field_with_ghost_collisions.hpp b/cpp/full_code/static_field_with_ghost_collisions.hpp
new file mode 100644
index 00000000..1e280fd4
--- /dev/null
+++ b/cpp/full_code/static_field_with_ghost_collisions.hpp
@@ -0,0 +1,87 @@
+/**********************************************************************
+*                                                                     *
+*  Copyright 2017 Max Planck Institute                                *
+*                 for Dynamics and Self-Organization                  *
+*                                                                     *
+*  This file is part of bfps.                                         *
+*                                                                     *
+*  bfps is free software: you can redistribute it and/or modify       *
+*  it under the terms of the GNU General Public License as published  *
+*  by the Free Software Foundation, either version 3 of the License,  *
+*  or (at your option) any later version.                             *
+*                                                                     *
+*  bfps is distributed in the hope that it will be useful,            *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
+*  GNU General Public License for more details.                       *
+*                                                                     *
+*  You should have received a copy of the GNU General Public License  *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
+*                                                                     *
+* Contact: Cristian.Lalescu@ds.mpg.de                                 *
+*                                                                     *
+**********************************************************************/
+
+
+#ifndef STATIC_FIELD_WITH_GHOST_COLLISIONS_HPP
+#define STATIC_FIELD_WITH_GHOST_COLLISIONS_HPP
+
+#include <cstdlib>
+#include "base.hpp"
+#include "kspace.hpp"
+#include "field.hpp"
+#include "full_code/direct_numerical_simulation.hpp"
+#include "particles/particles_system_builder.hpp"
+#include "particles/particles_output_hdf5.hpp"
+#include "particles/particles_sampling.hpp"
+#include "particles/p2p_ghost_collisions.hpp"
+
+template <typename rnumber>
+class static_field: public direct_numerical_simulation
+{
+    public:
+
+        /* parameters that are read in read_parameters */
+        double dt;
+        std::string fftw_plan_rigor;
+
+        /* particle parameters */
+        int niter_part;
+        int niter_part_fine_period;
+        int niter_part_fine_duration;
+        int nparticles;
+        int tracers0_integration_steps;
+        int tracers0_neighbours;
+        int tracers0_smoothness;
+
+        /* other stuff */
+        kspace<FFTW, SMOOTH> *kk;
+        field<rnumber, FFTW, THREE> *vorticity;
+        field<rnumber, FFTW, THREE> *velocity;
+
+        /* other stuff */
+        std::unique_ptr<abstract_particles_system_with_p2p<long long int, double, p2p_ghost_collisions>> ps;
+
+        particles_output_hdf5<long long int, double,3> *particles_output_writer_mpi;
+        particles_output_sampling_hdf5<long long int, double, 3> *particles_sample_writer_mpi;
+
+
+        static_field(
+                const MPI_Comm COMMUNICATOR,
+                const std::string &simulation_name):
+            direct_numerical_simulation(
+                    COMMUNICATOR,
+                    simulation_name){}
+        ~static_field(){}
+
+        int initialize(void);
+        int step(void);
+        int finalize(void);
+
+        virtual int read_parameters(void);
+        int write_checkpoint(void);
+        int do_stats(void);
+};
+
+#endif//STATIC_FIELD_HPP
+
diff --git a/cpp/particles/abstract_particles_system_with_p2p.hpp b/cpp/particles/abstract_particles_system_with_p2p.hpp
new file mode 100644
index 00000000..3c31abf6
--- /dev/null
+++ b/cpp/particles/abstract_particles_system_with_p2p.hpp
@@ -0,0 +1,13 @@
+#ifndef ABSTRACT_PARTICLES_SYSTEM_WITH_P2P_HPP
+#define ABSTRACT_PARTICLES_SYSTEM_WITH_P2P_HPP
+
+#include "abstract_particles_system.hpp"
+
+template <class partsize_t, class real_number, class p2p_computer_class>
+class abstract_particles_system_with_p2p : public abstract_particles_system<partsize_t,real_number> {
+public:
+    virtual p2p_computer_class& getP2PComputer() = 0;
+    virtual const p2p_computer_class& getP2PComputer() const = 0;
+};
+
+#endif // ABSTRACT_PARTICLES_SYSTEM_WITH_P2P_HPP
diff --git a/cpp/particles/p2p_computer.hpp b/cpp/particles/p2p_computer.hpp
index 74d9c9eb..5fd2aef8 100644
--- a/cpp/particles/p2p_computer.hpp
+++ b/cpp/particles/p2p_computer.hpp
@@ -98,6 +98,9 @@ public:
         rhs_part2[3+IDXC_Z] += pos_part1[3+IDXC_Z] * 4 * ww * dot_product;
     }
 
+
+    void merge(const p2p_computer&){}
+
     bool isEnable() const {
         return isActive;
     }
diff --git a/cpp/particles/p2p_computer_empty.hpp b/cpp/particles/p2p_computer_empty.hpp
index 0599dc1a..c7c0057f 100644
--- a/cpp/particles/p2p_computer_empty.hpp
+++ b/cpp/particles/p2p_computer_empty.hpp
@@ -46,6 +46,8 @@ public:
                              const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{
     }
 
+    void merge(const p2p_computer_empty&){}
+
     constexpr static bool isEnable() {
         return false;
     }
diff --git a/cpp/particles/p2p_cylinder_collisions.hpp b/cpp/particles/p2p_cylinder_collisions.hpp
new file mode 100644
index 00000000..bcc2b3e7
--- /dev/null
+++ b/cpp/particles/p2p_cylinder_collisions.hpp
@@ -0,0 +1,75 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+#ifndef P2P_CYLINDER_COLLISIONS_HPP
+#define P2P_CYLINDER_COLLISIONS_HPP
+
+#include <cstring>
+
+template <class real_number, class partsize_t>
+class p2p_cylinder_collisions{
+    long int collision_counter;
+
+public:
+    p2p_cylinder_collisions() : collision_counter(0){}
+
+    // Copy constructor use a counter set to zero
+    p2p_cylinder_collisions(const p2p_cylinder_collisions&) : collision_counter(0){}
+
+    template <int size_particle_rhs>
+    void init_result_array(real_number /*rhs*/[], const partsize_t /*nbParticles*/) const{
+    }
+
+    template <int size_particle_rhs>
+    void reduce_particles_rhs(real_number /*rhs_dst*/[], const real_number /*rhs_src*/[], const partsize_t /*nbParticles*/) const{
+    }
+
+    template <int size_particle_positions, int size_particle_rhs>
+    void compute_interaction(const real_number /*pos_part1*/[], real_number /*rhs_part1*/[],
+                             const real_number /*pos_part2*/[], real_number /*rhs_part2*/[],
+                             const real_number /*dist_pow2*/,  const real_number /*cutoff*/,
+                             const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{
+        // TODO check the orientation, etc....
+        if(check the test here){
+            collision_counter += 1;
+        }
+    }
+
+    void merge(const p2p_cylinder_collisions& other){
+        collision_counter += other.collision_counter;
+    }
+
+    constexpr static bool isEnable() {
+        return false;
+    }
+
+    long int get_collision_counter() const{
+        return collision_counter;
+    }
+
+    void reset_collision_counter() const{
+        collision_counter = 0;
+    }
+};
+
+
+#endif // P2P_CYLINDER_COLLISIONS_HPP
diff --git a/cpp/particles/p2p_distr_mpi.hpp b/cpp/particles/p2p_distr_mpi.hpp
index 7ab3a8b3..64b7b605 100644
--- a/cpp/particles/p2p_distr_mpi.hpp
+++ b/cpp/particles/p2p_distr_mpi.hpp
@@ -530,9 +530,15 @@ public:
 
         lock_free_bool_array cells_locker(512);
 
+        std::vector<std::unique_ptr<computer_class>> computer_for_all_threads(omp_get_num_threads()-1);
+        for(int idxThread = 1 ; idxThread < omp_get_num_threads() ; ++idxThread){
+            computer_for_all_threads[idxThread-1].reset(new computer_class(in_computer));
+        }
+
         TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads())
         #pragma omp parallel default(shared)
         {
+            computer_class& computer_thread = (omp_get_thread_num() == 0 ? in_computer : *computer_for_all_threads[omp_get_thread_num()-1]);
             #pragma omp master
             {
                 while(mpiRequests.size()){
@@ -584,7 +590,7 @@ public:
 
                         assert(descriptor.toCompute != nullptr);
                         descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]);
-                        in_computer.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive);
+                        computer_thread.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive);
 
                         // Compute
                         partsize_t idxPart = 0;
@@ -622,7 +628,7 @@ public:
                                                                                                 particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
                                                                                                 shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
                                                 if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
-                                                    in_computer.template compute_interaction<size_particle_positions, size_particle_rhs>(
+                                                    computer_thread.template compute_interaction<size_particle_positions, size_particle_rhs>(
                                                                         &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions],
                                                                         &descriptor.results[(idxPart+idx_p1)*size_particle_rhs],
                                                                         &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
@@ -669,7 +675,7 @@ public:
                         NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second];
                         assert(descriptor.isRecv == false);
                         assert(descriptor.toRecvAndMerge != nullptr);
-                        in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_rhs],
+                        computer_thread.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_rhs],
                                 descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToExchange);
                         delete[] descriptor.toRecvAndMerge.release();
                     }
@@ -680,104 +686,111 @@ public:
         assert(whatNext.size() == 0);
         assert(mpiRequests.size() == 0);
 
-        // Compute self data
-        for(const auto& iter_cell : my_tree){
-            TIMEZONE("proceed-leaf");
-            const long int currenct_cell_idx = iter_cell.first;
-            const std::vector<std::pair<partsize_t,partsize_t>>* intervals_ptr = &iter_cell.second;
+        {
+            computer_class& computer_thread = (omp_get_thread_num() == 0 ? in_computer : *computer_for_all_threads[omp_get_thread_num()-1]);
+            // Compute self data
+            for(const auto& iter_cell : my_tree){
+                TIMEZONE("proceed-leaf");
+                const long int currenct_cell_idx = iter_cell.first;
+                const std::vector<std::pair<partsize_t,partsize_t>>* intervals_ptr = &iter_cell.second;
 
-#pragma omp task default(shared) firstprivate(currenct_cell_idx, intervals_ptr)
-            {
-                const std::vector<std::pair<partsize_t,partsize_t>>& intervals = (*intervals_ptr);
-
-                cells_locker.lock(currenct_cell_idx);
-
-                for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
-                    // self interval
-                    for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
-                        for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){
-                            const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
-                                                                            particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
-                                                                            particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
-                                                                            particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_X],
-                                                                            particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Y],
-                                                                            particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Z],
-                                                                            0, 0, 0);
-                            if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
-                                in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
-                                                    &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
-                                                    &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
-                                                    &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions],
-                                                    &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs],
-                                                    dist_r2, cutoff_radius_compute, 0, 0, 0);
-                            }
-                        }
-                    }
+    #pragma omp task default(shared) firstprivate(currenct_cell_idx, intervals_ptr)
+                {
+                    const std::vector<std::pair<partsize_t,partsize_t>>& intervals = (*intervals_ptr);
+
+                    cells_locker.lock(currenct_cell_idx);
 
-                    // with other interval
-                    for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){
+                    for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
+                        // self interval
                         for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
-                            for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){
+                            for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){
                                 const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
                                                                                 particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
                                                                                 particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
-                                                                                particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
-                                                                                particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
-                                                                                particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
+                                                                                particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_X],
+                                                                                particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Y],
+                                                                                particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Z],
                                                                                 0, 0, 0);
                                 if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
-                                    in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
+                                    computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>(
                                                         &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
                                                         &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
-                                                        &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions],
-                                                        &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs],
+                                                        &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions],
+                                                        &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs],
                                                         dist_r2, cutoff_radius_compute, 0, 0, 0);
                                 }
                             }
                         }
+
+                        // with other interval
+                        for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){
+                            for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
+                                for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){
+                                    const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
+                                                                                    particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
+                                                                                    particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
+                                                                                    particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
+                                                                                    particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
+                                                                                    particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
+                                                                                    0, 0, 0);
+                                    if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
+                                        computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>(
+                                                            &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
+                                                            &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
+                                                            &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions],
+                                                            &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs],
+                                                            dist_r2, cutoff_radius_compute, 0, 0, 0);
+                                    }
+                                }
+                            }
+                        }
                     }
-                }
 
-                const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
-                long int neighbors_indexes[27];
-                std::array<real_number,3> shift[27];
-                const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false);
-
-                for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
-                    // with other interval
-                    for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
-                        if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){
-                            cells_locker.lock(neighbors_indexes[idx_neighbor]);
-
-                            for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
-                                for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
-                                    for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
-                                        const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
-                                                                                        particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
-                                                                                        particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
-                                                                                        particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
-                                                                                        particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
-                                                                                        particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
-                                                                                        shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
-                                        if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
-                                            in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>(
-                                                                &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
-                                                                &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
-                                                                &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
-                                                                &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
-                                                                dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
+                    const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27];
+                    long int neighbors_indexes[27];
+                    std::array<real_number,3> shift[27];
+                    const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false);
+
+                    for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){
+                        // with other interval
+                        for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){
+                            if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){
+                                cells_locker.lock(neighbors_indexes[idx_neighbor]);
+
+                                for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){
+                                    for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){
+                                        for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){
+                                            const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X],
+                                                                                            particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y],
+                                                                                            particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z],
+                                                                                            particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X],
+                                                                                            particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y],
+                                                                                            particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z],
+                                                                                            shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
+                                            if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){
+                                                computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>(
+                                                                    &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions],
+                                                                    &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs],
+                                                                    &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions],
+                                                                    &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs],
+                                                                    dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]);
+                                            }
                                         }
                                     }
                                 }
+                                cells_locker.unlock(neighbors_indexes[idx_neighbor]);
                             }
-                            cells_locker.unlock(neighbors_indexes[idx_neighbor]);
                         }
                     }
-                }
 
-                cells_locker.unlock(currenct_cell_idx);
+                    cells_locker.unlock(currenct_cell_idx);
+                }
             }
         }
+
+        for(int idxThread = 1 ; idxThread < omp_get_num_threads() ; ++idxThread){
+            in_computer.merge(*computer_for_all_threads[idxThread-1]);
+        }
     }
 };
 
diff --git a/cpp/particles/p2p_ghost_collisions.hpp b/cpp/particles/p2p_ghost_collisions.hpp
new file mode 100644
index 00000000..95bdc9e1
--- /dev/null
+++ b/cpp/particles/p2p_ghost_collisions.hpp
@@ -0,0 +1,72 @@
+/******************************************************************************
+*                                                                             *
+*  Copyright 2019 Max Planck Institute for Dynamics and Self-Organization     *
+*                                                                             *
+*  This file is part of bfps.                                                 *
+*                                                                             *
+*  bfps is free software: you can redistribute it and/or modify               *
+*  it under the terms of the GNU General Public License as published          *
+*  by the Free Software Foundation, either version 3 of the License,          *
+*  or (at your option) any later version.                                     *
+*                                                                             *
+*  bfps is distributed in the hope that it will be useful,                    *
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+*  GNU General Public License for more details.                               *
+*                                                                             *
+*  You should have received a copy of the GNU General Public License          *
+*  along with bfps.  If not, see <http://www.gnu.org/licenses/>               *
+*                                                                             *
+* Contact: Cristian.Lalescu@ds.mpg.de                                         *
+*                                                                             *
+******************************************************************************/
+#ifndef P2P_GHOST_COLLISIONS_HPP
+#define P2P_GHOST_COLLISIONS_HPP
+
+#include <cstring>
+
+template <class real_number, class partsize_t>
+class p2p_ghost_collisions{
+    long int collision_counter;
+
+public:
+    p2p_ghost_collisions() : collision_counter(0){}
+
+    // Copy constructor use a counter set to zero
+    p2p_ghost_collisions(const p2p_ghost_collisions&) : collision_counter(0){}
+
+    template <int size_particle_rhs>
+    void init_result_array(real_number /*rhs*/[], const partsize_t /*nbParticles*/) const{
+    }
+
+    template <int size_particle_rhs>
+    void reduce_particles_rhs(real_number /*rhs_dst*/[], const real_number /*rhs_src*/[], const partsize_t /*nbParticles*/) const{
+    }
+
+    template <int size_particle_positions, int size_particle_rhs>
+    void compute_interaction(const real_number /*pos_part1*/[], real_number /*rhs_part1*/[],
+                             const real_number /*pos_part2*/[], real_number /*rhs_part2*/[],
+                             const real_number /*dist_pow2*/,  const real_number /*cutoff*/,
+                             const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{
+        collision_counter += 1;
+    }
+
+    void merge(const p2p_ghost_collisions& other){
+        collision_counter += other.collision_counter;
+    }
+
+    constexpr static bool isEnable() {
+        return false;
+    }
+
+    long int get_collision_counter() const{
+        return collision_counter;
+    }
+
+    void reset_collision_counter() const{
+        collision_counter = 0;
+    }
+};
+
+
+#endif // P2P_GHOST_COLLISIONS_HPP
diff --git a/cpp/particles/particles_system.hpp b/cpp/particles/particles_system.hpp
index 4981592c..cdb523bb 100644
--- a/cpp/particles/particles_system.hpp
+++ b/cpp/particles/particles_system.hpp
@@ -29,6 +29,7 @@
 #include <array>
 
 #include "abstract_particles_system.hpp"
+#include "abstract_particles_system_with_p2p.hpp"
 #include "particles_distr_mpi.hpp"
 #include "particles_output_hdf5.hpp"
 #include "particles_output_mpiio.hpp"
@@ -41,7 +42,7 @@
 
 template <class partsize_t, class real_number, class field_rnumber, class field_class, class interpolator_class, int interp_neighbours,
           int size_particle_positions, int size_particle_rhs, class p2p_computer_class, class particles_inner_computer_class>
-class particles_system : public abstract_particles_system<partsize_t, real_number> {
+class particles_system : public abstract_particles_system_with_p2p<partsize_t, real_number, p2p_computer_class> {
     static_assert(size_particle_positions >= 3, "There should be at least the positions X,Y,Z in the state");
 
     MPI_Comm mpi_com;
@@ -402,6 +403,14 @@ public:
             }
         }
     }
+
+    const p2p_computer_class& getP2PComputer() const final{
+        return computer_p2p;
+    }
+
+    p2p_computer_class& getP2PComputer() final{
+        return computer_p2p;
+    }
 };
 
 
diff --git a/cpp/particles/particles_system_builder.hpp b/cpp/particles/particles_system_builder.hpp
index 6a6f4a26..dfc20dc6 100644
--- a/cpp/particles/particles_system_builder.hpp
+++ b/cpp/particles/particles_system_builder.hpp
@@ -140,7 +140,7 @@ template <class partsize_t, class field_rnumber, field_backend be, field_compone
           class particles_inner_computer_class, int size_particle_positions, int size_particle_rhs>
 struct particles_system_build_container {
     template <const int interpolation_size, const int spline_mode>
-    static std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> instanciate(
+    static std::unique_ptr<abstract_particles_system_with_p2p<partsize_t, particles_rnumber, p2p_computer_class>> instanciate(
              const field<field_rnumber, be, fc>* fs_field, // (field object)
              const kspace<be, SMOOTH>* fs_kk, // (kspace object, contains dkx, dky, dkz)
              const int nsteps, // to check coherency between parameters and hdf input file (nb rhs)
@@ -272,7 +272,7 @@ struct particles_system_build_container {
         part_sys->setParticleFileLayout(generator.getParticleFileLayout());
 
         // Return the created particles system
-        return std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>(part_sys);
+        return std::unique_ptr<abstract_particles_system_with_p2p<partsize_t, particles_rnumber, p2p_computer_class>>(part_sys);
     }
 };
 
@@ -305,7 +305,7 @@ inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>
 template <class partsize_t, class field_rnumber, field_backend be, field_components fc,
           class p2p_computer_class, class particles_inner_computer_class,
           class particles_rnumber = double>
-inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> particles_system_builder_with_p2p(
+inline std::unique_ptr<abstract_particles_system_with_p2p<partsize_t, particles_rnumber, p2p_computer_class>> particles_system_builder_with_p2p(
         const field<field_rnumber, be, fc>* fs_field, // (field object)
         const kspace<be, SMOOTH>* fs_kk, // (kspace object, contains dkx, dky, dkz)
         const int nsteps, // to check coherency between parameters and hdf input file (nb rhs)
@@ -319,7 +319,7 @@ inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>
         p2p_computer_class p2p_computer,
         particles_inner_computer_class inner_computer,
         const particles_rnumber cutoff){
-    return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>,
+    return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system_with_p2p<partsize_t, particles_rnumber, p2p_computer_class>>,
                        int, 1, 11, 1, // interpolation_size
                        int, 0, 3, 1, // spline_mode
                        particles_system_build_container<partsize_t, field_rnumber,be,fc,particles_rnumber,
-- 
GitLab