From 78d0d11e152978355238f8d65628624cfa115921 Mon Sep 17 00:00:00 2001 From: Berenger Bramas <Berenger.Bramas@inria.fr> Date: Mon, 14 Oct 2019 12:53:57 +0200 Subject: [PATCH] Add first version of the P2P collision counter --- .../static_field_with_ghost_collisions.cpp | 253 ++++++++++++++++++ .../static_field_with_ghost_collisions.hpp | 87 ++++++ .../abstract_particles_system_with_p2p.hpp | 13 + cpp/particles/p2p_computer.hpp | 3 + cpp/particles/p2p_computer_empty.hpp | 2 + cpp/particles/p2p_cylinder_collisions.hpp | 75 ++++++ cpp/particles/p2p_distr_mpi.hpp | 165 ++++++------ cpp/particles/p2p_ghost_collisions.hpp | 72 +++++ cpp/particles/particles_system.hpp | 11 +- cpp/particles/particles_system_builder.hpp | 8 +- 10 files changed, 608 insertions(+), 81 deletions(-) create mode 100644 cpp/full_code/static_field_with_ghost_collisions.cpp create mode 100644 cpp/full_code/static_field_with_ghost_collisions.hpp create mode 100644 cpp/particles/abstract_particles_system_with_p2p.hpp create mode 100644 cpp/particles/p2p_cylinder_collisions.hpp create mode 100644 cpp/particles/p2p_ghost_collisions.hpp diff --git a/cpp/full_code/static_field_with_ghost_collisions.cpp b/cpp/full_code/static_field_with_ghost_collisions.cpp new file mode 100644 index 00000000..6a93d142 --- /dev/null +++ b/cpp/full_code/static_field_with_ghost_collisions.cpp @@ -0,0 +1,253 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ + + +#include <string> +#include <cmath> +#include "static_field_with_ghost_collisions.hpp" +#include "scope_timer.hpp" +#include "fftw_tools.hpp" + + +template <typename rnumber> +int static_field<rnumber>::initialize(void) +{ + TIMEZONE("static_file::initialize"); + this->read_iteration(); + this->read_parameters(); + if (this->myrank == 0) + { + // set caching parameters + hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); + herr_t cache_err = H5Pset_cache(fapl, 0, 521, 134217728, 1.0); + variable_used_only_in_assert(cache_err); + DEBUG_MSG("when setting stat_file cache I got %d\n", cache_err); + this->stat_file = H5Fopen( + (this->simname + ".h5").c_str(), + H5F_ACC_RDWR, + fapl); + } + int data_file_problem; + if (this->myrank == 0) + data_file_problem = this->grow_file_datasets(); + MPI_Bcast(&data_file_problem, 1, MPI_INT, 0, this->comm); + if (data_file_problem > 0) + { + std::cerr << + data_file_problem << + " problems growing file datasets.\ntrying to exit now." << + std::endl; + return EXIT_FAILURE; + } + + this->vorticity = new field<rnumber, FFTW, THREE>( + nx, ny, nz, + this->comm, + fftw_planner_string_to_flag[this->fftw_plan_rigor]); + this->vorticity->real_space_representation = false; + + this->velocity = new field<rnumber, FFTW, THREE>( + nx, ny, nz, + this->comm, + fftw_planner_string_to_flag[this->fftw_plan_rigor]); + this->velocity->real_space_representation = false; + //read vorticity field + this->vorticity->io( + this->get_current_fname(), + "vorticity", + this->iteration, + true); + this->kk = new kspace<FFTW, SMOOTH>( + this->vorticity->clayout, this->dkx, this->dky, this->dkz); + + // compute the velocity field and store + invert_curl( + this->kk, + this->vorticity, + velocity); + // transform velocity and vorticity fields to real space + this->velocity->ift(); + this->vorticity->ift(); + + p2p_ghost_collisions<double, long long int> p2p_ghost_collisions; + p2p_ghost_collisions.setEnable(this->enable_p2p); + + this->ps = Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<long long int, double>>, + int, 1, 11, 1, // interpolation_size + int, 0, 3, 1, // spline_mode + particles_system_build_container<long long int, rnumber,FFTW,THREE,double, + collision_detector<double, long long int>, + particles_inner_computer_empty<double,long long int>, + 3,3>>( + this->tracers0_neighbours, // template iterator 1 + this->tracers0_smoothness, // template iterator 2 + this->velocity, + this->kk, + tracers0_integration_steps, + (long long int)nparticles, + this->get_current_fname(), + std::string("/tracers0/state/") + std::to_string(this->iteration), + std::string("/tracers0/rhs/") + std::to_string(this->iteration), + this->comm, + this->iteration+1, + std::move(cd), + std::move(p2p_ghost_collisions), + this->tracers0_cutoff); + + // initialize output objects + this->particles_output_writer_mpi = new particles_output_hdf5< + long long int, double, 3>( + MPI_COMM_WORLD, + "tracers0", + nparticles, + tracers0_integration_steps); + this->particles_output_writer_mpi->setParticleFileLayout(this->ps->getParticleFileLayout()); + this->particles_sample_writer_mpi = new particles_output_sampling_hdf5< + long long int, double, 3>( + MPI_COMM_WORLD, + this->ps->getGlobalNbParticles(), + (this->simname + "_particles.h5"), + "tracers0", + "position/0"); + this->particles_sample_writer_mpi->setParticleFileLayout(this->ps->getParticleFileLayout()); + + return EXIT_SUCCESS; +} + +template <typename rnumber> +int static_field<rnumber>::step(void) +{ + TIMEZONE("static_file::step"); + this->ps->getP2PComputer().reset_collision_counter(); // TODO should it be here ? + this->ps->completeLoop(this->dt); + const long int nbCollision = this->ps->getP2PComputer().get_collision_counter(); // TODO should it be here ? + this->iteration++; + return EXIT_SUCCESS; +} + +template <typename rnumber> +int static_field<rnumber>::write_checkpoint(void) +{ + TIMEZONE("static_file::write_checkpoint"); + this->particles_output_writer_mpi->open_file(this->get_current_fname()); + this->particles_output_writer_mpi->template save<3>( + this->ps->getParticlesState(), + this->ps->getParticlesRhs(), + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->iteration); + this->particles_output_writer_mpi->close_file(); + return EXIT_SUCCESS; +} + +template <typename rnumber> +int static_field<rnumber>::finalize(void) +{ + TIMEZONE("static_field::finalize"); + if (this->myrank == 0) + H5Fclose(this->stat_file); + // good practice rule number n+1: always delete in inverse order of allocation + delete this->ps.release(); + delete this->particles_output_writer_mpi; + delete this->particles_sample_writer_mpi; + delete this->kk; + delete this->velocity; + delete this->vorticity; + return EXIT_SUCCESS; +} + +/** \brief Compute statistics. + * + */ + +template <typename rnumber> +int static_field<rnumber>::do_stats() +{ + TIMEZONE("static_field::do_stats"); + /// either one of two conditions suffices to compute statistics: + /// 1) current iteration is a multiple of niter_part + /// 2) we are within niter_part_fine_duration/2 of a multiple of niter_part_fine_period + if (!(this->iteration % this->niter_part == 0 || + ((this->iteration + this->niter_part_fine_duration/2) % this->niter_part_fine_period <= + this->niter_part_fine_duration))) + return EXIT_SUCCESS; + + // allocate temporary data array + std::unique_ptr<double[]> pdata(new double[3*this->ps->getLocalNbParticles()]); + + /// copy position data + + /// sample position + std::copy(this->ps->getParticlesState(), + this->ps->getParticlesState()+3*this->ps->getLocalNbParticles(), + pdata.get()); + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "position", + this->ps->getParticlesState(), + &pdata, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + /// sample velocity + std::fill_n(pdata.get(), 3*this->ps->getLocalNbParticles(), 0); + this->ps->sample_compute_field(*this->velocity, pdata.get()); + this->particles_sample_writer_mpi->template save_dataset<3>( + "tracers0", + "velocity", + this->ps->getParticlesState(), + &pdata, + this->ps->getParticlesIndexes(), + this->ps->getLocalNbParticles(), + this->ps->get_step_idx()-1); + + // deallocate temporary data array + delete[] pdata.release(); + + return EXIT_SUCCESS; +} + +template <typename rnumber> +int static_field<rnumber>::read_parameters(void) +{ + TIMEZONE("static_field::read_parameters"); + this->direct_numerical_simulation::read_parameters(); + hid_t parameter_file = H5Fopen((this->simname + ".h5").c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + this->fftw_plan_rigor = hdf5_tools::read_string(parameter_file, "parameters/fftw_plan_rigor"); + this->dt = hdf5_tools::read_value<double>(parameter_file, "parameters/dt"); + this->niter_part = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part"); + this->niter_part_fine_period = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part_fine_period"); + this->niter_part_fine_duration = hdf5_tools::read_value<int>(parameter_file, "parameters/niter_part_fine_duration"); + this->nparticles = hdf5_tools::read_value<int>(parameter_file, "parameters/nparticles"); + this->tracers0_integration_steps = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_integration_steps"); + this->tracers0_neighbours = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_neighbours"); + this->tracers0_smoothness = hdf5_tools::read_value<int>(parameter_file, "parameters/tracers0_smoothness"); + H5Fclose(parameter_file); + return EXIT_SUCCESS; +} + +template class static_field<float>; +template class static_field<double>; + + diff --git a/cpp/full_code/static_field_with_ghost_collisions.hpp b/cpp/full_code/static_field_with_ghost_collisions.hpp new file mode 100644 index 00000000..1e280fd4 --- /dev/null +++ b/cpp/full_code/static_field_with_ghost_collisions.hpp @@ -0,0 +1,87 @@ +/********************************************************************** +* * +* Copyright 2017 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + +#ifndef STATIC_FIELD_WITH_GHOST_COLLISIONS_HPP +#define STATIC_FIELD_WITH_GHOST_COLLISIONS_HPP + +#include <cstdlib> +#include "base.hpp" +#include "kspace.hpp" +#include "field.hpp" +#include "full_code/direct_numerical_simulation.hpp" +#include "particles/particles_system_builder.hpp" +#include "particles/particles_output_hdf5.hpp" +#include "particles/particles_sampling.hpp" +#include "particles/p2p_ghost_collisions.hpp" + +template <typename rnumber> +class static_field: public direct_numerical_simulation +{ + public: + + /* parameters that are read in read_parameters */ + double dt; + std::string fftw_plan_rigor; + + /* particle parameters */ + int niter_part; + int niter_part_fine_period; + int niter_part_fine_duration; + int nparticles; + int tracers0_integration_steps; + int tracers0_neighbours; + int tracers0_smoothness; + + /* other stuff */ + kspace<FFTW, SMOOTH> *kk; + field<rnumber, FFTW, THREE> *vorticity; + field<rnumber, FFTW, THREE> *velocity; + + /* other stuff */ + std::unique_ptr<abstract_particles_system_with_p2p<long long int, double, p2p_ghost_collisions>> ps; + + particles_output_hdf5<long long int, double,3> *particles_output_writer_mpi; + particles_output_sampling_hdf5<long long int, double, 3> *particles_sample_writer_mpi; + + + static_field( + const MPI_Comm COMMUNICATOR, + const std::string &simulation_name): + direct_numerical_simulation( + COMMUNICATOR, + simulation_name){} + ~static_field(){} + + int initialize(void); + int step(void); + int finalize(void); + + virtual int read_parameters(void); + int write_checkpoint(void); + int do_stats(void); +}; + +#endif//STATIC_FIELD_HPP + diff --git a/cpp/particles/abstract_particles_system_with_p2p.hpp b/cpp/particles/abstract_particles_system_with_p2p.hpp new file mode 100644 index 00000000..3c31abf6 --- /dev/null +++ b/cpp/particles/abstract_particles_system_with_p2p.hpp @@ -0,0 +1,13 @@ +#ifndef ABSTRACT_PARTICLES_SYSTEM_WITH_P2P_HPP +#define ABSTRACT_PARTICLES_SYSTEM_WITH_P2P_HPP + +#include "abstract_particles_system.hpp" + +template <class partsize_t, class real_number, class p2p_computer_class> +class abstract_particles_system_with_p2p : public abstract_particles_system<partsize_t,real_number> { +public: + virtual p2p_computer_class& getP2PComputer() = 0; + virtual const p2p_computer_class& getP2PComputer() const = 0; +}; + +#endif // ABSTRACT_PARTICLES_SYSTEM_WITH_P2P_HPP diff --git a/cpp/particles/p2p_computer.hpp b/cpp/particles/p2p_computer.hpp index 74d9c9eb..5fd2aef8 100644 --- a/cpp/particles/p2p_computer.hpp +++ b/cpp/particles/p2p_computer.hpp @@ -98,6 +98,9 @@ public: rhs_part2[3+IDXC_Z] += pos_part1[3+IDXC_Z] * 4 * ww * dot_product; } + + void merge(const p2p_computer&){} + bool isEnable() const { return isActive; } diff --git a/cpp/particles/p2p_computer_empty.hpp b/cpp/particles/p2p_computer_empty.hpp index 0599dc1a..c7c0057f 100644 --- a/cpp/particles/p2p_computer_empty.hpp +++ b/cpp/particles/p2p_computer_empty.hpp @@ -46,6 +46,8 @@ public: const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{ } + void merge(const p2p_computer_empty&){} + constexpr static bool isEnable() { return false; } diff --git a/cpp/particles/p2p_cylinder_collisions.hpp b/cpp/particles/p2p_cylinder_collisions.hpp new file mode 100644 index 00000000..bcc2b3e7 --- /dev/null +++ b/cpp/particles/p2p_cylinder_collisions.hpp @@ -0,0 +1,75 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ +#ifndef P2P_CYLINDER_COLLISIONS_HPP +#define P2P_CYLINDER_COLLISIONS_HPP + +#include <cstring> + +template <class real_number, class partsize_t> +class p2p_cylinder_collisions{ + long int collision_counter; + +public: + p2p_cylinder_collisions() : collision_counter(0){} + + // Copy constructor use a counter set to zero + p2p_cylinder_collisions(const p2p_cylinder_collisions&) : collision_counter(0){} + + template <int size_particle_rhs> + void init_result_array(real_number /*rhs*/[], const partsize_t /*nbParticles*/) const{ + } + + template <int size_particle_rhs> + void reduce_particles_rhs(real_number /*rhs_dst*/[], const real_number /*rhs_src*/[], const partsize_t /*nbParticles*/) const{ + } + + template <int size_particle_positions, int size_particle_rhs> + void compute_interaction(const real_number /*pos_part1*/[], real_number /*rhs_part1*/[], + const real_number /*pos_part2*/[], real_number /*rhs_part2*/[], + const real_number /*dist_pow2*/, const real_number /*cutoff*/, + const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{ + // TODO check the orientation, etc.... + if(check the test here){ + collision_counter += 1; + } + } + + void merge(const p2p_cylinder_collisions& other){ + collision_counter += other.collision_counter; + } + + constexpr static bool isEnable() { + return false; + } + + long int get_collision_counter() const{ + return collision_counter; + } + + void reset_collision_counter() const{ + collision_counter = 0; + } +}; + + +#endif // P2P_CYLINDER_COLLISIONS_HPP diff --git a/cpp/particles/p2p_distr_mpi.hpp b/cpp/particles/p2p_distr_mpi.hpp index 7ab3a8b3..64b7b605 100644 --- a/cpp/particles/p2p_distr_mpi.hpp +++ b/cpp/particles/p2p_distr_mpi.hpp @@ -530,9 +530,15 @@ public: lock_free_bool_array cells_locker(512); + std::vector<std::unique_ptr<computer_class>> computer_for_all_threads(omp_get_num_threads()-1); + for(int idxThread = 1 ; idxThread < omp_get_num_threads() ; ++idxThread){ + computer_for_all_threads[idxThread-1].reset(new computer_class(in_computer)); + } + TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads()) #pragma omp parallel default(shared) { + computer_class& computer_thread = (omp_get_thread_num() == 0 ? in_computer : *computer_for_all_threads[omp_get_thread_num()-1]); #pragma omp master { while(mpiRequests.size()){ @@ -584,7 +590,7 @@ public: assert(descriptor.toCompute != nullptr); descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]); - in_computer.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive); + computer_thread.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive); // Compute partsize_t idxPart = 0; @@ -622,7 +628,7 @@ public: particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z], shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions, size_particle_rhs>( + computer_thread.template compute_interaction<size_particle_positions, size_particle_rhs>( &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions], &descriptor.results[(idxPart+idx_p1)*size_particle_rhs], &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], @@ -669,7 +675,7 @@ public: NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; assert(descriptor.isRecv == false); assert(descriptor.toRecvAndMerge != nullptr); - in_computer.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_rhs], + computer_thread.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_rhs], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToExchange); delete[] descriptor.toRecvAndMerge.release(); } @@ -680,104 +686,111 @@ public: assert(whatNext.size() == 0); assert(mpiRequests.size() == 0); - // Compute self data - for(const auto& iter_cell : my_tree){ - TIMEZONE("proceed-leaf"); - const long int currenct_cell_idx = iter_cell.first; - const std::vector<std::pair<partsize_t,partsize_t>>* intervals_ptr = &iter_cell.second; + { + computer_class& computer_thread = (omp_get_thread_num() == 0 ? in_computer : *computer_for_all_threads[omp_get_thread_num()-1]); + // Compute self data + for(const auto& iter_cell : my_tree){ + TIMEZONE("proceed-leaf"); + const long int currenct_cell_idx = iter_cell.first; + const std::vector<std::pair<partsize_t,partsize_t>>* intervals_ptr = &iter_cell.second; -#pragma omp task default(shared) firstprivate(currenct_cell_idx, intervals_ptr) - { - const std::vector<std::pair<partsize_t,partsize_t>>& intervals = (*intervals_ptr); - - cells_locker.lock(currenct_cell_idx); - - for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ - // self interval - for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ - for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){ - const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X], - particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y], - particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z], - particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_X], - particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Y], - particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Z], - 0, 0, 0); - if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( - &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], - &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], - &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions], - &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs], - dist_r2, cutoff_radius_compute, 0, 0, 0); - } - } - } + #pragma omp task default(shared) firstprivate(currenct_cell_idx, intervals_ptr) + { + const std::vector<std::pair<partsize_t,partsize_t>>& intervals = (*intervals_ptr); + + cells_locker.lock(currenct_cell_idx); - // with other interval - for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){ + for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ + // self interval for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ - for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){ + for(partsize_t idx_p2 = idx_p1+1 ; idx_p2 < intervals[idx_1].second ; ++idx_p2){ const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X], particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y], particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z], - particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_X], - particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y], - particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z], + particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_X], + particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Y], + particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDXC_Z], 0, 0, 0); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( + computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>( &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], - &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions], - &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs], + &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions], + &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs], dist_r2, cutoff_radius_compute, 0, 0, 0); } } } + + // with other interval + for(size_t idx_2 = idx_1+1 ; idx_2 < intervals.size() ; ++idx_2){ + for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ + for(partsize_t idx_p2 = 0 ; idx_p2 < intervals[idx_2].second ; ++idx_p2){ + const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z], + particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_X], + particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y], + particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z], + 0, 0, 0); + if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ + computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>( + &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], + &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], + &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions], + &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs], + dist_r2, cutoff_radius_compute, 0, 0, 0); + } + } + } + } } - } - const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; - long int neighbors_indexes[27]; - std::array<real_number,3> shift[27]; - const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false); - - for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ - // with other interval - for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ - if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){ - cells_locker.lock(neighbors_indexes[idx_neighbor]); - - for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ - for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ - for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ - const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X], - particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y], - particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z], - shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); - if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( - &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], - &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], - &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], - &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], - dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); + const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; + long int neighbors_indexes[27]; + std::array<real_number,3> shift[27]; + const int nbNeighbors = my_tree.getNeighbors(currenct_cell_idx, neighbors, neighbors_indexes, shift, false); + + for(size_t idx_1 = 0 ; idx_1 < intervals.size() ; ++idx_1){ + // with other interval + for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ + if(currenct_cell_idx < neighbors_indexes[idx_neighbor]){ + cells_locker.lock(neighbors_indexes[idx_neighbor]); + + for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ + for(partsize_t idx_p1 = 0 ; idx_p1 < intervals[idx_1].second ; ++idx_p1){ + for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ + const real_number dist_r2 = compute_distance_r2(particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_X], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Y], + particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions + IDXC_Z], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z], + shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); + if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ + computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>( + &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], + &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], + &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], + &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], + dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); + } } } } + cells_locker.unlock(neighbors_indexes[idx_neighbor]); } - cells_locker.unlock(neighbors_indexes[idx_neighbor]); } } - } - cells_locker.unlock(currenct_cell_idx); + cells_locker.unlock(currenct_cell_idx); + } } } + + for(int idxThread = 1 ; idxThread < omp_get_num_threads() ; ++idxThread){ + in_computer.merge(*computer_for_all_threads[idxThread-1]); + } } }; diff --git a/cpp/particles/p2p_ghost_collisions.hpp b/cpp/particles/p2p_ghost_collisions.hpp new file mode 100644 index 00000000..95bdc9e1 --- /dev/null +++ b/cpp/particles/p2p_ghost_collisions.hpp @@ -0,0 +1,72 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ +#ifndef P2P_GHOST_COLLISIONS_HPP +#define P2P_GHOST_COLLISIONS_HPP + +#include <cstring> + +template <class real_number, class partsize_t> +class p2p_ghost_collisions{ + long int collision_counter; + +public: + p2p_ghost_collisions() : collision_counter(0){} + + // Copy constructor use a counter set to zero + p2p_ghost_collisions(const p2p_ghost_collisions&) : collision_counter(0){} + + template <int size_particle_rhs> + void init_result_array(real_number /*rhs*/[], const partsize_t /*nbParticles*/) const{ + } + + template <int size_particle_rhs> + void reduce_particles_rhs(real_number /*rhs_dst*/[], const real_number /*rhs_src*/[], const partsize_t /*nbParticles*/) const{ + } + + template <int size_particle_positions, int size_particle_rhs> + void compute_interaction(const real_number /*pos_part1*/[], real_number /*rhs_part1*/[], + const real_number /*pos_part2*/[], real_number /*rhs_part2*/[], + const real_number /*dist_pow2*/, const real_number /*cutoff*/, + const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{ + collision_counter += 1; + } + + void merge(const p2p_ghost_collisions& other){ + collision_counter += other.collision_counter; + } + + constexpr static bool isEnable() { + return false; + } + + long int get_collision_counter() const{ + return collision_counter; + } + + void reset_collision_counter() const{ + collision_counter = 0; + } +}; + + +#endif // P2P_GHOST_COLLISIONS_HPP diff --git a/cpp/particles/particles_system.hpp b/cpp/particles/particles_system.hpp index 4981592c..cdb523bb 100644 --- a/cpp/particles/particles_system.hpp +++ b/cpp/particles/particles_system.hpp @@ -29,6 +29,7 @@ #include <array> #include "abstract_particles_system.hpp" +#include "abstract_particles_system_with_p2p.hpp" #include "particles_distr_mpi.hpp" #include "particles_output_hdf5.hpp" #include "particles_output_mpiio.hpp" @@ -41,7 +42,7 @@ template <class partsize_t, class real_number, class field_rnumber, class field_class, class interpolator_class, int interp_neighbours, int size_particle_positions, int size_particle_rhs, class p2p_computer_class, class particles_inner_computer_class> -class particles_system : public abstract_particles_system<partsize_t, real_number> { +class particles_system : public abstract_particles_system_with_p2p<partsize_t, real_number, p2p_computer_class> { static_assert(size_particle_positions >= 3, "There should be at least the positions X,Y,Z in the state"); MPI_Comm mpi_com; @@ -402,6 +403,14 @@ public: } } } + + const p2p_computer_class& getP2PComputer() const final{ + return computer_p2p; + } + + p2p_computer_class& getP2PComputer() final{ + return computer_p2p; + } }; diff --git a/cpp/particles/particles_system_builder.hpp b/cpp/particles/particles_system_builder.hpp index 6a6f4a26..dfc20dc6 100644 --- a/cpp/particles/particles_system_builder.hpp +++ b/cpp/particles/particles_system_builder.hpp @@ -140,7 +140,7 @@ template <class partsize_t, class field_rnumber, field_backend be, field_compone class particles_inner_computer_class, int size_particle_positions, int size_particle_rhs> struct particles_system_build_container { template <const int interpolation_size, const int spline_mode> - static std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> instanciate( + static std::unique_ptr<abstract_particles_system_with_p2p<partsize_t, particles_rnumber, p2p_computer_class>> instanciate( const field<field_rnumber, be, fc>* fs_field, // (field object) const kspace<be, SMOOTH>* fs_kk, // (kspace object, contains dkx, dky, dkz) const int nsteps, // to check coherency between parameters and hdf input file (nb rhs) @@ -272,7 +272,7 @@ struct particles_system_build_container { part_sys->setParticleFileLayout(generator.getParticleFileLayout()); // Return the created particles system - return std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>(part_sys); + return std::unique_ptr<abstract_particles_system_with_p2p<partsize_t, particles_rnumber, p2p_computer_class>>(part_sys); } }; @@ -305,7 +305,7 @@ inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> template <class partsize_t, class field_rnumber, field_backend be, field_components fc, class p2p_computer_class, class particles_inner_computer_class, class particles_rnumber = double> -inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> particles_system_builder_with_p2p( +inline std::unique_ptr<abstract_particles_system_with_p2p<partsize_t, particles_rnumber, p2p_computer_class>> particles_system_builder_with_p2p( const field<field_rnumber, be, fc>* fs_field, // (field object) const kspace<be, SMOOTH>* fs_kk, // (kspace object, contains dkx, dky, dkz) const int nsteps, // to check coherency between parameters and hdf input file (nb rhs) @@ -319,7 +319,7 @@ inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> p2p_computer_class p2p_computer, particles_inner_computer_class inner_computer, const particles_rnumber cutoff){ - return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>, + return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system_with_p2p<partsize_t, particles_rnumber, p2p_computer_class>>, int, 1, 11, 1, // interpolation_size int, 0, 3, 1, // spline_mode particles_system_build_container<partsize_t, field_rnumber,be,fc,particles_rnumber, -- GitLab