diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c35f1c4efa464b1c9b293499d1f66405cc4f29f..9bebb964949d1e275e05e2936f6caebb1aa1db2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -364,6 +364,7 @@ set(hpp_for_lib ${PROJECT_SOURCE_DIR}/cpp/particles/lock_free_bool_array.hpp ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_computer_empty.hpp ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_computer.hpp + ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_ghost_collision_base.hpp ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_ghost_collisions.hpp ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_cylinder_collisions.hpp ${PROJECT_SOURCE_DIR}/cpp/particles/p2p_distr_mpi.hpp diff --git a/README.rst b/README.rst index 5c0206466a05279f134cdb15395a9f680d40ee71..b366b908e69bf68ddeaabd4a292d7021a30989e5 100644 --- a/README.rst +++ b/README.rst @@ -257,7 +257,7 @@ Afterwards, please run variations of the following command: .. code:: bash - python ${TURTLE REPOSITORY}/tests/DNS/test_scaling.py D \ + python ${TURTLE_REPOSITORY}/tests/DNS/test_scaling.py D \ -n 128 \ --nprocesses 4 \ --ncores 1 \ diff --git a/TurTLE/DNS.py b/TurTLE/DNS.py index d4b1a7833f61ab913046bd2e479af04feccc66dc..c0ac8328553e9c8a5e6785fef24a2150de288cb4 100644 --- a/TurTLE/DNS.py +++ b/TurTLE/DNS.py @@ -445,6 +445,7 @@ class DNS(_code): 'NSVE_Stokes_particles', 'NSVEparticles', 'static_field', + 'static_field_with_ghost_collisions', 'kraichnan_field']: assert (self.parameters['niter_todo'] % self.parameters['niter_part'] == 0) assert (self.parameters['niter_out'] % self.parameters['niter_part'] == 0) @@ -488,6 +489,8 @@ class DNS(_code): 4), dtype = np.int64) ofile['checkpoint'] = int(0) + if self.dns_type in ['static_field_with_ghost_collisions']: + ofile.create_group('statistics/collisions') if (self.dns_type in ['NSVE', 'NSVE_no_output']): return None return None @@ -646,6 +649,10 @@ class DNS(_code): 'static_field', help = 'static field with basic fluid tracers') + parser_static_field_with_ghost_collisions = subparsers.add_parser( + 'static_field_with_ghost_collisions', + help = 'static field with basic fluid tracers and ghost collisions') + parser_kraichnan_field = subparsers.add_parser( 'kraichnan_field', help = 'Kraichnan field with basic fluid tracers') @@ -673,6 +680,7 @@ class DNS(_code): 'NSVE_Stokes_particles', 'NSVEp_extra', 'static_field', + 'static_field_with_ghost_collisions', 'kraichnan_field']: eval('self.simulation_parser_arguments({0})'.format('parser_' + pp)) eval('self.job_parser_arguments({0})'.format('parser_' + pp)) @@ -749,6 +757,7 @@ class DNS(_code): 'NSVEparticles_no_output', 'NSVEp_extra_sampling', 'static_field', + 'static_field_with_ghost_collisions', 'kraichnan_field']: for k in self.NSVEp_extra_parameters.keys(): self.parameters[k] = self.NSVEp_extra_parameters[k] @@ -823,6 +832,7 @@ class DNS(_code): if self.dns_type in [ 'kraichnan_field', 'static_field', + 'static_field_with_ghost_collisions', 'NSVEparticles', 'NSVEcomplex_particles', 'NSVE_Stokes_particles', @@ -1138,6 +1148,7 @@ class DNS(_code): if self.dns_type in [ 'kraichnan_field', 'static_field', + 'static_field_with_ghost_collisions', 'NSVEparticles', 'NSVEcomplex_particles', 'NSVE_Stokes_particles', diff --git a/cpp/base.hpp b/cpp/base.hpp index 0d6e8609e971998a13399d9373cbc77915b4e99c..60fbb4168b5a091ceac476d41737c4d56543fc8a 100644 --- a/cpp/base.hpp +++ b/cpp/base.hpp @@ -43,6 +43,11 @@ inline int MOD(int a, int n) return ((a%n) + n) % n; } +template <typename T> int sgn(T val) +{ + return (T(0) < val) - (val < T(0)); +} + ///////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////// diff --git a/cpp/full_code/static_field.cpp b/cpp/full_code/static_field.cpp index 8b2feaa23c052635dabec23b116c7dbe469dcb19..268e30f7aa44cc81e0683e2e572f1020f36e3ecc 100644 --- a/cpp/full_code/static_field.cpp +++ b/cpp/full_code/static_field.cpp @@ -64,19 +64,24 @@ int static_field<rnumber>::initialize(void) this->vorticity = new field<rnumber, FFTW, THREE>( nx, ny, nz, this->comm, - fftw_planner_string_to_flag[this->fftw_plan_rigor]); + fftw_planner_string_to_flag[this->fftw_plan_rigor]); this->vorticity->real_space_representation = false; this->velocity = new field<rnumber, FFTW, THREE>( nx, ny, nz, this->comm, - fftw_planner_string_to_flag[this->fftw_plan_rigor]); + fftw_planner_string_to_flag[this->fftw_plan_rigor]); this->velocity->real_space_representation = false; - //read vorticity field + //read static vorticity field from iteration 0 + std::string checkpoint_fname = ( + std::string(this->simname) + + std::string("_checkpoint_") + + std::to_string(0) + + std::string(".h5")); this->vorticity->io( - this->get_current_fname(), + checkpoint_fname, "vorticity", - this->iteration, + 0, true); this->kk = new kspace<FFTW, SMOOTH>( this->vorticity->clayout, this->dkx, this->dky, this->dkz); @@ -96,7 +101,7 @@ int static_field<rnumber>::initialize(void) this->kk, // (kspace object, contains dkx, dky, dkz) tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) (long long int)nparticles, // to check coherency between parameters and hdf input file - this->get_current_fname(), // particles input filename + this->get_current_fname(), // particles input filename std::string("/tracers0/state/") + std::to_string(this->iteration), // dataset name for initial input std::string("/tracers0/rhs/") + std::to_string(this->iteration), // dataset name for initial input tracers0_neighbours, // parameter (interpolation no neighbours) @@ -144,6 +149,7 @@ int static_field<rnumber>::write_checkpoint(void) this->ps->getLocalNbParticles(), this->iteration); this->particles_output_writer_mpi->close_file(); + this->write_iteration(); return EXIT_SUCCESS; } diff --git a/cpp/hdf5_tools.cpp b/cpp/hdf5_tools.cpp index 7e6869d84f4cb6283721f1c321d2a7aaf258401b..334c0a1f30ef56224e04b71b335fa678381961e5 100644 --- a/cpp/hdf5_tools.cpp +++ b/cpp/hdf5_tools.cpp @@ -3,6 +3,63 @@ #include <cfloat> #include <climits> +template <> hid_t hdf5_tools::hdf5_type_id<char>() +{ + return H5T_NATIVE_CHAR; +} +template <> hid_t hdf5_tools::hdf5_type_id<signed char>() +{ + return H5T_NATIVE_SCHAR; +} +template <> hid_t hdf5_tools::hdf5_type_id<unsigned char>() +{ + return H5T_NATIVE_UCHAR; +} +template <> hid_t hdf5_tools::hdf5_type_id<short>() +{ + return H5T_NATIVE_SHORT; +} +template <> hid_t hdf5_tools::hdf5_type_id<unsigned short>() +{ + return H5T_NATIVE_USHORT; +} +template <> hid_t hdf5_tools::hdf5_type_id<int>() +{ + return H5T_NATIVE_INT; +} +template <> hid_t hdf5_tools::hdf5_type_id<unsigned>() +{ + return H5T_NATIVE_UINT; +} +template <> hid_t hdf5_tools::hdf5_type_id<long>() +{ + return H5T_NATIVE_LONG; +} +template <> hid_t hdf5_tools::hdf5_type_id<unsigned long>() +{ + return H5T_NATIVE_ULONG; +} +template <> hid_t hdf5_tools::hdf5_type_id<long long>() +{ + return H5T_NATIVE_LLONG; +} +template <> hid_t hdf5_tools::hdf5_type_id<unsigned long long>() +{ + return H5T_NATIVE_ULLONG; +} +template <> hid_t hdf5_tools::hdf5_type_id<float>() +{ + return H5T_NATIVE_FLOAT; +} +template <> hid_t hdf5_tools::hdf5_type_id<double>() +{ + return H5T_NATIVE_DOUBLE; +} +template <> hid_t hdf5_tools::hdf5_type_id<long double>() +{ + return H5T_NATIVE_LDOUBLE; +} + int hdf5_tools::require_size_single_dataset(hid_t dset, int tsize) { TIMEZONE("hdf5_tools::require_size_single_dataset"); @@ -178,6 +235,36 @@ number hdf5_tools::read_value( return result; } +template <typename number> +int hdf5_tools::write_value_with_single_rank( + const hid_t group, + const std::string dset_name, + const number value) +{ + hid_t dset, mem_dtype; + if (typeid(number) == typeid(int)) + mem_dtype = H5Tcopy(H5T_NATIVE_INT); + else if (typeid(number) == typeid(double)) + mem_dtype = H5Tcopy(H5T_NATIVE_DOUBLE); + if (H5Lexists(group, dset_name.c_str(), H5P_DEFAULT)) + { + dset = H5Dopen(group, dset_name.c_str(), H5P_DEFAULT); + } + else + { + hid_t fspace; + hsize_t count[1]; + count[0] = 1; + fspace = H5Screate_simple(1, count, NULL); + dset = H5Dcreate(group, dset_name.c_str(), mem_dtype, fspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Sclose(fspace); + } + H5Dwrite(dset, mem_dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &value); + H5Dclose(dset); + H5Tclose(mem_dtype); + return EXIT_SUCCESS; +} + template <typename dtype> std::vector<dtype> hdf5_tools::read_vector_with_single_rank( const int myrank, @@ -244,6 +331,44 @@ std::string hdf5_tools::read_string( } } +template <class partsize_t> +int hdf5_tools::write_particle_ID_pairs_with_single_rank( + const std::vector<partsize_t> v, + const hid_t group, + const std::string dset_name) +{ + TIMEZONE("hdf5_tools::write_particle_ID_pairs_with_single_rank"); + // the vector contains pair information, so its size must be a multiple of 2 + assert((v.size() % 2) == 0); + // file space creation + hid_t fspace; + hsize_t dims[2]; + dims[0] = v.size()/2; + dims[1] = 2; + fspace = H5Screate_simple(2, dims, NULL); + // create dataset + hsize_t dset_id = H5Dcreate( + group, + dset_name.c_str(), + hdf5_tools::hdf5_type_id<partsize_t>(), + fspace, + H5P_DEFAULT, + H5P_DEFAULT, + H5P_DEFAULT); + // write data + H5Dwrite( + dset_id, + hdf5_tools::hdf5_type_id<partsize_t>(), + H5S_ALL, + H5S_ALL, + H5P_DEFAULT, + &v.front()); + // clean up + H5Dclose(dset_id); + H5Sclose(fspace); + return EXIT_SUCCESS; +} + template std::vector<int> hdf5_tools::read_vector<int>( const hid_t, @@ -280,3 +405,21 @@ double hdf5_tools::read_value<double>( const hid_t, const std::string); +template +int hdf5_tools::write_value_with_single_rank<int>( + const hid_t group, + const std::string dset_name, + const int value); + +template +int hdf5_tools::write_value_with_single_rank<double>( + const hid_t group, + const std::string dset_name, + const double value); + +template +int hdf5_tools::write_particle_ID_pairs_with_single_rank( + const std::vector<long long> v, + const hid_t group, + const std::string dset_name); + diff --git a/cpp/hdf5_tools.hpp b/cpp/hdf5_tools.hpp index 3fe4c4b7b067a617f25fc213545ad5e3e436e3b4..57a257625c5e50eef39438f85487962745893185 100644 --- a/cpp/hdf5_tools.hpp +++ b/cpp/hdf5_tools.hpp @@ -33,6 +33,9 @@ namespace hdf5_tools { + // see https://support.hdfgroup.org/HDF5/doc/H5.user/Datatypes.html + template <typename data_type> hid_t hdf5_type_id(); + int grow_single_dataset( hid_t dset, int tincrement); @@ -84,6 +87,18 @@ namespace hdf5_tools number read_value( const hid_t group, const std::string dset_name); + + template <typename number> + int write_value_with_single_rank( + const hid_t group, + const std::string dset_name, + const number value); + + template <class partsize_t> + int write_particle_ID_pairs_with_single_rank( + const std::vector<partsize_t> v, + const hid_t group, + const std::string dset_name); } #endif//HDF5_TOOLS_HPP diff --git a/cpp/particles/abstract_particles_system.hpp b/cpp/particles/abstract_particles_system.hpp index 73674ae218c00087b91114eea7ea91879773c4e3..abcb8e684c5f269f10f47f337e36260486fba931 100644 --- a/cpp/particles/abstract_particles_system.hpp +++ b/cpp/particles/abstract_particles_system.hpp @@ -27,6 +27,7 @@ #define ABSTRACT_PARTICLES_SYSTEM_HPP #include <memory> +#include <vector> //- Not generic to enable sampling begin #include "field.hpp" @@ -39,6 +40,8 @@ class abstract_particles_system { public: virtual ~abstract_particles_system(){} + virtual void delete_particles_from_indexes(const std::vector<partsize_t>& inIndexToDelete) = 0; + virtual void compute() = 0; virtual void compute_p2p() = 0; diff --git a/cpp/particles/p2p_computer.hpp b/cpp/particles/p2p_computer.hpp index 28f9f384031accd2345e6a6faf51b384ea7e5ff9..47d193e095403d329cd704e8623567c0f080a39b 100644 --- a/cpp/particles/p2p_computer.hpp +++ b/cpp/particles/p2p_computer.hpp @@ -70,7 +70,9 @@ public: } template <int size_particle_positions, int size_particle_rhs> - void compute_interaction(const real_number pos_part1[], real_number rhs_part1[], + void compute_interaction(const partsize_t /*idx_part1*/, + const real_number pos_part1[], real_number rhs_part1[], + const partsize_t /*idx_part2*/, const real_number pos_part2[], real_number rhs_part2[], const real_number dist_pow2, const real_number cutoff, const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{ diff --git a/cpp/particles/p2p_computer_empty.hpp b/cpp/particles/p2p_computer_empty.hpp index ddec0d6540d54de9df787a933a5177274dabc1cd..ce4e282c3bdb7ff4bfc479b5a0b16d106f0492c1 100644 --- a/cpp/particles/p2p_computer_empty.hpp +++ b/cpp/particles/p2p_computer_empty.hpp @@ -40,7 +40,9 @@ public: } template <int size_particle_positions, int size_particle_rhs> - void compute_interaction(const real_number /*pos_part1*/[], real_number /*rhs_part1*/[], + void compute_interaction(const partsize_t /*idx_part1*/, + const real_number /*pos_part1*/[], real_number /*rhs_part1*/[], + const partsize_t /*idx_part2*/, const real_number /*pos_part2*/[], real_number /*rhs_part2*/[], const real_number /*dist_pow2*/, const real_number /*cutoff*/, const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/) const{ diff --git a/cpp/particles/p2p_cylinder_collisions.hpp b/cpp/particles/p2p_cylinder_collisions.hpp index fda889f9208f3b75575db1b36b1dedfc16cb3971..44736ffc88643dd5664cdcd8440f19f0ea5778d2 100644 --- a/cpp/particles/p2p_cylinder_collisions.hpp +++ b/cpp/particles/p2p_cylinder_collisions.hpp @@ -24,54 +24,138 @@ #define P2P_CYLINDER_GHOST_COLLISIONS_HPP #include <cstring> +#include <math.h> /* for sqrt, abs */ + +#include <set> +#include <utility> +#include <vector> +#include "particles/p2p_ghost_collision_base.hpp" + template <class real_number, class partsize_t> -class p2p_cylinder_ghost_collisions{ - long int collision_counter; +class p2p_cylinder_ghost_collisions: public p2p_ghost_collision_base<real_number, partsize_t> +{ + private: + // Following doubles are needed for the collision computation + double width; + double length; public: - p2p_cylinder_ghost_collisions() : collision_counter(0){} + p2p_cylinder_ghost_collisions() : width(1.),length(1.){} // Copy constructor use a counter set to zero - p2p_cylinder_ghost_collisions(const p2p_cylinder_ghost_collisions&) : collision_counter(0){} - - template <int size_particle_rhs> - void init_result_array(real_number /*rhs*/[], const partsize_t /*nbParticles*/) const{ - } - - template <int size_particle_rhs> - void reduce_particles_rhs(real_number /*rhs_dst*/[], const real_number /*rhs_src*/[], const partsize_t /*nbParticles*/) const{ - } + p2p_cylinder_ghost_collisions(const p2p_cylinder_ghost_collisions&){} template <int size_particle_positions, int size_particle_rhs> - void compute_interaction(const real_number /*pos_part1*/[], real_number /*rhs_part1*/[], - const real_number /*pos_part2*/[], real_number /*rhs_part2*/[], + void compute_interaction(const partsize_t idx_part1, + const real_number pos_part1[], real_number /*rhs_part1*/[], + const partsize_t idx_part2, + const real_number pos_part2[], real_number /*rhs_part2*/[], const real_number /*dist_pow2*/, const real_number /*cutoff*/, const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/){ - bool cylinders_overlap = false; + double x, y, z, pq, xp, xq, t, s, x_dist, y_dist, z_dist ,min_distance, pi, pi2; - /* TODO: check if cylinders overlap or not, set `cylinders_overlap` accordingly */ + pi2 = atan(1.0)*8.0; + pi = atan(1.0)*4.0; - if(cylinders_overlap) - collision_counter += 1; + /* Relative position vector of the two particles (x,y,z)^T */ + /* complicated usage of fmod, fabs and sgn because C++ fmod does not do what it should. */ + x = std::fmod(pos_part2[0],pi2)-std::fmod(pos_part1[0],pi2); + y = std::fmod(pos_part2[1],pi2)-std::fmod(pos_part1[1],pi2); + z = std::fmod(pos_part2[2],pi2)-std::fmod(pos_part1[2],pi2); + + x = ( std::fmod( std::fabs(x) +pi ,pi2) - pi ) * sgn(x) ; + y = ( std::fmod( std::fabs(y) +pi ,pi2) - pi ) * sgn(y) ; + z = ( std::fmod( std::fabs(z) +pi ,pi2) - pi ) * sgn(z) ; + + if( sqrt(x*x+y*y+z*z) <= length ) + { + + /* p and q are the orientation vectors of the first and second particles. */ + /* pq, xp, xq are scalar products of these vectors with x, relative position */ + pq = pos_part1[3] * pos_part2[3] + pos_part1[4] * pos_part2[4] + pos_part1[5] * pos_part2[5]; + xp = x * pos_part1[3] + y * pos_part1[4] + z * pos_part1[5]; + xq = x * pos_part2[3] + y * pos_part2[4] + z * pos_part2[5]; + /* t and s parametrize the two rods. Find min distance: */ + assert(this->length > 0); + t = 2.0/(this->length*(pq*pq-1.0))*(-xp+pq*xq); + s = 2.0/(this->length*(pq*pq-1.0))*(-pq*xp+xq); + /* Test if -1<s<1 and -1<t<1 */ + if( abs(t)<=1.0 and abs(s)<=1.0 ) { + /* Get minimal distance in case of both t and s in {-1,1}. Else: check edges */ + x_dist = this->length*0.5*t*pos_part1[3]-this->length*0.5*s*pos_part2[3]-x; + y_dist = this->length*0.5*t*pos_part1[4]-this->length*0.5*s*pos_part2[4]-y; + z_dist = this->length*0.5*t*pos_part1[5]-this->length*0.5*s*pos_part2[5]-z; + min_distance = sqrt(x_dist*x_dist+y_dist*y_dist+z_dist*z_dist); + } + else + { + min_distance = this->length; + /* t fixed at 1, find min along s */ + t = 1.0; + s = t*pq-2.0/this->length*xq; + if( abs(s)>1.0 ) { s = s / abs(s) ;} + x_dist = this->length*0.5*t*pos_part1[3]-this->length*0.5*s*pos_part2[3]-x; + y_dist = this->length*0.5*t*pos_part1[4]-this->length*0.5*s*pos_part2[4]-y; + z_dist = this->length*0.5*t*pos_part1[5]-this->length*0.5*s*pos_part2[5]-z; + min_distance = fmin( sqrt(x_dist*x_dist+y_dist*y_dist+z_dist*z_dist), min_distance ); + /* t fixed at -1, find min along s */ + t = -1.0; + s = t*pq-2.0/this->length*xq; + if( abs(s)>1.0 ) { s = s / abs(s) ;} + x_dist = this->length*0.5*t*pos_part1[3]-this->length*0.5*s*pos_part2[3]-x; + y_dist = this->length*0.5*t*pos_part1[4]-this->length*0.5*s*pos_part2[4]-y; + z_dist = this->length*0.5*t*pos_part1[5]-this->length*0.5*s*pos_part2[5]-z; + min_distance = fmin( sqrt(x_dist*x_dist+y_dist*y_dist+z_dist*z_dist), min_distance ); + /* s fixed at 1, find min along t */ + s = 1.0; + t = s*pq+2.0/this->length*xp; + if( abs(t)>1.0 ) { t = t / abs(t) ;} + x_dist = this->length*0.5*t*pos_part1[3]-this->length*0.5*s*pos_part2[3]-x; + y_dist = this->length*0.5*t*pos_part1[4]-this->length*0.5*s*pos_part2[4]-y; + z_dist = this->length*0.5*t*pos_part1[5]-this->length*0.5*s*pos_part2[5]-z; + min_distance = fmin( sqrt(x_dist*x_dist+y_dist*y_dist+z_dist*z_dist), min_distance ); + /* s fixed at -1, find min along t */ + s = -1.0; + t = s*pq+2.0/this->length*xp; + if( abs(t)>1.0 ) { t = t / abs(t) ;} + x_dist = this->length*0.5*t*pos_part1[3]-this->length*0.5*s*pos_part2[3]-x; + y_dist = this->length*0.5*t*pos_part1[4]-this->length*0.5*s*pos_part2[4]-y; + z_dist = this->length*0.5*t*pos_part1[5]-this->length*0.5*s*pos_part2[5]-z; + min_distance = fmin( sqrt(x_dist*x_dist+y_dist*y_dist+z_dist*z_dist), min_distance ); + } + /* If cylinders overlap count it as a collision */ + if( min_distance<=width ){ + std::pair <partsize_t, partsize_t> single_collision_pair(idx_part1, idx_part2); + this->collision_pairs_local.insert(single_collision_pair); + //DEBUG_MSG("inside compute interaction idx_part1 = %ld and idx_part2 = %ld\n", idx_part1, idx_part2); + assert(idx_part1!=idx_part2); + } + + } } - void merge(const p2p_cylinder_ghost_collisions& other){ - collision_counter += other.collision_counter; + void set_width(const double WIDTH) + { + this->width = WIDTH; } - constexpr static bool isEnable() { - return true; + double get_width() + { + return this->width; } - long int get_collision_counter() const{ - return collision_counter; + void set_length(const double LENGTH) + { + this->length = LENGTH; } - void reset_collision_counter(){ - collision_counter = 0; + double get_length() + { + return this->length; } }; #endif // P2P_CYLINDER_GHOST_COLLISIONS_HPP + diff --git a/cpp/particles/p2p_distr_mpi.hpp b/cpp/particles/p2p_distr_mpi.hpp index e5b5afac61f23b4a3f1631204f63ca85fbc68d77..e06dc894901f21b98fd8024d39d825adbceefe1f 100644 --- a/cpp/particles/p2p_distr_mpi.hpp +++ b/cpp/particles/p2p_distr_mpi.hpp @@ -49,6 +49,7 @@ protected: enum MpiTag{ TAG_NB_PARTICLES, TAG_POSITION_PARTICLES, + TAG_INDEX_PARTICLES, TAG_RESULT_PARTICLES, }; @@ -57,10 +58,12 @@ protected: int destProc; int nbLevelsToExchange; bool isRecv; + int nbReceived; std::unique_ptr<real_number[]> toRecvAndMerge; std::unique_ptr<real_number[]> toCompute; std::unique_ptr<real_number[]> results; + std::unique_ptr<partsize_t[]> indexes; }; enum Action{ @@ -274,7 +277,7 @@ public: void compute_distr(computer_class& in_computer, const partsize_t current_my_nb_particles_per_partition[], real_number particles_positions[], - real_number particles_current_rhs[], + std::unique_ptr<real_number[]> particles_current_rhs[], const int nb_rhs, partsize_t inout_index_particles[]){ TIMEZONE("compute_distr"); @@ -330,19 +333,34 @@ public: }); // Permute array using buffer + // permute 4th function parameter using buffer, based on information in first 3 parameters std::vector<unsigned char> buffer; - permute_copy<real_number, size_particle_positions>(current_offset_particles_for_partition[idxPartition], - current_my_nb_particles_per_partition[idxPartition], - part_to_sort.data(), particles_positions, &buffer); - permute_copy<real_number, size_particle_rhs>(current_offset_particles_for_partition[idxPartition], - current_my_nb_particles_per_partition[idxPartition], - part_to_sort.data(), particles_current_rhs, &buffer); - permute_copy<partsize_t, 1>(current_offset_particles_for_partition[idxPartition], - current_my_nb_particles_per_partition[idxPartition], - part_to_sort.data(), inout_index_particles, &buffer); - permute_copy<long int, 1>(current_offset_particles_for_partition[idxPartition], - current_my_nb_particles_per_partition[idxPartition], - part_to_sort.data(), particles_coord.get(), &buffer); + permute_copy<real_number, size_particle_positions>( + current_offset_particles_for_partition[idxPartition], + current_my_nb_particles_per_partition[idxPartition], + part_to_sort.data(), + particles_positions, + &buffer); + for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ + permute_copy<real_number, size_particle_rhs>( + current_offset_particles_for_partition[idxPartition], + current_my_nb_particles_per_partition[idxPartition], + part_to_sort.data(), + particles_current_rhs[idx_rhs].get(), + &buffer); + } + permute_copy<partsize_t, 1>( + current_offset_particles_for_partition[idxPartition], + current_my_nb_particles_per_partition[idxPartition], + part_to_sort.data(), + inout_index_particles, + &buffer); + permute_copy<long int, 1>( + current_offset_particles_for_partition[idxPartition], + current_my_nb_particles_per_partition[idxPartition], + part_to_sort.data(), + particles_coord.get(), + &buffer); } } @@ -416,6 +434,7 @@ public: descriptor.nbLevelsToExchange = nb_levels_to_send; descriptor.nbParticlesToExchange = particles_offset_layers[my_nb_cell_levels] - particles_offset_layers[my_nb_cell_levels-nb_levels_to_send]; descriptor.isRecv = false; + descriptor.nbReceived = 0; neigDescriptors.emplace_back(std::move(descriptor)); @@ -438,6 +457,7 @@ public: descriptor.nbLevelsToExchange = nb_levels_to_recv; descriptor.nbParticlesToExchange = -1; descriptor.isRecv = true; + descriptor.nbReceived = 0; neigDescriptors.emplace_back(std::move(descriptor)); @@ -479,6 +499,14 @@ public: descriptor.destProc, TAG_POSITION_PARTICLES, current_com, &mpiRequests.back())); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(descriptor.nbParticlesToExchange*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(const_cast<partsize_t*>(&inout_index_particles[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]]), + int(descriptor.nbParticlesToExchange), particles_utils::GetMpiType(partsize_t()), + descriptor.destProc, TAG_INDEX_PARTICLES, + current_com, &mpiRequests.back())); + assert(descriptor.toRecvAndMerge == nullptr); descriptor.toRecvAndMerge.reset(new real_number[descriptor.nbParticlesToExchange*size_particle_rhs]); whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr}); @@ -567,6 +595,7 @@ public: const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange; assert(NbParticlesToReceive != -1); assert(descriptor.toCompute == nullptr); + assert(descriptor.indexes == nullptr); if(NbParticlesToReceive){ descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]); @@ -576,6 +605,14 @@ public: AssertMpi(MPI_Irecv(descriptor.toCompute.get(), int(NbParticlesToReceive*size_particle_positions), particles_utils::GetMpiType(real_number()), destProc, TAG_POSITION_PARTICLES, current_com, &mpiRequests.back())); + + descriptor.indexes.reset(new partsize_t[NbParticlesToReceive]); + whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.indexes.get(), int(NbParticlesToReceive), + particles_utils::GetMpiType(partsize_t()), destProc, TAG_INDEX_PARTICLES, + current_com, &mpiRequests.back())); } } @@ -583,80 +620,100 @@ public: /// Computation ////////////////////////////////////////////////////////////////////// if(releasedAction.first == COMPUTE_PARTICLES){ - TIMEZONE("compute-particles"); NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; - assert(descriptor.isRecv); - const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange; - - assert(descriptor.toCompute != nullptr); - descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]); - computer_thread.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive); - - // Compute - partsize_t idxPart = 0; - while(idxPart != NbParticlesToReceive){ - const long int current_cell_idx = get_cell_idx(descriptor.toCompute[idxPart*size_particle_positions + IDXC_X], - descriptor.toCompute[idxPart*size_particle_positions + IDXC_Y], - descriptor.toCompute[idxPart*size_particle_positions + IDXC_Z]); - partsize_t nb_parts_in_cell = 1; - while(idxPart+nb_parts_in_cell != NbParticlesToReceive - && current_cell_idx == get_cell_idx(descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_X], - descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_Y], - descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_Z])){ - nb_parts_in_cell += 1; - } + descriptor.nbReceived += 1; + assert(descriptor.nbReceived <= 2); + + if(descriptor.nbReceived == 2){ + TIMEZONE("compute-particles"); + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + assert(descriptor.isRecv); + const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange; + + assert(descriptor.toCompute != nullptr); + assert(descriptor.indexes != nullptr); + descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]); + computer_thread.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive); + + // Compute + partsize_t idxPart = 0; + while(idxPart != NbParticlesToReceive){ + const long int current_cell_idx = get_cell_idx(descriptor.toCompute[idxPart*size_particle_positions + IDXC_X], + descriptor.toCompute[idxPart*size_particle_positions + IDXC_Y], + descriptor.toCompute[idxPart*size_particle_positions + IDXC_Z]); + partsize_t nb_parts_in_cell = 1; + while(idxPart+nb_parts_in_cell != NbParticlesToReceive + && current_cell_idx == get_cell_idx(descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_X], + descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_Y], + descriptor.toCompute[(idxPart+nb_parts_in_cell)*size_particle_positions + IDXC_Z])){ + nb_parts_in_cell += 1; + } - #pragma omp task default(shared) firstprivate(idxPart, nb_parts_in_cell, current_cell_idx) - { - const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; - long int neighbors_indexes[27]; - std::array<real_number,3> shift[27]; - const int nbNeighbors = my_tree.getNeighbors(current_cell_idx, neighbors, neighbors_indexes, shift, true); - - // with other interval - for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ - cells_locker.lock(neighbors_indexes[idx_neighbor]); - - for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ - for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){ - for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ - const real_number dist_r2 = compute_distance_r2(descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_X], - descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Y], - descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Z], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y], - particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z], - shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); - if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - computer_thread.template compute_interaction<size_particle_positions, size_particle_rhs>( - &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions], - &descriptor.results[(idxPart+idx_p1)*size_particle_rhs], - &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], - &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], - dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); + #pragma omp task default(shared) firstprivate(idxPart, nb_parts_in_cell, current_cell_idx) + { + const std::vector<std::pair<partsize_t,partsize_t>>* neighbors[27]; + long int neighbors_indexes[27]; + std::array<real_number,3> shift[27]; + const int nbNeighbors = my_tree.getNeighbors( + current_cell_idx, + neighbors, + neighbors_indexes, + shift, + true); + + // with other interval + for(int idx_neighbor = 0 ; idx_neighbor < nbNeighbors ; ++idx_neighbor){ + cells_locker.lock(neighbors_indexes[idx_neighbor]); + + for(size_t idx_2 = 0 ; idx_2 < (*neighbors[idx_neighbor]).size() ; ++idx_2){ + for(partsize_t idx_p1 = 0 ; idx_p1 < nb_parts_in_cell ; ++idx_p1){ + for(partsize_t idx_p2 = 0 ; idx_p2 < (*neighbors[idx_neighbor])[idx_2].second ; ++idx_p2){ + const real_number dist_r2 = compute_distance_r2( + descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_X], + descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Y], + descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions + IDXC_Z], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_X], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Y], + particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDXC_Z], + shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); + if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ + computer_thread.template compute_interaction<size_particle_positions, size_particle_rhs>( + descriptor.indexes[(idxPart+idx_p1)], + &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions], + &descriptor.results[(idxPart+idx_p1)*size_particle_rhs], + inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)], + &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], + &particles_current_rhs[0][((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], + dist_r2, + cutoff_radius_compute, + shift[idx_neighbor][IDXC_X], + shift[idx_neighbor][IDXC_Y], + shift[idx_neighbor][IDXC_Z]); + } } } } - } - cells_locker.unlock(neighbors_indexes[idx_neighbor]); + cells_locker.unlock(neighbors_indexes[idx_neighbor]); + } } - } - idxPart += nb_parts_in_cell; - } + idxPart += nb_parts_in_cell; + } - #pragma omp taskwait + #pragma omp taskwait - // Send back - const int destProc = descriptor.destProc; - whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second}); - mpiRequests.emplace_back(); - assert(NbParticlesToReceive*size_particle_rhs < std::numeric_limits<int>::max()); - AssertMpi(MPI_Isend(descriptor.results.get(), int(NbParticlesToReceive*size_particle_rhs), - particles_utils::GetMpiType(real_number()), destProc, TAG_RESULT_PARTICLES, - current_com, &mpiRequests.back())); - delete[] descriptor.toCompute.release(); + // Send back + const int destProc = descriptor.destProc; + whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + assert(NbParticlesToReceive*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(descriptor.results.get(), int(NbParticlesToReceive*size_particle_rhs), + particles_utils::GetMpiType(real_number()), destProc, TAG_RESULT_PARTICLES, + current_com, &mpiRequests.back())); + delete[] descriptor.toCompute.release(); + delete[] descriptor.indexes.release(); + } } ////////////////////////////////////////////////////////////////////// /// Release memory that was sent back @@ -675,7 +732,7 @@ public: NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; assert(descriptor.isRecv == false); assert(descriptor.toRecvAndMerge != nullptr); - computer_thread.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_rhs], + computer_thread.template reduce_particles_rhs<size_particle_rhs>(&particles_current_rhs[0][particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_rhs], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToExchange); delete[] descriptor.toRecvAndMerge.release(); } @@ -713,10 +770,12 @@ public: 0, 0, 0); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>( + inout_index_particles[(intervals[idx_1].first+idx_p1)], &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], - &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], + &particles_current_rhs[0][(intervals[idx_1].first+idx_p1)*size_particle_rhs], + inout_index_particles[(intervals[idx_1].first+idx_p2)], &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions], - &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs], + &particles_current_rhs[0][(intervals[idx_1].first+idx_p2)*size_particle_rhs], dist_r2, cutoff_radius_compute, 0, 0, 0); } } @@ -735,10 +794,12 @@ public: 0, 0, 0); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>( + inout_index_particles[(intervals[idx_1].first+idx_p1)], &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], - &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], + &particles_current_rhs[0][(intervals[idx_1].first+idx_p1)*size_particle_rhs], + inout_index_particles[(intervals[idx_2].first+idx_p2)], &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions], - &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs], + &particles_current_rhs[0][(intervals[idx_2].first+idx_p2)*size_particle_rhs], dist_r2, cutoff_radius_compute, 0, 0, 0); } } @@ -769,10 +830,12 @@ public: shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ computer_thread.template compute_interaction<size_particle_positions,size_particle_rhs>( + inout_index_particles[(intervals[idx_1].first+idx_p1)], &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], - &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], + &particles_current_rhs[0][(intervals[idx_1].first+idx_p1)*size_particle_rhs], + inout_index_particles[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)], &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], - &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], + &particles_current_rhs[0][((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], dist_r2, cutoff_radius_compute, shift[idx_neighbor][IDXC_X], shift[idx_neighbor][IDXC_Y], shift[idx_neighbor][IDXC_Z]); } } diff --git a/cpp/particles/p2p_ghost_collision_base.hpp b/cpp/particles/p2p_ghost_collision_base.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6b950a22a5212142db5ff48b088952c6b6d63dbf --- /dev/null +++ b/cpp/particles/p2p_ghost_collision_base.hpp @@ -0,0 +1,217 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ +#ifndef P2P_GHOST_COLLISION_BASE_HPP +#define P2P_GHOST_COLLISION_BASE_HPP + +#include <cstring> +#include <set> +#include <utility> +#include <vector> +template < class partsize_t> +std::vector<partsize_t> pairs2vec(std::set <std::pair <partsize_t,partsize_t>> ID_pairs){ + std::vector<partsize_t> v(2*ID_pairs.size()); + int i = 0; + for(auto p: ID_pairs) + { + v[2*i] = p.first; + v[2*i+1] = p.second; + i++; + } + return v; +} + +template < class partsize_t> +std::set <std::pair <partsize_t,partsize_t>> vec2pairs(std::vector<partsize_t> v){ + std::set <std::pair <partsize_t,partsize_t>> ID_pairs; + assert(v.size()%2 == 0); + for(int i=0; i < int(v.size())/2; i++) + { + //DEBUG_MSG((std::to_string(v[2*i])+" "+std::to_string(v[2*i+1])+"\n").c_str()); + std::pair <partsize_t, partsize_t> single_collision_pair(v[2*i],v[2*i+1]); + ID_pairs.insert(single_collision_pair); + } + return ID_pairs; +} + +template <class partsize_t> +void print_pair_vec(std::vector<partsize_t> vec) +{ + assert(vec.size() % 2 == 0); + for (int i = 0; i < int(vec.size())/2; i++) + DEBUG_MSG("in print_pair_vec, pair %d is (%ld,%ld)\n", i, vec[2*i], vec[2*i+1]); +} + +template <class partsize_t> +void print_pair_set(std::set<std::pair<partsize_t, partsize_t>> ID_pairs) +{ + int i = 0; + for (auto p: ID_pairs) + { + DEBUG_MSG("in print_pair_set, pair %d is (%ld,%ld)\n", i, p.first, p.second); + i++; + } +} + + +template <class real_number, class partsize_t> +class p2p_ghost_collision_base +{ + protected: + bool synchronisation; + std::set <std::pair <partsize_t, partsize_t>> collision_pairs_local; + std::set <std::pair <partsize_t, partsize_t>> collision_pairs_global; + std::vector <partsize_t> collision_pairs_vec_global; + +public: + p2p_ghost_collision_base(): synchronisation(false) {} + + // Copy constructor use a counter set to zero + p2p_ghost_collision_base(const p2p_ghost_collision_base&): synchronisation(false) {} + + template <int size_particle_rhs> + void init_result_array(real_number /*rhs*/[], const partsize_t /*nbParticles*/) const{ + } + + template <int size_particle_rhs> + void reduce_particles_rhs(real_number /*rhs_dst*/[], const real_number /*rhs_src*/[], const partsize_t /*nbParticles*/) const{ + } + + // the following declaration is illegal under the C++ standard, + // but it should be there. therefore I'm leaving it here in spirit. + // + // template <int size_particle_positions, int size_particle_rhs> + //virtual void compute_interaction(const partsize_t idx_part1, + // const real_number /*pos_part1*/[], + // real_number /*rhs_part1*/[], + // const partsize_t idx_part2, + // const real_number /*pos_part2*/[], + // real_number /*rhs_part2*/[], + // const real_number /*dist_pow2*/, + // const real_number /*cutoff*/, + // const real_number /*xshift_coef*/, + // const real_number /*yshift_coef*/, + // const real_number /*zshift_coef*/) = 0; + + void merge(const p2p_ghost_collision_base& other){ + std::set <std::pair <partsize_t, partsize_t>> new_collision_pairs; + std::set_union(collision_pairs_local.begin(), collision_pairs_local.end(), + other.collision_pairs_local.begin(), other.collision_pairs_local.end(), + std::inserter(new_collision_pairs, new_collision_pairs.begin())); + collision_pairs_local = new_collision_pairs; + } + + void MPI_merge(MPI_Comm comm, int myrank, int nprocs) { + ///collect collision pairs + std::vector <partsize_t> collision_pairs_vec_local = pairs2vec(this->collision_pairs_local); + DEBUG_MSG("transformed local collision pairs\n"); + if(this->collision_pairs_local.size()>0) + { + DEBUG_MSG(("local IDs of vec: "+std::to_string(collision_pairs_vec_local[0])+" "+std::to_string(collision_pairs_vec_local[1])+"\n").c_str()); + } + int * recvcounts; + int vec_len_local = collision_pairs_vec_local.size(); + int * displ; + if (myrank == 0) + { + recvcounts = new int[nprocs]; + } + MPI_Gather(&vec_len_local, 1, MPI_INT, + recvcounts, 1, MPI_INT, + 0, comm); + DEBUG_MSG("after gather\n"); + if (myrank == 0) + { + int nbElements; + displ = new int[nprocs]; + displ[0]=0; + for (int i=1; i<nprocs; i++) + { + displ[i]=displ[i-1]+recvcounts[i-1]; + } + nbElements = displ[nprocs-1] + recvcounts[nprocs-1]; + this->collision_pairs_vec_global.resize(nbElements); + DEBUG_MSG(("nbElements: "+std::to_string(nbElements)+"\n").c_str()); + } + MPI_Gatherv(&collision_pairs_vec_local.front(), + vec_len_local, + MPI_LONG_LONG_INT, + &this->collision_pairs_vec_global.front(), + recvcounts, + displ, + MPI_LONG_LONG_INT, + 0, + comm); + if(myrank == 0) + { + this->collision_pairs_global = vec2pairs(this->collision_pairs_vec_global); + print_pair_vec(this->collision_pairs_vec_global); + print_pair_set(this->collision_pairs_global); + } + this->synchronisation = true; + // free root rank memory + if (myrank == 0) + { + delete[] recvcounts; + delete[] displ; + } + } + + constexpr static bool isEnable() { + return true;//interaction has to be turned off here by hand. + } + + //get_collision_counter() will only give the correct result on rank 0 + long int get_collision_counter(MPI_Comm comm, int myrank, int nprocs) { + if(synchronisation==false){ + MPI_merge(comm, myrank, nprocs); + } + return this->collision_pairs_global.size(); + } + + //get_collision_pairs() will only give global pairs on rank 0 + std::set <std::pair <partsize_t,partsize_t>> get_collision_pairs(MPI_Comm comm, int myrank, int nprocs) { + if(synchronisation==false){ + MPI_merge(comm, myrank, nprocs); + } + return this->collision_pairs_global; + } + + std::vector <partsize_t> get_collision_pairs_vec(MPI_Comm comm, int myrank, int nprocs) { + if(synchronisation==false){ + MPI_merge(comm, myrank, nprocs); + } + return this->collision_pairs_vec_global; + } + + + void reset_collision_pairs(){ + this->collision_pairs_local.clear(); + this->collision_pairs_global.clear(); + this->collision_pairs_vec_global.clear(); + this->synchronisation = false; + } +}; + + +#endif // P2P_GHOST_COLLISION_BASE_HPP + diff --git a/cpp/particles/p2p_ghost_collisions.hpp b/cpp/particles/p2p_ghost_collisions.hpp index 997d8fbdf19bb256fbb3e3fc6f73cfdfd32c63cf..82662695af316fd7bbce7e311feec2c9c1c48ee0 100644 --- a/cpp/particles/p2p_ghost_collisions.hpp +++ b/cpp/particles/p2p_ghost_collisions.hpp @@ -24,49 +24,40 @@ #define P2P_GHOST_COLLISIONS_HPP #include <cstring> +#include <set> +#include <utility> +#include <vector> +#include "particles/p2p_ghost_collision_base.hpp" -template <class real_number, class partsize_t> -class p2p_ghost_collisions{ - long int collision_counter; +template <class real_number, class partsize_t> +class p2p_ghost_collisions: public p2p_ghost_collision_base<real_number, partsize_t> +{ public: - p2p_ghost_collisions() : collision_counter(0){} + p2p_ghost_collisions(){} // Copy constructor use a counter set to zero - p2p_ghost_collisions(const p2p_ghost_collisions&) : collision_counter(0){} - - template <int size_particle_rhs> - void init_result_array(real_number /*rhs*/[], const partsize_t /*nbParticles*/) const{ - } - - template <int size_particle_rhs> - void reduce_particles_rhs(real_number /*rhs_dst*/[], const real_number /*rhs_src*/[], const partsize_t /*nbParticles*/) const{ - } + p2p_ghost_collisions(const p2p_ghost_collisions&){} template <int size_particle_positions, int size_particle_rhs> - void compute_interaction(const real_number /*pos_part1*/[], real_number /*rhs_part1*/[], - const real_number /*pos_part2*/[], real_number /*rhs_part2*/[], - const real_number /*dist_pow2*/, const real_number /*cutoff*/, - const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/){ - collision_counter += 1; - } - - void merge(const p2p_ghost_collisions& other){ - collision_counter += other.collision_counter; - } - - constexpr static bool isEnable() { - return true; - } - - long int get_collision_counter() const{ - return collision_counter; - } - - void reset_collision_counter(){ - collision_counter = 0; + void compute_interaction(const partsize_t idx_part1, + const real_number /*pos_part1*/[], + real_number /*rhs_part1*/[], + const partsize_t idx_part2, + const real_number /*pos_part2*/[], + real_number /*rhs_part2*/[], + const real_number /*dist_pow2*/, + const real_number /*cutoff*/, + const real_number /*xshift_coef*/, + const real_number /*yshift_coef*/, + const real_number /*zshift_coef*/){ + std::pair <partsize_t, partsize_t> single_collision_pair(idx_part1, idx_part2); + this->collision_pairs_local.insert(single_collision_pair); + //DEBUG_MSG("inside compute interaction idx_part1 = %ld and idx_part2 = %ld\n", idx_part1, idx_part2); + assert(idx_part1!=idx_part2); } }; #endif // P2P_GHOST_COLLISIONS_HPP + diff --git a/cpp/particles/p2p_merge_collisions.hpp b/cpp/particles/p2p_merge_collisions.hpp new file mode 100644 index 0000000000000000000000000000000000000000..96fff9c2d7f6fa9f23d21ff346f1b8b819c9c1d5 --- /dev/null +++ b/cpp/particles/p2p_merge_collisions.hpp @@ -0,0 +1,76 @@ +/****************************************************************************** +* * +* Copyright 2019 Max Planck Institute for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +******************************************************************************/ +#ifndef P2P_MERGE_COLLISIONS_HPP +#define P2P_MERGE_COLLISIONS_HPP + +#include <cstring> + +template <class real_number, class partsize_t> +class p2p_merge_collisions{ + long int collision_counter; + + std::vector<partsize_t> mergedParticles; + +public: + p2p_merge_collisions() {} + + // Copy constructor use a counter set to zero + p2p_merge_collisions(const p2p_merge_collisions&){} + + template <int size_particle_rhs> + void init_result_array(real_number /*rhs*/[], const partsize_t /*nbParticles*/) const{ + } + + template <int size_particle_rhs> + void reduce_particles_rhs(real_number /*rhs_dst*/[], const real_number /*rhs_src*/[], const partsize_t /*nbParticles*/) const{ + } + + template <int size_particle_positions, int size_particle_rhs> + void compute_interaction(const partsize_t idx_part1, + const real_number /*pos_part1*/[], real_number /*rhs_part1*/[], + const partsize_t idx_part2, + const real_number /*pos_part2*/[], real_number /*rhs_part2*/[], + const real_number /*dist_pow2*/, const real_number /*cutoff*/, + const real_number /*xshift_coef*/, const real_number /*yshift_coef*/, const real_number /*zshift_coef*/){ + mergedParticles.emplace_back(std::max(idx_part1,idx_part2)); + } + + void merge(const p2p_merge_collisions& other){ + collision_counter.insert(collision_counter.end(), other.collision_counter.begin(), other.collision_counter.end()); + } + + constexpr static bool isEnable() { + return true; + } + + auto& get_merge_list() const{ + return collision_counter; + } + + void reset_merge_list(){ + mergedParticles.clear(); + } +}; + + +#endif // P2P_GHOST_COLLISIONS_HPP diff --git a/cpp/particles/particles_system.hpp b/cpp/particles/particles_system.hpp index 61bc35402dc384af8498bac1f674418373a46987..9e29861284f362b2d2e7a5ba412faf8a51b73a14 100644 --- a/cpp/particles/particles_system.hpp +++ b/cpp/particles/particles_system.hpp @@ -27,6 +27,8 @@ #define PARTICLES_SYSTEM_HPP #include <array> +#include <set> +#include <algorithm> #include "abstract_particles_system.hpp" #include "abstract_particles_system_with_p2p.hpp" @@ -72,7 +74,7 @@ class particles_system : public abstract_particles_system_with_p2p<partsize_t, r std::unique_ptr<real_number[]> my_particles_positions; std::unique_ptr<partsize_t[]> my_particles_positions_indexes; partsize_t my_nb_particles; - const partsize_t total_nb_particles; + partsize_t total_nb_particles; std::vector<std::unique_ptr<real_number[]>> my_particles_rhs; std::vector<hsize_t> particle_file_layout; @@ -161,6 +163,106 @@ public: } } + void delete_particles_from_indexes(const std::vector<partsize_t>& inIndexToDelete) final { + // We consider that the given indexes are here or in our neighbors, + // so we exchange them + int my_rank; + AssertMpi(MPI_Comm_rank(mpi_com, &my_rank)); + int nb_processes; + AssertMpi(MPI_Comm_size(mpi_com, &nb_processes)); + + std::set<partsize_t> setIndexToDelete(inIndexToDelete.begin(), inIndexToDelete.end()); + + if(nb_processes > 1){ + const int TopToBottom = 0; + const int BottomToTop = 1; + + partsize_t nbIndexesFromTop = 0; + partsize_t nbIndexesFromBottom = 0; + partsize_t myNbIndexes = inIndexToDelete.size(); + { + MPI_Request requests[4]; + AssertMpi(MPI_Isend(&myNbIndexes, 1, particles_utils::GetMpiType(partsize_t()), + (my_rank+1)%nb_processes, BottomToTop, mpi_com, &requests[0])); + AssertMpi(MPI_Isend(&myNbIndexes, 1, particles_utils::GetMpiType(partsize_t()), + (my_rank+nb_processes-1)%nb_processes, TopToBottom, mpi_com, &requests[1])); + + AssertMpi(MPI_Irecv(&nbIndexesFromTop, 1, particles_utils::GetMpiType(partsize_t()), + (my_rank+1)%nb_processes, TopToBottom, mpi_com, &requests[2])); + AssertMpi(MPI_Irecv(&nbIndexesFromBottom, 1, particles_utils::GetMpiType(partsize_t()), + (my_rank+nb_processes-1)%nb_processes, BottomToTop, mpi_com, &requests[3])); + + AssertMpi(MPI_Waitall(4, requests, MPI_STATUSES_IGNORE)); + } + { + MPI_Request requests[4]; + int nbRequests = 0; + + if(myNbIndexes){ + AssertMpi(MPI_Isend(&inIndexToDelete[0], myNbIndexes, particles_utils::GetMpiType(partsize_t()), + (my_rank+1)%nb_processes, BottomToTop, mpi_com, &requests[nbRequests++])); + AssertMpi(MPI_Isend(&inIndexToDelete[0], myNbIndexes, particles_utils::GetMpiType(partsize_t()), + (my_rank+nb_processes-1)%nb_processes, TopToBottom, mpi_com, &requests[nbRequests++])); + } + + std::vector<partsize_t> indexesFromTop(nbIndexesFromTop); + std::vector<partsize_t> indexesFromBottom(nbIndexesFromTop); + + if(nbIndexesFromTop){ + AssertMpi(MPI_Irecv(&indexesFromTop[0], int(nbIndexesFromTop), particles_utils::GetMpiType(partsize_t()), + (my_rank+1)%nb_processes, TopToBottom, mpi_com, &requests[nbRequests++])); + } + if(nbIndexesFromBottom){ + AssertMpi(MPI_Irecv(&indexesFromBottom[0], int(nbIndexesFromTop), particles_utils::GetMpiType(partsize_t()), + (my_rank+nb_processes-1)%nb_processes, BottomToTop, mpi_com, &requests[nbRequests++])); + } + + AssertMpi(MPI_Waitall(nbRequests, requests, MPI_STATUSES_IGNORE)); + + std::copy( indexesFromTop.begin(), indexesFromTop.end(), std::inserter( setIndexToDelete, setIndexToDelete.end() ) ); + std::copy( indexesFromBottom.begin(), indexesFromBottom.end(), std::inserter( setIndexToDelete, setIndexToDelete.end() ) ); + } + } + + if(setIndexToDelete.size()){ + partsize_t nbDeletedParticles = 0; + + for(int idxPartition = 0 ; idxPartition < partition_interval_size ; ++idxPartition){ + const partsize_t nbDeletedParticlesBefore = nbDeletedParticles; + for(partsize_t idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ + if(setIndexToDelete.find(my_particles_positions_indexes[idx]) != setIndexToDelete.end()){ + nbDeletedParticles += 1; + } + else if(nbDeletedParticles){ + my_particles_positions_indexes[idx-nbDeletedParticles] = my_particles_positions_indexes[idx]; + + for(int idx_pos = 0 ; idx_pos < size_particle_positions ; ++idx_pos){ + my_particles_positions[(idx-nbDeletedParticles)*size_particle_positions+idx_pos] = + my_particles_positions[idx*size_particle_positions+idx_pos]; + } + + for(int idx_rhs = 0 ; idx_rhs < int(my_particles_rhs.size()) ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + my_particles_rhs[idx_rhs][(idx-nbDeletedParticles)*size_particle_rhs + idx_val] = + my_particles_rhs[idx_rhs][idx*size_particle_rhs + idx_val]; + } + } + } + } + + current_offset_particles_for_partition[idxPartition] -= nbDeletedParticlesBefore; + } + + current_offset_particles_for_partition[partition_interval_size] -= nbDeletedParticles; + + my_nb_particles -= nbDeletedParticles; + assert(my_nb_particles == current_offset_particles_for_partition[partition_interval_size]); + } + + AssertMpi(MPI_Allreduce(const_cast<partsize_t*>(&my_nb_particles), &total_nb_particles, 1, + particles_utils::GetMpiType(partsize_t()), MPI_SUM, mpi_com)); + } + void compute() final { TIMEZONE("particles_system::compute"); particles_distr.template compute_distr<computer_class, field_class, size_particle_positions, size_particle_rhs>( @@ -177,7 +279,7 @@ public: TIMEZONE("particles_system::compute_p2p"); distr_p2p.template compute_distr<p2p_computer_class, size_particle_positions, size_particle_rhs>( computer_p2p, current_my_nb_particles_per_partition.get(), - my_particles_positions.get(), my_particles_rhs.front().get(), + my_particles_positions.get(), my_particles_rhs.data(), int(my_particles_rhs.size()), my_particles_positions_indexes.get()); } } @@ -426,6 +528,14 @@ public: p2p_computer_class& getP2PComputer() final{ return computer_p2p; } + + const particles_inner_computer_class& getInnerComputer() const{ + return computer_particules_inner; + } + + particles_inner_computer_class& getInnnerComputer(){ + return computer_particules_inner; + } }; diff --git a/tests/run_all_tests.sh b/tests/run_all_tests.sh index 0d1b641c37a9daf745f3d0440eeccf86dee8fd9c..227df5ee533d37d7039b70cfa41ed892f1bd3e89 100644 --- a/tests/run_all_tests.sh +++ b/tests/run_all_tests.sh @@ -6,6 +6,8 @@ turtle.test_fftw turtle.test_Parseval turtle.test_NSVEparticles +turtle DNS static_field_with_ghost_collisions --simname dumb_test_of_ghost_collisions + # test postprocessing turtle PP field_single_to_double --simname dns_nsveparticles --iter0 32 --iter1 32 turtle PP get_rfields --simname dns_nsveparticles --iter0 0 --iter1 64 --TrS2_on 1