diff --git a/bfps/cpp/particles/p2p_computer.hpp b/bfps/cpp/particles/p2p_computer.hpp index eb77729fd0b9d6cdb0ad2586df3bf78696c1ef59..cc2ed2dc33c8eebc414b1e9666297f608fadf21f 100644 --- a/bfps/cpp/particles/p2p_computer.hpp +++ b/bfps/cpp/particles/p2p_computer.hpp @@ -20,9 +20,9 @@ public: } } - template <int size_particle_positions, int size_particle_rhs> - void compute_interaction(const real_number pos_part1[], real_number rhs_part1[], - const real_number pos_part2[], real_number rhs_part2[], + template <int size_particle_positions, int size_particle_data, int size_particle_rhs> + void compute_interaction(const real_number pos_part1[], const real_number data_part1[], real_number rhs_part1[], + const real_number pos_part2[], const real_number data_part2[], real_number rhs_part2[], const real_number dist_pow2, const real_number xshift_coef, const real_number yshift_coef, const real_number zshift_coef) const{ rhs_part1[0] += 1; diff --git a/bfps/cpp/particles/p2p_distr_mpi.hpp b/bfps/cpp/particles/p2p_distr_mpi.hpp index c148e2d31837575f4a3316d5238db44925b2dab7..6ed8158ae38c060c0b500aaac14381b42c5e3b39 100644 --- a/bfps/cpp/particles/p2p_distr_mpi.hpp +++ b/bfps/cpp/particles/p2p_distr_mpi.hpp @@ -31,9 +31,11 @@ protected: int destProc; int nbLevelsToExchange; bool isRecv; + bool positionsReceived; std::unique_ptr<real_number[]> toRecvAndMerge; std::unique_ptr<real_number[]> toCompute; + std::unique_ptr<real_number[]> toData; std::unique_ptr<real_number[]> results; }; @@ -41,6 +43,7 @@ protected: NOTHING_TODO, RECV_PARTICLES, COMPUTE_PARTICLES, + CHECK_PARTICLES, RELEASE_BUFFER_PARTICLES, MERGE_PARTICLES, @@ -234,10 +237,11 @@ public: return (diff_x*diff_x) + (diff_y*diff_y) + (diff_z*diff_z); } - template <class computer_class, int size_particle_positions, int size_particle_rhs> + template <class computer_class, int size_particle_positions, int size_particle_data, int size_particle_rhs> void compute_distr(computer_class& in_computer, const partsize_t current_my_nb_particles_per_partition[], real_number particles_positions[], + real_number particles_data[], real_number particles_current_rhs[], partsize_t inout_index_particles[]){ TIMEZONE("compute_distr"); @@ -313,6 +317,9 @@ public: permute_copy<real_number, size_particle_positions>(current_offset_particles_for_partition[idxPartition], current_my_nb_particles_per_partition[idxPartition], part_to_sort.data(), particles_positions, &buffer); + permute_copy<real_number, size_particle_data>(current_offset_particles_for_partition[idxPartition], + current_my_nb_particles_per_partition[idxPartition], + part_to_sort.data(), particles_data, &buffer); permute_copy<real_number, size_particle_rhs>(current_offset_particles_for_partition[idxPartition], current_my_nb_particles_per_partition[idxPartition], part_to_sort.data(), particles_current_rhs, &buffer); @@ -432,6 +439,7 @@ public: descriptor.nbLevelsToExchange = nb_levels_to_send; descriptor.nbParticlesToExchange = particles_offset_layers[my_nb_cell_levels] - particles_offset_layers[my_nb_cell_levels-nb_levels_to_send]; descriptor.isRecv = false; + descriptor.positionsReceived = false; std::cout << my_rank << " SEND" << std::endl; std::cout << ">> descriptor.destProc " << descriptor.destProc << std::endl; @@ -469,6 +477,7 @@ public: descriptor.nbLevelsToExchange = nb_levels_to_recv; descriptor.nbParticlesToExchange = -1; descriptor.isRecv = true; + descriptor.positionsReceived = false; neigDescriptors.emplace_back(std::move(descriptor)); @@ -523,6 +532,14 @@ public: descriptor.destProc, TAG_POSITION_PARTICLES, current_com, &mpiRequests.back())); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + assert(descriptor.nbParticlesToExchange*size_particle_data < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_data[particles_offset_layers[my_nb_cell_levels-descriptor.nbLevelsToExchange]*size_particle_data]), + int(descriptor.nbParticlesToExchange*size_particle_data), particles_utils::GetMpiType(real_number()), + descriptor.destProc, TAG_POSITION_PARTICLES, + current_com, &mpiRequests.back())); + assert(descriptor.toRecvAndMerge == nullptr); descriptor.toRecvAndMerge.reset(new real_number[descriptor.nbParticlesToExchange*size_particle_rhs]); whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr}); @@ -584,12 +601,21 @@ public: if(NbParticlesToReceive){ // std::cout << "MPI_Irecv " << std::endl; descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]); - whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second}); + whatNext.emplace_back(std::pair<Action,int>{CHECK_PARTICLES, releasedAction.second}); mpiRequests.emplace_back(); assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max()); AssertMpi(MPI_Irecv(descriptor.toCompute.get(), int(NbParticlesToReceive*size_particle_positions), particles_utils::GetMpiType(real_number()), destProc, TAG_POSITION_PARTICLES, current_com, &mpiRequests.back())); + + + descriptor.toData.reset(new real_number[NbParticlesToReceive*size_particle_data]); + whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + assert(NbParticlesToReceive*size_particle_data < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toData.get(), int(NbParticlesToReceive*size_particle_data), + particles_utils::GetMpiType(real_number()), destProc, TAG_POSITION_PARTICLES, + current_com, &mpiRequests.back())); } } @@ -603,6 +629,8 @@ public: const partsize_t NbParticlesToReceive = descriptor.nbParticlesToExchange; assert(descriptor.toCompute != nullptr); + assert(descriptor.toData != nullptr); + assert(descriptor.positionsReceived == true); descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]); in_computer.template init_result_array<size_particle_rhs>(descriptor.results.get(), NbParticlesToReceive); @@ -651,10 +679,12 @@ public: particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( + in_computer.template compute_interaction<size_particle_positions,size_particle_data, size_particle_rhs>( &descriptor.toCompute[(idxPart+idx_p1)*size_particle_positions], + &descriptor.toData[(idxPart+idx_p1)*size_particle_data], &descriptor.results[(idxPart+idx_p1)*size_particle_rhs], &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], + &particles_data[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_data], &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); } @@ -694,6 +724,15 @@ public: ////////////////////////////////////////////////////////////////////// /// Computation ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == CHECK_PARTICLES){ + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + assert(descriptor.toCompute != nullptr); + assert(descriptor.isRecv); + descriptor.positionsReceived = true; + } + ////////////////////////////////////////////////////////////////////// + /// Computation + ////////////////////////////////////////////////////////////////////// if(releasedAction.first == RELEASE_BUFFER_PARTICLES){ NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; assert(descriptor.toCompute != nullptr); @@ -758,10 +797,12 @@ public: particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions + IDX_Z], 0, 0, 0); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( + in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>( &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], + &particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data], &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], &particles_positions[(intervals[idx_1].first+idx_p2)*size_particle_positions], + &particles_data[(intervals[idx_1].first+idx_p2)*size_particle_data], &particles_current_rhs[(intervals[idx_1].first+idx_p2)*size_particle_rhs], dist_r2, 0, 0, 0); } @@ -801,10 +842,12 @@ public: particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], 0, 0, 0); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( + in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>( &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], + &particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data], &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], &particles_positions[(intervals[idx_2].first+idx_p2)*size_particle_positions], + &particles_data[(intervals[idx_2].first+idx_p2)*size_particle_data], &particles_current_rhs[(intervals[idx_2].first+idx_p2)*size_particle_rhs], dist_r2, 0, 0, 0); } @@ -862,10 +905,12 @@ public: particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions + IDX_Z], shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); if(dist_r2 < cutoff_radius_compute*cutoff_radius_compute){ - in_computer.template compute_interaction<size_particle_positions,size_particle_rhs>( + in_computer.template compute_interaction<size_particle_positions,size_particle_data,size_particle_rhs>( &particles_positions[(intervals[idx_1].first+idx_p1)*size_particle_positions], + &particles_data[(intervals[idx_1].first+idx_p1)*size_particle_data], &particles_current_rhs[(intervals[idx_1].first+idx_p1)*size_particle_rhs], &particles_positions[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_positions], + &particles_data[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_data], &particles_current_rhs[((*neighbors[idx_neighbor])[idx_2].first+idx_p2)*size_particle_rhs], dist_r2, shift[idx_neighbor][IDX_X], shift[idx_neighbor][IDX_Y], shift[idx_neighbor][IDX_Z]); }