diff --git a/bfps/cpp/particles/abstract_particles_distr.hpp b/bfps/cpp/particles/abstract_particles_distr.hpp index 1fb63cc0696c9c07c3938d1951183b426d08768d..911dd6e2901bd07679fa6c6da140b35c1d846434 100644 --- a/bfps/cpp/particles/abstract_particles_distr.hpp +++ b/bfps/cpp/particles/abstract_particles_distr.hpp @@ -78,6 +78,7 @@ protected: const std::pair<int,int> current_partition_interval; const int current_partition_size; + const std::array<size_t,3> field_grid_dim; std::unique_ptr<int[]> partition_interval_size_per_proc; std::unique_ptr<int[]> partition_interval_offset_per_proc; @@ -92,11 +93,13 @@ public: //////////////////////////////////////////////////////////////////////////// abstract_particles_distr(MPI_Comm in_current_com, - const std::pair<int,int>& in_current_partitions) + const std::pair<int,int>& in_current_partitions, + const std::array<size_t,3>& in_field_grid_dim) : current_com(in_current_com), my_rank(-1), nb_processes(-1),nb_processes_involved(-1), current_partition_interval(in_current_partitions), - current_partition_size(current_partition_interval.second-current_partition_interval.first){ + current_partition_size(current_partition_interval.second-current_partition_interval.first), + field_grid_dim(in_field_grid_dim){ AssertMpi(MPI_Comm_rank(current_com, &my_rank)); AssertMpi(MPI_Comm_size(current_com, &nb_processes)); @@ -123,6 +126,8 @@ public: for(int idx_proc_involved = 0 ; idx_proc_involved < nb_processes_involved ; ++idx_proc_involved){ assert(partition_interval_size_per_proc[idx_proc_involved] != 0); } + + assert(int(field_grid_dim[IDX_Z]) == partition_interval_offset_per_proc[nb_processes_involved]); } virtual ~abstract_particles_distr(){} @@ -486,10 +491,7 @@ public: int* nb_particles, std::unique_ptr<real_number[]>* inout_positions_particles, std::unique_ptr<real_number[]> inout_rhs_particles[], const int in_nb_rhs, - std::unique_ptr<int[]>* inout_index_particles, - const real_number mySpatialLowLimit, - const real_number mySpatialUpLimit, - const real_number spatialPartitionWidth){ + std::unique_ptr<int[]>* inout_index_particles){ TIMEZONE("redistribute"); // Some latest processes might not be involved @@ -508,7 +510,11 @@ public: // Find particles outside my interval const int nbOutLower = particles_utils::partition_extra<size_particle_positions>(&(*inout_positions_particles)[0], current_my_nb_particles_per_partition[0], [&](const real_number val[]){ - const bool isLower = val[IDX_Z] < mySpatialLowLimit; + const int partition_level = pbc_field_layer(val[IDX_Z], IDX_Z); + assert(partition_level == current_partition_interval.first + || partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]) + || partition_level == (current_partition_interval.first+1)%int(field_grid_dim[IDX_Z])); + const bool isLower = partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]); return isLower; }, [&](const int idx1, const int idx2){ @@ -529,7 +535,11 @@ public: &(*inout_positions_particles)[(current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow)*size_particle_positions], myTotalNbParticles - (current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow), [&](const real_number val[]){ - const bool isUpper = mySpatialUpLimit <= val[IDX_Z]; + const int partition_level = pbc_field_layer(val[IDX_Z], IDX_Z); + assert(partition_level == (current_partition_interval.second-1) + || partition_level == ((current_partition_interval.second-1)-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]) + || partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDX_Z])); + const bool isUpper = (partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDX_Z])); return !isUpper; }, [&](const int idx1, const int idx2){ @@ -556,200 +566,140 @@ public: std::vector<std::unique_ptr<real_number[]>> newParticlesLowRhs(in_nb_rhs); std::vector<std::unique_ptr<real_number[]>> newParticlesUpRhs(in_nb_rhs); - const bool more_than_one_thread = (omp_get_max_threads() > 1); - - TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads()) - #pragma omp parallel default(shared) { - #pragma omp master - { - assert(whatNext.size() == 0); - assert(mpiRequests.size() == 0); + assert(whatNext.size() == 0); + assert(mpiRequests.size() == 0); + + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_NB_LOW, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&nbNewFromLow, 1, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + eventsBeforeWaitall += 1; + + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<int*>(&nbOutLower), 1, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); - whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_NB_LOW, -1}); + if(nbOutLower){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&nbNewFromLow, 1, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_NB_PARTICLES, + AssertMpi(MPI_Isend(&(*inout_positions_particles)[0], nbOutLower*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, MPI_COMM_WORLD, &mpiRequests.back())); - eventsBeforeWaitall += 1; - whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(const_cast<int*>(&nbOutLower), 1, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_NB_PARTICLES, + AssertMpi(MPI_Isend(&(*inout_index_particles)[0], nbOutLower, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); - if(nbOutLower){ - whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); - mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&(*inout_positions_particles)[0], nbOutLower*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, - MPI_COMM_WORLD, &mpiRequests.back())); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&(*inout_index_particles)[0], nbOutLower, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][0], nbOutLower*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, MPI_COMM_WORLD, &mpiRequests.back())); - - for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ - whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); - mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][0], nbOutLower*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, - MPI_COMM_WORLD, &mpiRequests.back())); - } } + } + + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_NB_UP, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&nbNewFromUp, 1, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + eventsBeforeWaitall += 1; - whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_NB_UP, -1}); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<int*>(&nbOutUpper), 1, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + + if(nbOutUpper){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&nbNewFromUp, 1, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_NB_PARTICLES, + AssertMpi(MPI_Isend(&(*inout_positions_particles)[(myTotalNbParticles-nbOutUpper)*size_particle_positions], nbOutUpper*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, MPI_COMM_WORLD, &mpiRequests.back())); - eventsBeforeWaitall += 1; - whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(const_cast<int*>(&nbOutUpper), 1, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_NB_PARTICLES, + AssertMpi(MPI_Isend(&(*inout_index_particles)[(myTotalNbParticles-nbOutUpper)], nbOutUpper, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); - if(nbOutUpper){ - whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); - mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&(*inout_positions_particles)[(myTotalNbParticles-nbOutUpper)*size_particle_positions], nbOutUpper*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, - MPI_COMM_WORLD, &mpiRequests.back())); + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&(*inout_index_particles)[(myTotalNbParticles-nbOutUpper)], nbOutUpper, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, + AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][(myTotalNbParticles-nbOutUpper)*size_particle_rhs], nbOutUpper*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, MPI_COMM_WORLD, &mpiRequests.back())); + } + } + while(mpiRequests.size() && eventsBeforeWaitall){ + int idxDone = mpiRequests.size(); + { + TIMEZONE("waitany_move"); + AssertMpi(MPI_Waitany(mpiRequests.size(), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); + } + const std::pair<Action, int> releasedAction = whatNext[idxDone]; + std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]); + std::swap(whatNext[idxDone], whatNext[mpiRequests.size()-1]); + mpiRequests.pop_back(); + whatNext.pop_back(); + + if(releasedAction.first == RECV_MOVE_NB_LOW){ + if(nbNewFromLow){ + assert(newParticlesLow == nullptr); + newParticlesLow.reset(new real_number[nbNewFromLow*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_LOW, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&newParticlesLow[0], nbNewFromLow*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); - for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + newParticlesLowIndexes.reset(new int[nbNewFromLow]); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][(myTotalNbParticles-nbOutUpper)*size_particle_rhs], nbOutUpper*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, + AssertMpi(MPI_Irecv(&newParticlesLowIndexes[0], nbNewFromLow, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); - } - } - - while(mpiRequests.size() && eventsBeforeWaitall){ - int idxDone = mpiRequests.size(); - { - TIMEZONE("waitany_move"); - AssertMpi(MPI_Waitany(mpiRequests.size(), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); - } - const std::pair<Action, int> releasedAction = whatNext[idxDone]; - std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]); - std::swap(whatNext[idxDone], whatNext[mpiRequests.size()-1]); - mpiRequests.pop_back(); - whatNext.pop_back(); - if(releasedAction.first == RECV_MOVE_NB_LOW){ - if(nbNewFromLow){ - assert(newParticlesLow == nullptr); - newParticlesLow.reset(new real_number[nbNewFromLow*size_particle_positions]); - whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_LOW, -1}); - mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesLow[0], nbNewFromLow*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, - MPI_COMM_WORLD, &mpiRequests.back())); - - newParticlesLowIndexes.reset(new int[nbNewFromLow]); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + newParticlesLowRhs[idx_rhs].reset(new real_number[nbNewFromLow*size_particle_rhs]); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesLowIndexes[0], nbNewFromLow, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, + AssertMpi(MPI_Irecv(&newParticlesLowRhs[idx_rhs][0], nbNewFromLow*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, MPI_COMM_WORLD, &mpiRequests.back())); - - for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ - newParticlesLowRhs[idx_rhs].reset(new real_number[nbNewFromLow*size_particle_rhs]); - whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); - mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesLowRhs[idx_rhs][0], nbNewFromLow*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, - MPI_COMM_WORLD, &mpiRequests.back())); - } } - eventsBeforeWaitall -= 1; } - else if(releasedAction.first == RECV_MOVE_NB_UP){ - if(nbNewFromUp){ - assert(newParticlesUp == nullptr); - newParticlesUp.reset(new real_number[nbNewFromUp*size_particle_positions]); - whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_UP, -1}); - mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesUp[0], nbNewFromUp*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, - MPI_COMM_WORLD, &mpiRequests.back())); + eventsBeforeWaitall -= 1; + } + else if(releasedAction.first == RECV_MOVE_NB_UP){ + if(nbNewFromUp){ + assert(newParticlesUp == nullptr); + newParticlesUp.reset(new real_number[nbNewFromUp*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_UP, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&newParticlesUp[0], nbNewFromUp*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); - newParticlesUpIndexes.reset(new int[nbNewFromUp]); + newParticlesUpIndexes.reset(new int[nbNewFromUp]); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&newParticlesUpIndexes[0], nbNewFromUp, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + MPI_COMM_WORLD, &mpiRequests.back())); + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + newParticlesUpRhs[idx_rhs].reset(new real_number[nbNewFromUp*size_particle_rhs]); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesUpIndexes[0], nbNewFromUp, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + AssertMpi(MPI_Irecv(&newParticlesUpRhs[idx_rhs][0], nbNewFromUp*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, MPI_COMM_WORLD, &mpiRequests.back())); - - for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ - newParticlesUpRhs[idx_rhs].reset(new real_number[nbNewFromUp*size_particle_rhs]); - whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); - mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesUpRhs[idx_rhs][0], nbNewFromUp*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, - MPI_COMM_WORLD, &mpiRequests.back())); - } } - eventsBeforeWaitall -= 1; } + eventsBeforeWaitall -= 1; } - - if(mpiRequests.size()){ - // TODO Proceed when received - TIMEZONE("waitall-move"); - AssertMpi(MPI_Waitall(mpiRequests.size(), mpiRequests.data(), MPI_STATUSES_IGNORE)); - mpiRequests.clear(); - whatNext.clear(); - } - - // If we use thread, insert task to proceed received data - if(more_than_one_thread == true){ - TIMEZONE_OMP_INIT_PRETASK(timeZoneTaskKey) - #pragma omp taskgroup - { - if(nbNewFromLow){ - assert(newParticlesLow.get() != nullptr); - #pragma omp task TIMEZONE_OMP_PRAGMA_TASK_KEY(timeZoneTaskKey) - { - TIMEZONE_OMP_TASK("task-pbc", timeZoneTaskKey); - apply_pbc_z_new_particles(newParticlesLow.get(), nbNewFromLow); - apply_pbc_xy(newParticlesLow.get(), nbNewFromLow); - } - } - if(nbNewFromUp){ - assert(newParticlesUp.get() != nullptr); - #pragma omp task TIMEZONE_OMP_PRAGMA_TASK_KEY(timeZoneTaskKey) - { - TIMEZONE_OMP_TASK("task-pbc", timeZoneTaskKey); - apply_pbc_z_new_particles(newParticlesUp.get(), nbNewFromUp); - apply_pbc_xy(newParticlesUp.get(), nbNewFromUp); - } - } - } - } - } - // if we use threads and we are not master thread than proceed local data (not send/recv) - if(more_than_one_thread == true && omp_get_thread_num() > 0){ - TIMEZONE("apply_pbc_xy"); - const int nbOldParticles = myTotalNbParticles - nbOutLower - nbOutUpper; - particles_utils::IntervalSplitter<int> interval(nbOldParticles, - omp_get_num_threads()-1, - omp_get_thread_num()-1); - - apply_pbc_xy(&(*inout_positions_particles)[(nbOutLower+interval.getMyOffset())*size_particle_positions], interval.getMySize()); } - } - // If we do not use thread, process all data sequentially - if(more_than_one_thread == false){ - TIMEZONE("apply_pbc_z_new_particles"); - if(nbNewFromLow){ - assert(newParticlesLow.get() != nullptr); - apply_pbc_z_new_particles(newParticlesLow.get(), nbNewFromLow); - apply_pbc_xy(newParticlesLow.get(), nbNewFromLow); - } - if(nbNewFromUp){ - assert(newParticlesUp.get() != nullptr); - apply_pbc_z_new_particles(newParticlesUp.get(), nbNewFromUp); - apply_pbc_xy(newParticlesUp.get(), nbNewFromUp); + if(mpiRequests.size()){ + // TODO Proceed when received + TIMEZONE("waitall-move"); + AssertMpi(MPI_Waitall(mpiRequests.size(), mpiRequests.data(), MPI_STATUSES_IGNORE)); + mpiRequests.clear(); + whatNext.clear(); } - - apply_pbc_xy(&(*inout_positions_particles)[nbOutLower*size_particle_positions], myTotalNbParticles - nbOutLower - nbOutUpper); } // Realloc an merge @@ -810,8 +760,10 @@ public: particles_utils::partition_extra_z<size_particle_positions>(&(*inout_positions_particles)[0], myTotalNbParticles,current_partition_size, current_my_nb_particles_per_partition, current_offset_particles_for_partition.get(), - [&](const int idxPartition){ - return (idxPartition+1)*spatialPartitionWidth + mySpatialLowLimit; + [&](const real_number& z_pos){ + const int partition_level = pbc_field_layer(z_pos, IDX_Z); + assert(current_partition_interval.first <= partition_level && partition_level < current_partition_interval.second); + return partition_level - current_partition_interval.first; }, [&](const int idx1, const int idx2){ for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ @@ -830,12 +782,8 @@ public: for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ assert(current_my_nb_particles_per_partition[idxPartition] == current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]); - const real_number limitPartition = (idxPartition+1)*spatialPartitionWidth + mySpatialLowLimit; - for(int idx = 0 ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ - assert((*inout_positions_particles)[idx*3+IDX_Z] < limitPartition); - } - for(int idx = current_offset_particles_for_partition[idxPartition+1] ; idx < myTotalNbParticles ; ++idx){ - assert((*inout_positions_particles)[idx*3+IDX_Z] >= limitPartition); + for(int idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ + assert(pbc_field_layer((*inout_positions_particles)[idx*3+IDX_Z], IDX_Z)-current_partition_interval.first == idxPartition); } } } @@ -845,8 +793,7 @@ public: assert(mpiRequests.size() == 0); } - virtual void apply_pbc_z_new_particles(real_number* newParticlesLow, const int nbNewFromLow) const = 0; - virtual void apply_pbc_xy(real_number* inout_positions_particles, const int nbNew) const = 0; + virtual int pbc_field_layer(const real_number& a_z_pos, const int idx_dim) const = 0; //////////////////////////////////////////////////////////////////////////// diff --git a/bfps/cpp/particles/particles_field_computer.hpp b/bfps/cpp/particles/particles_field_computer.hpp index 2d755616b71b7d4656017733e82ffa18908011cf..96d58a26849d31203b2e4c1a4ea8722327c3c0cb 100644 --- a/bfps/cpp/particles/particles_field_computer.hpp +++ b/bfps/cpp/particles/particles_field_computer.hpp @@ -25,9 +25,8 @@ class particles_field_computer : public abstract_particles_distr<real_number, 3, const positions_updater_class positions_updater; const std::array<real_number,3> spatial_box_width; + const std::array<real_number,3> spatial_box_offset; const std::array<real_number,3> box_step_width; - const real_number my_spatial_low_limit_z; - const real_number my_spatial_up_limit_z; int deriv[3]; @@ -44,8 +43,11 @@ class particles_field_computer : public abstract_particles_distr<real_number, 3, } real_number get_norm_pos_in_cell(const real_number in_pos, const int idx_pos) const { - const real_number cell_idx = floor(in_pos/box_step_width[idx_pos]); - const real_number pos_in_cell = (in_pos - cell_idx*box_step_width[idx_pos]) / box_step_width[idx_pos]; + const real_number shifted_pos = in_pos - spatial_box_offset[idx_pos]; + const real_number nb_box_repeat = floor(shifted_pos/spatial_box_width[idx_pos]); + const real_number pos_in_box = shifted_pos - nb_box_repeat*spatial_box_width[idx_pos]; + const real_number cell_idx = floor(pos_in_box/box_step_width[idx_pos]); + const real_number pos_in_cell = (pos_in_box - cell_idx*box_step_width[idx_pos]) / box_step_width[idx_pos]; assert(0 <= pos_in_cell && pos_in_cell < 1); return pos_in_cell; } @@ -68,9 +70,9 @@ class particles_field_computer : public abstract_particles_distr<real_number, 3, interpolator.compute_beta(deriv[IDX_Y], reltv_y, by); interpolator.compute_beta(deriv[IDX_Z], reltv_z, bz); - const int partGridIdx_x = int(particles_positions[idxPart*3+IDX_X]/box_step_width[IDX_X]); - const int partGridIdx_y = int(particles_positions[idxPart*3+IDX_Y]/box_step_width[IDX_Y]); - const int partGridIdx_z = int(particles_positions[idxPart*3+IDX_Z]/box_step_width[IDX_Z]); + const int partGridIdx_x = pbc_field_layer(particles_positions[idxPart*3+IDX_X], IDX_X); + const int partGridIdx_y = pbc_field_layer(particles_positions[idxPart*3+IDX_Y], IDX_Y); + const int partGridIdx_z = pbc_field_layer(particles_positions[idxPart*3+IDX_Z], IDX_Z); assert(0 <= partGridIdx_x && partGridIdx_x < int(field_grid_dim[IDX_X])); assert(0 <= partGridIdx_y && partGridIdx_y < int(field_grid_dim[IDX_Y])); @@ -140,7 +142,6 @@ class particles_field_computer : public abstract_particles_distr<real_number, 3, } } } - //DEBUG_MSG("exiting particles_field_computer::apply_computation\n"); } virtual void reduce_particles_rhs(real_number particles_current_rhs[], @@ -155,72 +156,18 @@ class particles_field_computer : public abstract_particles_distr<real_number, 3, } } - - //////////////////////////////////////////////////////////////////////// - /// Re-distribution related - //////////////////////////////////////////////////////////////////////// - - void apply_pbc_xy(real_number* inout_particles, const int size) const final { - TIMEZONE("particles_field_computer::apply_pbc_xy"); - const std::array<int, 2> dims_xy={IDX_X, IDX_Y}; - for(int idxPart = 0 ; idxPart < size ; ++idxPart){ - // Consider it will never move for more than one box repeatition - for(const int idxDim : dims_xy){ - if(inout_particles[idxPart*3+idxDim] < 0) inout_particles[idxPart*3+idxDim] += spatial_box_width[idxDim]; - else if(spatial_box_width[idxDim] <= inout_particles[idxPart*3+idxDim]) inout_particles[idxPart*3+idxDim] -= spatial_box_width[idxDim]; - assert(0 <= inout_particles[idxPart*3+idxDim] && inout_particles[idxPart*3+idxDim] < spatial_box_width[idxDim]); - } - } - } - - void apply_pbc_z_new_particles(real_number* values, const int size) const final { - TIMEZONE("particles_field_computer::apply_pbc_z_new_particles"); - if(Parent::my_rank == 0){ - const int idxDim = IDX_Z; - for(int idxPart = 0 ; idxPart < size ; ++idxPart){ - assert(values[idxPart*3+idxDim] < my_spatial_up_limit_z || spatial_box_width[idxDim] <= values[idxPart*3+idxDim]); - assert(my_spatial_low_limit_z <= values[idxPart*3+idxDim]); - - if(spatial_box_width[idxDim] <= values[idxPart*3+idxDim]) values[idxPart*3+idxDim] -= spatial_box_width[idxDim]; - - assert(0 <= values[idxPart*3+idxDim] && values[idxPart*3+idxDim] < spatial_box_width[idxDim]); - assert(my_spatial_low_limit_z <= values[idxPart*3+idxDim] && values[idxPart*3+idxDim] < my_spatial_up_limit_z); - } - } - if(Parent::my_rank == Parent::nb_processes_involved - 1){ - const int idxDim = IDX_Z; - for(int idxPart = 0 ; idxPart < size ; ++idxPart){ - assert(my_spatial_low_limit_z <= values[idxPart*3+idxDim] || values[idxPart*3+idxDim] < 0); - assert(values[idxPart*3+idxDim] < spatial_box_width[idxDim]); - - if(values[idxPart*3+idxDim] < 0) values[idxPart*3+idxDim] += spatial_box_width[idxDim]; - - assert(0 <= values[idxPart*3+idxDim] && values[idxPart*3+idxDim] < spatial_box_width[idxDim]); - assert(my_spatial_low_limit_z <= values[idxPart*3+idxDim] && values[idxPart*3+idxDim] < my_spatial_up_limit_z); - } - } - if(Parent::my_rank != 0 && Parent::my_rank != Parent::nb_processes_involved - 1){ - const int idxDim = IDX_Z; - for(int idxPart = 0 ; idxPart < size ; ++idxPart){ - assert(my_spatial_low_limit_z <= values[idxPart*3+idxDim] && values[idxPart*3+idxDim] < my_spatial_up_limit_z); - } - } - } - public: particles_field_computer(MPI_Comm in_current_com, const std::array<size_t,3>& in_field_grid_dim, const std::pair<int,int>& in_current_partitions, const interpolator_class& in_interpolator, const field_class& in_field, - const std::array<real_number,3>& in_spatial_box_width, - const std::array<real_number,3>& in_box_step_width, const real_number in_my_spatial_low_limit_z, - const real_number in_my_spatial_up_limit_z) - : abstract_particles_distr<real_number, 3,3,1>(in_current_com, in_current_partitions), + const std::array<real_number,3>& in_spatial_box_width, const std::array<real_number,3>& in_spatial_box_offset, + const std::array<real_number,3>& in_box_step_width) + : abstract_particles_distr<real_number, 3,3,1>(in_current_com, in_current_partitions, in_field_grid_dim), field_grid_dim(in_field_grid_dim), current_partition_interval(in_current_partitions), interpolator(in_interpolator), field(in_field), positions_updater(), - spatial_box_width(in_spatial_box_width), box_step_width(in_box_step_width), - my_spatial_low_limit_z(in_my_spatial_low_limit_z), my_spatial_up_limit_z(in_my_spatial_up_limit_z){ + spatial_box_width(in_spatial_box_width), spatial_box_offset(in_spatial_box_offset), box_step_width(in_box_step_width){ deriv[IDX_X] = 0; deriv[IDX_Y] = 0; deriv[IDX_Z] = 0; @@ -239,6 +186,19 @@ public: particles_current_rhs, nb_rhs, dt); } + //////////////////////////////////////////////////////////////////////// + /// Re-distribution related + //////////////////////////////////////////////////////////////////////// + + virtual int pbc_field_layer(const real_number& a_z_pos, const int idx_dim) const final { + const real_number shifted_pos = a_z_pos - spatial_box_offset[idx_dim]; + const int nb_level_to_pos = int(shifted_pos/box_step_width[idx_dim]); + const int int_field_grid_dim = int(field_grid_dim[idx_dim]); + const int pbc_level = ((nb_level_to_pos%int_field_grid_dim)+int_field_grid_dim)%int_field_grid_dim; + assert(0 <= pbc_level && pbc_level < int(field_grid_dim[idx_dim])); + return pbc_level; + } + }; diff --git a/bfps/cpp/particles/particles_input_hdf5.hpp b/bfps/cpp/particles/particles_input_hdf5.hpp index ce1bddc152f459576d3e539ed1a73fde69a651e3..d08e3a986f80025c54b6fb4374f7d4fc6737e86c 100644 --- a/bfps/cpp/particles/particles_input_hdf5.hpp +++ b/bfps/cpp/particles/particles_input_hdf5.hpp @@ -216,14 +216,21 @@ public: std::vector<int> nb_particles_per_proc(nb_processes); { TIMEZONE("partition"); + + const real_number spatial_box_offset = in_spatial_limit_per_proc[0]; + const real_number spatial_box_width = in_spatial_limit_per_proc[nb_processes] - in_spatial_limit_per_proc[0]; + int previousOffset = 0; for(int idx_proc = 0 ; idx_proc < nb_processes-1 ; ++idx_proc){ - const real_number limitPartition = in_spatial_limit_per_proc[idx_proc+1]; + const real_number limitPartitionShifted = in_spatial_limit_per_proc[idx_proc+1]-spatial_box_offset; const int localOffset = particles_utils::partition_extra<size_particle_positions>( &split_particles_positions[previousOffset*size_particle_positions], load_splitter.getMySize()-previousOffset, [&](const real_number val[]){ - return val[IDX_Z] < limitPartition; + const real_number shiftPos = val[IDX_Z]-spatial_box_offset; + const real_number nbRepeat = floor(shiftPos/spatial_box_width); + const real_number posInBox = shiftPos - (spatial_box_width*nbRepeat); + return posInBox < limitPartitionShifted; }, [&](const int idx1, const int idx2){ std::swap(split_particles_indexes[idx1], split_particles_indexes[idx2]); diff --git a/bfps/cpp/particles/particles_system.hpp b/bfps/cpp/particles/particles_system.hpp index 459e7a79f132fe6e1c4d47498e73edd62b4e5306..48b4baba86c84346538b007f14bf8fb316de7cf2 100644 --- a/bfps/cpp/particles/particles_system.hpp +++ b/bfps/cpp/particles/particles_system.hpp @@ -42,6 +42,7 @@ class particles_system : public abstract_particles_system<real_number> { public: particles_system(const std::array<size_t,3>& field_grid_dim, const std::array<real_number,3>& in_spatial_box_width, + const std::array<real_number,3>& in_spatial_box_offset, const std::array<real_number,3>& in_spatial_partition_width, const real_number in_my_spatial_low_limit, const real_number in_my_spatial_up_limit, const field_rnumber* in_field_data, const std::array<size_t,3>& in_local_field_dims, @@ -54,8 +55,7 @@ public: field(in_field_data, in_local_field_dims, in_local_field_offset, in_field_memory_dims), interpolator(), computer(in_mpi_com, field_grid_dim, current_partition_interval, - interpolator, field, in_spatial_box_width, in_spatial_partition_width, - in_my_spatial_low_limit, in_my_spatial_up_limit), + interpolator, field, in_spatial_box_width, in_spatial_box_offset, in_spatial_partition_width), spatial_box_width(in_spatial_box_width), spatial_partition_width(in_spatial_partition_width), my_spatial_low_limit(in_my_spatial_low_limit), my_spatial_up_limit(in_my_spatial_up_limit), my_nb_particles(0), step_idx(1){ @@ -76,15 +76,17 @@ public: my_nb_particles = particles_input.getLocalNbParticles(); for(int idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me - assert(my_particles_positions[idx_part*3+IDX_Z] >= my_spatial_low_limit); - assert(my_particles_positions[idx_part*3+IDX_Z] < my_spatial_up_limit); + const int partition_level = computer.pbc_field_layer(my_particles_positions[idx_part*3+IDX_Z], IDX_Z); + assert(partition_level >= current_partition_interval.first); + assert(partition_level < current_partition_interval.second); } particles_utils::partition_extra_z<3>(&my_particles_positions[0], my_nb_particles, partition_interval_size, current_my_nb_particles_per_partition.get(), current_offset_particles_for_partition.get(), - [&](const int idxPartition){ - const real_number limitPartition = (idxPartition+1)*spatial_partition_width[IDX_Z] + my_spatial_low_limit; - return limitPartition; + [&](const real_number& z_pos){ + const int partition_level = computer.pbc_field_layer(z_pos, IDX_Z); + assert(current_partition_interval.first <= partition_level && partition_level < current_partition_interval.second); + return partition_level - current_partition_interval.first; }, [&](const int idx1, const int idx2){ std::swap(my_particles_positions_indexes[idx1], my_particles_positions_indexes[idx2]); @@ -100,12 +102,8 @@ public: for(int idxPartition = 0 ; idxPartition < partition_interval_size ; ++idxPartition){ assert(current_my_nb_particles_per_partition[idxPartition] == current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]); - const real_number limitPartition = (idxPartition+1)*spatial_partition_width[IDX_Z] + my_spatial_low_limit; - for(int idx = 0 ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ - assert(my_particles_positions[idx*3+IDX_Z] < limitPartition); - } - for(int idx = current_offset_particles_for_partition[idxPartition+1] ; idx < my_nb_particles ; ++idx){ - assert(my_particles_positions[idx*3+IDX_Z] >= limitPartition); + for(int idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ + assert(computer.pbc_field_layer(my_particles_positions[idx*3+IDX_Z], IDX_Z)-current_partition_interval.first == idxPartition); } } } @@ -133,10 +131,7 @@ public: &my_nb_particles, &my_particles_positions, my_particles_rhs.data(), my_particles_rhs.size(), - &my_particles_positions_indexes, - my_spatial_low_limit, - my_spatial_up_limit, - spatial_partition_width[IDX_Z]); + &my_particles_positions_indexes); } void inc_step_idx() final { diff --git a/bfps/cpp/particles/particles_system_builder.hpp b/bfps/cpp/particles/particles_system_builder.hpp index 33f11fbc26e86c50e54e86777dcb77fa8117f5bf..f828343742cec3103942c1c690f95f3942fa3435 100644 --- a/bfps/cpp/particles/particles_system_builder.hpp +++ b/bfps/cpp/particles/particles_system_builder.hpp @@ -181,6 +181,12 @@ struct particles_system_build_container { spatial_box_width[IDX_Y] = 4 * acos(0) / (fs_kk->dky); spatial_box_width[IDX_Z] = 4 * acos(0) / (fs_kk->dkz); + // Box is in the corner + std::array<particles_rnumber,3> spatial_box_offset; + spatial_box_offset[IDX_X] = 0; + spatial_box_offset[IDX_Y] = 0; + spatial_box_offset[IDX_Z] = 0; + // The distance between two field nodes in z std::array<particles_rnumber,3> spatial_partition_width; spatial_partition_width[IDX_X] = spatial_box_width[IDX_X]/particles_rnumber(field_grid_dim[IDX_X]); @@ -194,6 +200,7 @@ struct particles_system_build_container { particles_system<particles_rnumber, field_rnumber, particles_interp_spline<particles_rnumber, interpolation_size,spline_mode>, interpolation_size>* part_sys = new particles_system<particles_rnumber, field_rnumber, particles_interp_spline<particles_rnumber, interpolation_size,spline_mode>, interpolation_size>(field_grid_dim, spatial_box_width, + spatial_box_offset, spatial_partition_width, my_spatial_low_limit_z, my_spatial_up_limit_z, diff --git a/bfps/cpp/particles/particles_utils.hpp b/bfps/cpp/particles/particles_utils.hpp index db3d00d77e70dafa0257419ece7bd5669720580b..9e9e1a64173e2ea498fb5afa0eedde79ed30c8e3 100644 --- a/bfps/cpp/particles/particles_utils.hpp +++ b/bfps/cpp/particles/particles_utils.hpp @@ -99,7 +99,7 @@ inline int partition_extra(real_number* array, const int size, Predicate1 pdc, P template <int nb_values, class real_number, class Predicate1, class Predicate2> inline void partition_extra_z(real_number* array, const int size, const int nb_partitions, int partitions_size[], int partitions_offset[], - Predicate1 partitions_limits, Predicate2 pdcswap) + Predicate1 partitions_levels, Predicate2 pdcswap) { if(nb_partitions == 0){ return ; @@ -114,10 +114,9 @@ inline void partition_extra_z(real_number* array, const int size, const int nb_p } if(nb_partitions == 2){ - const real_number limit = partitions_limits(0); const int size_current = partition_extra<nb_values>(array, size, [&](const real_number inval[]){ - return inval[IDX_Z] < limit; + return partitions_levels(inval[IDX_Z]) == 0; }, pdcswap); partitions_size[0] = size_current; partitions_size[1] = size-size_current; @@ -143,11 +142,10 @@ inline void partition_extra_z(real_number* array, const int size, const int nb_p const int size_unpart = partitions_offset[current_part.second]- partitions_offset[current_part.first]; - const real_number limit = partitions_limits(idx_middle); const int size_current = partition_extra<nb_values>(&array[partitions_offset[current_part.first]*nb_values], size_unpart, [&](const real_number inval[]){ - return inval[IDX_Z] < limit; + return partitions_levels(inval[IDX_Z]) <= idx_middle; }, pdcswap, partitions_offset[current_part.first]); partitions_offset[idx_middle+1] = size_current + partitions_offset[current_part.first]; @@ -161,14 +159,14 @@ inline void partition_extra_z(real_number* array, const int size, const int nb_p template <int nb_values, class real_number, class Predicate1, class Predicate2> inline std::pair<std::vector<int>,std::vector<int>> partition_extra_z(real_number* array, const int size, - const int nb_partitions, Predicate1 partitions_limits, + const int nb_partitions, Predicate1 partitions_levels, Predicate2 pdcswap){ std::vector<int> partitions_size(nb_partitions); std::vector<int> partitions_offset(nb_partitions+1); partition_extra_z<nb_values, real_number, Predicate1, Predicate2>(array, size, nb_partitions, partitions_size.data(), partitions_offset.data(), - partitions_limits, pdcswap); + partitions_levels, pdcswap); return {std::move(partitions_size), std::move(partitions_offset)}; }