diff --git a/bfps/cpp/particles/abstract_particles_distr.hpp b/bfps/cpp/particles/abstract_particles_distr.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1e6508cb359bcfa8fb7dd7a5bbbacd71f3498673 --- /dev/null +++ b/bfps/cpp/particles/abstract_particles_distr.hpp @@ -0,0 +1,835 @@ +#ifndef ABSTRACT_PARTICLES_DISTR_HPP +#define ABSTRACT_PARTICLES_DISTR_HPP + +#include <mpi.h> + +#include <vector> +#include <memory> +#include <cassert> + +#include <type_traits> + +#include "scope_timer.hpp" +#include "particles_utils.hpp" + +#ifndef AssertMpi +#define AssertMpi(X) if(MPI_SUCCESS != (X)) { printf("MPI Error at line %d\n",__LINE__); fflush(stdout) ; throw std::runtime_error("Stop from from mpi erro"); } +#endif + + +template <int size_particle_positions, int size_particle_rhs, int size_particle_index> +class abstract_particles_distr { +protected: + static const int MaxNbRhs = 100; + + // Used withing the loop, allocate only once + enum MpiTag{ + TAG_LOW_UP_NB_PARTICLES, + TAG_UP_LOW_NB_PARTICLES, + TAG_LOW_UP_PARTICLES, + TAG_UP_LOW_PARTICLES, + TAG_LOW_UP_RESULTS, + TAG_UP_LOW_RESULTS, + + TAG_LOW_UP_MOVED_NB_PARTICLES, + TAG_UP_LOW_MOVED_NB_PARTICLES, + TAG_LOW_UP_MOVED_PARTICLES, + TAG_UP_LOW_MOVED_PARTICLES, + + TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + TAG_UP_LOW_MOVED_PARTICLES_INDEXES, + + TAG_LOW_UP_MOVED_PARTICLES_RHS, + TAG_LOW_UP_MOVED_PARTICLES_RHS_MAX = TAG_LOW_UP_MOVED_PARTICLES_RHS+MaxNbRhs, + + TAG_UP_LOW_MOVED_PARTICLES_RHS = TAG_LOW_UP_MOVED_PARTICLES_RHS_MAX, + TAG_UP_LOW_MOVED_PARTICLES_RHS_MAX = TAG_UP_LOW_MOVED_PARTICLES_RHS+MaxNbRhs, + }; + + struct NeighborDescriptor{ + int nbPartitionsToSend; + int nbPartitionsToRecv; + int nbParticlesToSend; + int nbParticlesToRecv; + int destProc; + int rankDiff; + bool isLower; + int idxLowerUpper; + + std::unique_ptr<double[]> toRecvAndMerge; + std::unique_ptr<double[]> toCompute; + std::unique_ptr<double[]> results; + }; + + enum Action{ + NOTHING_TODO, + RECV_PARTICLES, + COMPUTE_PARTICLES, + RELEASE_BUFFER_PARTICLES, + MERGE_PARTICLES, + + RECV_MOVE_NB_LOW, + RECV_MOVE_NB_UP, + RECV_MOVE_LOW, + RECV_MOVE_UP + }; + + MPI_Comm current_com; + + int my_rank; + int nb_processes; + + const std::pair<int,int> current_partition_interval; + const int current_partition_size; + + std::unique_ptr<int[]> partition_interval_size_per_proc; + std::unique_ptr<int[]> partition_interval_offset_per_proc; + + std::unique_ptr<int[]> current_offset_particles_for_partition; + + std::vector<std::pair<Action,int>> whatNext; + std::vector<MPI_Request> mpiRequests; + std::vector<NeighborDescriptor> neigDescriptors; + +public: + //////////////////////////////////////////////////////////////////////////// + + abstract_particles_distr(MPI_Comm in_current_com, + const std::pair<int,int>& in_current_partitions) + : current_com(in_current_com), + my_rank(-1), nb_processes(-1), + current_partition_interval(in_current_partitions), + current_partition_size(current_partition_interval.second-current_partition_interval.first){ + + AssertMpi(MPI_Comm_rank(current_com, &my_rank)); + AssertMpi(MPI_Comm_size(current_com, &nb_processes)); + + assert(current_partition_size >= 1); + + partition_interval_size_per_proc.reset(new int[nb_processes]); + AssertMpi( MPI_Allgather( const_cast<int*>(¤t_partition_size), 1, MPI_INT, + partition_interval_size_per_proc.get(), 1, MPI_INT, + current_com) ); + assert(partition_interval_size_per_proc[my_rank] == current_partition_size); + + partition_interval_offset_per_proc.reset(new int[nb_processes+1]); + partition_interval_offset_per_proc[0] = 0; + for(int idxProc = 0 ; idxProc < nb_processes ; ++idxProc){ + partition_interval_offset_per_proc[idxProc+1] = partition_interval_offset_per_proc[idxProc] + partition_interval_size_per_proc[idxProc]; + } + + current_offset_particles_for_partition.reset(new int[current_partition_size+1]); + } + + virtual ~abstract_particles_distr(){} + + //////////////////////////////////////////////////////////////////////////// + + void compute_distr(const int current_my_nb_particles_per_partition[], + const double particles_positions[], + double particles_current_rhs[], + const int interpolation_size){ + TIMEZONE("compute_distr"); + + current_offset_particles_for_partition[0] = 0; + int myTotalNbParticles = 0; + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + myTotalNbParticles += current_my_nb_particles_per_partition[idxPartition]; + current_offset_particles_for_partition[idxPartition+1] = current_offset_particles_for_partition[idxPartition] + current_my_nb_particles_per_partition[idxPartition]; + } + + ////////////////////////////////////////////////////////////////////// + /// Exchange the number of particles in each partition + /// Could involve only here but I do not think it will be a problem + ////////////////////////////////////////////////////////////////////// + + + assert(whatNext.size() == 0); + assert(mpiRequests.size() == 0); + + neigDescriptors.clear(); + + int nbProcToRecvLower; + { + int nextDestProc = my_rank; + for(int idxLower = 1 ; idxLower <= interpolation_size ; idxLower += partition_interval_size_per_proc[nextDestProc]){ + nextDestProc = (nextDestProc-1+nb_processes)%nb_processes; + const int destProc = nextDestProc; + const int lowerRankDiff = (nextDestProc < my_rank ? my_rank - nextDestProc : nb_processes-nextDestProc+my_rank); + + const int nbPartitionsToSend = std::min(current_partition_size, interpolation_size-(idxLower-1)); + const int nbParticlesToSend = current_offset_particles_for_partition[nbPartitionsToSend] - current_offset_particles_for_partition[0]; + + const int nbPartitionsToRecv = std::min(partition_interval_size_per_proc[destProc], (interpolation_size+1)-(idxLower-1)); + const int nbParticlesToRecv = -1; + + NeighborDescriptor descriptor; + descriptor.destProc = destProc; + descriptor.rankDiff = lowerRankDiff; + descriptor.nbPartitionsToSend = nbPartitionsToSend; + descriptor.nbParticlesToSend = nbParticlesToSend; + descriptor.nbPartitionsToRecv = nbPartitionsToRecv; + descriptor.nbParticlesToRecv = nbParticlesToRecv; + descriptor.isLower = true; + descriptor.idxLowerUpper = idxLower; + + neigDescriptors.emplace_back(std::move(descriptor)); + } + nbProcToRecvLower = neigDescriptors.size(); + + nextDestProc = my_rank; + for(int idxUpper = 1 ; idxUpper <= interpolation_size ; idxUpper += partition_interval_size_per_proc[nextDestProc]){ + nextDestProc = (nextDestProc+1+nb_processes)%nb_processes; + const int destProc = nextDestProc; + const int upperRankDiff = (nextDestProc > my_rank ? nextDestProc - my_rank: nb_processes-my_rank+nextDestProc); + + const int nbPartitionsToSend = std::min(current_partition_size, (interpolation_size+1)-(idxUpper-1)); + const int nbParticlesToSend = current_offset_particles_for_partition[current_partition_size] - current_offset_particles_for_partition[current_partition_size-nbPartitionsToSend]; + + const int nbPartitionsToRecv = std::min(partition_interval_size_per_proc[destProc], interpolation_size-(idxUpper-1)); + const int nbParticlesToRecv = -1; + + NeighborDescriptor descriptor; + descriptor.destProc = destProc; + descriptor.rankDiff = upperRankDiff; + descriptor.nbPartitionsToSend = nbPartitionsToSend; + descriptor.nbParticlesToSend = nbParticlesToSend; + descriptor.nbPartitionsToRecv = nbPartitionsToRecv; + descriptor.nbParticlesToRecv = nbParticlesToRecv; + descriptor.isLower = false; + descriptor.idxLowerUpper = idxUpper; + + neigDescriptors.emplace_back(std::move(descriptor)); + } + } + const int nbProcToRecvUpper = neigDescriptors.size()-nbProcToRecvLower; + const int nbProcToRecv = nbProcToRecvUpper + nbProcToRecvLower; + assert(neigDescriptors.size() == nbProcToRecv); + DEBUG_MSG("[%d] nbProcToRecvUpper %d\n", my_rank, nbProcToRecvUpper); + DEBUG_MSG("[%d] nbProcToRecvLower %d\n", my_rank, nbProcToRecvLower); + DEBUG_MSG("[%d] nbProcToRecv %d\n", my_rank, nbProcToRecv); + + for(int idxDescr = 0 ; idxDescr < neigDescriptors.size() ; ++idxDescr){ + NeighborDescriptor& descriptor = neigDescriptors[idxDescr]; + + if(descriptor.isLower){ + DEBUG_MSG("[%d] Send idxLower %d -- nbPartitionsToSend %d -- nbParticlesToSend %d\n", + my_rank, descriptor.idxLowerUpper, descriptor.nbPartitionsToSend, descriptor.nbParticlesToSend); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<int*>(&descriptor.nbParticlesToSend), 1, MPI_INT, descriptor.destProc, TAG_LOW_UP_NB_PARTICLES, + current_com, &mpiRequests.back())); + + if(descriptor.nbParticlesToSend){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<double*>(&particles_positions[0]), descriptor.nbParticlesToSend*size_particle_positions, MPI_DOUBLE, descriptor.destProc, TAG_LOW_UP_PARTICLES, + current_com, &mpiRequests.back())); + + assert(descriptor.toRecvAndMerge == nullptr); + descriptor.toRecvAndMerge.reset(new double[descriptor.nbParticlesToSend*size_particle_rhs]); + whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend*size_particle_rhs, MPI_DOUBLE, descriptor.destProc, TAG_UP_LOW_RESULTS, + current_com, &mpiRequests.back())); + } + + whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&descriptor.nbParticlesToRecv, + 1, MPI_INT, descriptor.destProc, TAG_UP_LOW_NB_PARTICLES, + current_com, &mpiRequests.back())); + } + else{ + DEBUG_MSG("[%d] Send idxUpper %d -- nbPartitionsToSend %d -- nbParticlesToSend %d\n", + my_rank, descriptor.idxLowerUpper, descriptor.nbPartitionsToSend, descriptor.nbParticlesToSend); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<int*>(&descriptor.nbParticlesToSend), 1, MPI_INT, descriptor.destProc, TAG_UP_LOW_NB_PARTICLES, + current_com, &mpiRequests.back())); + + if(descriptor.nbParticlesToSend){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<double*>(&particles_positions[current_offset_particles_for_partition[current_partition_size-descriptor.nbPartitionsToSend]]), descriptor.nbParticlesToSend*size_particle_positions, MPI_DOUBLE, descriptor.destProc, TAG_UP_LOW_PARTICLES, + current_com, &mpiRequests.back())); + + assert(descriptor.toRecvAndMerge == nullptr); + descriptor.toRecvAndMerge.reset(new double[descriptor.nbParticlesToSend*size_particle_rhs]); + whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend*size_particle_rhs, MPI_DOUBLE, descriptor.destProc, TAG_LOW_UP_RESULTS, + current_com, &mpiRequests.back())); + } + + whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&descriptor.nbParticlesToRecv, + 1, MPI_INT, descriptor.destProc, TAG_LOW_UP_NB_PARTICLES, + current_com, &mpiRequests.back())); + } + } + + while(mpiRequests.size()){ + assert(mpiRequests.size() == whatNext.size()); + + int idxDone = mpiRequests.size(); + { + TIMEZONE("wait"); + AssertMpi(MPI_Waitany(mpiRequests.size(), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); + } + const std::pair<Action, int> releasedAction = whatNext[idxDone]; + std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]); + std::swap(whatNext[idxDone], whatNext[mpiRequests.size()-1]); + mpiRequests.pop_back(); + whatNext.pop_back(); + + ////////////////////////////////////////////////////////////////////// + /// Data to exchange particles + ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == RECV_PARTICLES){ + DEBUG_MSG("[%d] RECV_PARTICLES\n", my_rank); + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + + if(descriptor.isLower){ + const int idxLower = descriptor.idxLowerUpper; + const int destProc = descriptor.destProc; + const int nbPartitionsToRecv = descriptor.nbPartitionsToRecv; + const int NbParticlesToReceive = descriptor.nbParticlesToRecv; + assert(NbParticlesToReceive != -1); + assert(descriptor.toCompute == nullptr); + DEBUG_MSG("[%d] Recv idxLower %d -- nbPartitionsToRecv %d -- NbParticlesToReceive %d\n", + my_rank, idxLower, nbPartitionsToRecv, NbParticlesToReceive); + if(NbParticlesToReceive){ + descriptor.toCompute.reset(new double[NbParticlesToReceive*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(descriptor.toCompute.get(), NbParticlesToReceive*size_particle_positions, MPI_DOUBLE, destProc, TAG_UP_LOW_PARTICLES, + current_com, &mpiRequests.back())); + } + } + else{ + const int idxUpper = descriptor.idxLowerUpper; + const int destProc = descriptor.destProc; + const int nbPartitionsToRecv = descriptor.nbPartitionsToRecv; + const int NbParticlesToReceive = descriptor.nbParticlesToRecv; + assert(NbParticlesToReceive != -1); + assert(descriptor.toCompute == nullptr); + DEBUG_MSG("[%d] Recv idxUpper %d -- nbPartitionsToRecv %d -- NbParticlesToReceive %d\n", + my_rank, idxUpper, nbPartitionsToRecv, NbParticlesToReceive); + if(NbParticlesToReceive){ + descriptor.toCompute.reset(new double[NbParticlesToReceive*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(descriptor.toCompute.get(), NbParticlesToReceive*size_particle_positions, MPI_DOUBLE, destProc, TAG_LOW_UP_PARTICLES, + current_com, &mpiRequests.back())); + } + } + } + + ////////////////////////////////////////////////////////////////////// + /// Computation + ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == COMPUTE_PARTICLES){ + DEBUG_MSG("[%d] COMPUTE_PARTICLES\n", my_rank); + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + const int NbParticlesToReceive = descriptor.nbParticlesToRecv; + + assert(descriptor.toCompute != nullptr); + descriptor.results.reset(new double[NbParticlesToReceive*size_particle_rhs]); + init_result_array(descriptor.results.get(), NbParticlesToReceive); + apply_computation(descriptor.toCompute.get(), descriptor.results.get(), NbParticlesToReceive); + + const int destProc = descriptor.destProc; + whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second}); + mpiRequests.emplace_back(); + const int tag = descriptor.isLower? TAG_LOW_UP_RESULTS : TAG_UP_LOW_RESULTS; + AssertMpi(MPI_Isend(descriptor.results.get(), NbParticlesToReceive*size_particle_rhs, MPI_DOUBLE, destProc, tag, + current_com, &mpiRequests.back())); + } + ////////////////////////////////////////////////////////////////////// + /// Computation + ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == RELEASE_BUFFER_PARTICLES){ + DEBUG_MSG("[%d] RELEASE_BUFFER_PARTICLES\n", my_rank); + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + assert(descriptor.toCompute != nullptr); + descriptor.toCompute.release(); + } + ////////////////////////////////////////////////////////////////////// + /// Merge + ////////////////////////////////////////////////////////////////////// + if(releasedAction.first == MERGE_PARTICLES){ + DEBUG_MSG("[%d] MERGE_PARTICLES\n", my_rank); + NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; + + if(descriptor.isLower){ + DEBUG_MSG("[%d] low buffer received\n", my_rank); + TIMEZONE("reduce"); + assert(descriptor.toRecvAndMerge != nullptr); + reduce_particles(&particles_positions[0], &particles_current_rhs[0], descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend); + descriptor.toRecvAndMerge.release(); + } + else { + DEBUG_MSG("[%d] up buffer received\n", my_rank); + TIMEZONE("reduce"); + assert(descriptor.toRecvAndMerge != nullptr); + reduce_particles(&particles_positions[current_offset_particles_for_partition[current_partition_size]-descriptor.nbParticlesToSend], + &particles_current_rhs[current_offset_particles_for_partition[current_partition_size]-descriptor.nbParticlesToSend], + descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend); + descriptor.toRecvAndMerge.release(); + } + } + } + + + { + // TODO compute only border sections and do the reste in parallel + TIMEZONE("compute"); + // Compute my particles + if(myTotalNbParticles){ + TIMEZONE("my_compute"); + apply_computation(particles_positions, particles_current_rhs, myTotalNbParticles); + } + } + + assert(mpiRequests.size() == 0); + } + + //////////////////////////////////////////////////////////////////////////// + + virtual void init_result_array(double particles_current_rhs[], + const int nb_particles) = 0; + virtual void apply_computation(const double particles_positions[], + double particles_current_rhs[], + const int nb_particles) const = 0; + virtual void reduce_particles(const double particles_positions[], + double particles_current_rhs[], + const double extra_particles_current_rhs[], + const int nb_particles) const = 0; + + //////////////////////////////////////////////////////////////////////////// + + void redistribute(int current_my_nb_particles_per_partition[], + int* nb_particles, + std::unique_ptr<double[]>* inout_positions_particles, + std::unique_ptr<double[]> inout_rhs_particles[], const int in_nb_rhs, + std::unique_ptr<int[]>* inout_index_particles, + const double mySpatialLowLimit, + const double mySpatialUpLimit, + const double spatialPartitionWidth, + const int myTotalNbParticlesAllocated=-1){ + TIMEZONE("redistribute"); + current_offset_particles_for_partition[0] = 0; + int myTotalNbParticles = 0; + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + myTotalNbParticles += current_my_nb_particles_per_partition[idxPartition]; + current_offset_particles_for_partition[idxPartition+1] = current_offset_particles_for_partition[idxPartition] + current_my_nb_particles_per_partition[idxPartition]; + } + assert((*nb_particles) == myTotalNbParticles); + + // Find particles outside my interval + const int nbOutLower = particles_utils::partition_extra<size_particle_positions>(&(*inout_positions_particles)[0], current_my_nb_particles_per_partition[0], + [&](const double val[]){ + const bool isLower = val[2] < mySpatialLowLimit; + return isLower; + }, + [&](const int idx1, const int idx2){ + for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ + std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); + } + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + std::swap(inout_rhs_particles[idx_rhs][idx1*size_particle_rhs + idx_val], + inout_rhs_particles[idx_rhs][idx2*size_particle_rhs + idx_val]); + } + } + }); + DEBUG_MSG("[%d] nbOutLower %d\n", my_rank, nbOutLower); + + const int offesetOutLow = (current_partition_size==1? nbOutLower : 0); + + const int nbOutUpper = current_my_nb_particles_per_partition[current_partition_size-1] - offesetOutLow - (particles_utils::partition_extra<size_particle_positions>( + &(*inout_positions_particles)[(current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow)*size_particle_positions], + myTotalNbParticles - (current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow), + [&](const double val[]){ + const bool isUpper = mySpatialUpLimit <= val[2]; + return !isUpper; + }, + [&](const int idx1, const int idx2){ + for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ + std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); + } + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + std::swap(inout_rhs_particles[idx_rhs][idx1*size_particle_rhs + idx_val], + inout_rhs_particles[idx_rhs][idx2*size_particle_rhs + idx_val]); + } + } + })); + DEBUG_MSG("[%d] nbOutUpper %d\n", my_rank, nbOutUpper); + + // Exchange number + int eventsBeforeWaitall = 0; + int nbNewFromLow = 0; + int nbNewFromUp = 0; + std::unique_ptr<double[]> newParticlesLow; + std::unique_ptr<double[]> newParticlesUp; + std::unique_ptr<int[]> newParticlesLowIndexes; + std::unique_ptr<int[]> newParticlesUpIndexes; + std::vector<std::unique_ptr<double[]>> newParticlesLowRhs(in_nb_rhs); + std::vector<std::unique_ptr<double[]>> newParticlesUpRhs(in_nb_rhs); + + { + assert(whatNext.size() == 0); + assert(mpiRequests.size() == 0); + + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_NB_LOW, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&nbNewFromLow, 1, MPI_INT, (my_rank-1+nb_processes)%nb_processes, TAG_UP_LOW_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + eventsBeforeWaitall += 1; + + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<int*>(&nbOutLower), 1, MPI_INT, (my_rank-1+nb_processes)%nb_processes, TAG_LOW_UP_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + + if(nbOutLower){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(&(*inout_positions_particles)[0], nbOutLower*size_particle_positions, MPI_DOUBLE, (my_rank-1+nb_processes)%nb_processes, TAG_LOW_UP_MOVED_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(&(*inout_index_particles)[0], nbOutLower, MPI_INT, (my_rank-1+nb_processes)%nb_processes, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + MPI_COMM_WORLD, &mpiRequests.back())); + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][0], nbOutLower*size_particle_rhs, MPI_DOUBLE, (my_rank-1+nb_processes)%nb_processes, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, + MPI_COMM_WORLD, &mpiRequests.back())); + } + } + + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_NB_UP, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&nbNewFromUp, 1, MPI_INT, (my_rank+1)%nb_processes, TAG_LOW_UP_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + eventsBeforeWaitall += 1; + + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(const_cast<int*>(&nbOutUpper), 1, MPI_INT, (my_rank+1)%nb_processes, TAG_UP_LOW_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + + if(nbOutUpper){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(&(*inout_positions_particles)[(myTotalNbParticles-nbOutUpper)*size_particle_positions], nbOutUpper*size_particle_positions, MPI_DOUBLE, (my_rank+1)%nb_processes, TAG_UP_LOW_MOVED_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(&(*inout_index_particles)[(myTotalNbParticles-nbOutUpper)], nbOutUpper, MPI_INT, (my_rank+1)%nb_processes, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, + MPI_COMM_WORLD, &mpiRequests.back())); + + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][(myTotalNbParticles-nbOutUpper)*size_particle_rhs], nbOutUpper*size_particle_rhs, MPI_DOUBLE, (my_rank+1)%nb_processes, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, + MPI_COMM_WORLD, &mpiRequests.back())); + } + } + + while(mpiRequests.size() && eventsBeforeWaitall){ + DEBUG_MSG("eventsBeforeWaitall %d\n", eventsBeforeWaitall); + + int idxDone = mpiRequests.size(); + { + TIMEZONE("waitany_move"); + AssertMpi(MPI_Waitany(mpiRequests.size(), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); + DEBUG_MSG("MPI_Waitany eventsBeforeWaitall %d\n", eventsBeforeWaitall); + } + const std::pair<Action, int> releasedAction = whatNext[idxDone]; + std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]); + std::swap(whatNext[idxDone], whatNext[mpiRequests.size()-1]); + mpiRequests.pop_back(); + whatNext.pop_back(); + + if(releasedAction.first == RECV_MOVE_NB_LOW){ + DEBUG_MSG("[%d] nbNewFromLow %d from %d\n", my_rank, nbNewFromLow, (my_rank-1+nb_processes)%nb_processes); + + if(nbNewFromLow){ + assert(newParticlesLow == nullptr); + newParticlesLow.reset(new double[nbNewFromLow*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_LOW, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&newParticlesLow[0], nbNewFromLow*size_particle_positions, MPI_DOUBLE, (my_rank-1+nb_processes)%nb_processes, TAG_UP_LOW_MOVED_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + + newParticlesLowIndexes.reset(new int[nbNewFromLow]); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&newParticlesLowIndexes[0], nbNewFromLow, MPI_INT, (my_rank-1+nb_processes)%nb_processes, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, + MPI_COMM_WORLD, &mpiRequests.back())); + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + newParticlesLowRhs[idx_rhs].reset(new double[nbNewFromLow]); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&newParticlesLowRhs[idx_rhs][0], nbNewFromLow*size_particle_rhs, MPI_DOUBLE, (my_rank-1+nb_processes)%nb_processes, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, + MPI_COMM_WORLD, &mpiRequests.back())); + } + } + eventsBeforeWaitall -= 1; + } + else if(releasedAction.first == RECV_MOVE_NB_UP){ + DEBUG_MSG("[%d] nbNewFromUp %d from %d\n", my_rank, nbNewFromUp, (my_rank+1)%nb_processes); + + if(nbNewFromUp){ + assert(newParticlesUp == nullptr); + newParticlesUp.reset(new double[nbNewFromUp*size_particle_positions]); + whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_UP, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&newParticlesUp[0], nbNewFromUp*size_particle_positions, MPI_DOUBLE, (my_rank+1)%nb_processes, TAG_LOW_UP_MOVED_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); + + newParticlesUpIndexes.reset(new int[nbNewFromUp]); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&newParticlesUpIndexes[0], nbNewFromUp, MPI_INT, (my_rank+1)%nb_processes, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + MPI_COMM_WORLD, &mpiRequests.back())); + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + newParticlesUpRhs[idx_rhs].reset(new double[nbNewFromUp]); + whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); + mpiRequests.emplace_back(); + AssertMpi(MPI_Irecv(&newParticlesUpRhs[idx_rhs][0], nbNewFromUp*size_particle_rhs, MPI_DOUBLE, (my_rank+1)%nb_processes, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, + MPI_COMM_WORLD, &mpiRequests.back())); + } + } + eventsBeforeWaitall -= 1; + } + } + } + + if(mpiRequests.size()){ + DEBUG_MSG("MPI_Waitall\n"); + // TODO Proceed when received + TIMEZONE("waitall-move"); + AssertMpi(MPI_Waitall(mpiRequests.size(), mpiRequests.data(), MPI_STATUSES_IGNORE)); + mpiRequests.clear(); + } + + // Exchange particles + { + TIMEZONE("apply_pbc_z_new_particles"); + if(nbNewFromLow){ + assert(newParticlesLow.get() != nullptr); + apply_pbc_z_new_particles(newParticlesLow.get(), nbNewFromLow); + } + if(nbNewFromUp){ + assert(newParticlesUp.get() != nullptr); + apply_pbc_z_new_particles(newParticlesUp.get(), nbNewFromUp); + } + } + + // Realloc an merge + if(nbNewFromLow + nbNewFromUp != 0){ + TIMEZONE("realloc_and_merge"); + const int nbOldParticlesInside = myTotalNbParticles - nbOutLower - nbOutUpper; + const int myTotalNewNbParticles = nbOldParticlesInside + nbNewFromLow + nbNewFromUp; + + DEBUG_MSG("[%d] nbOldParticlesInside %d\n", my_rank, nbOldParticlesInside); + DEBUG_MSG("[%d] myTotalNewNbParticles %d\n", my_rank, myTotalNewNbParticles); + + if(myTotalNewNbParticles > myTotalNbParticlesAllocated){ + DEBUG_MSG("[%d] reuse array\n", my_rank); + std::unique_ptr<double[]> newArray(new double[myTotalNewNbParticles*size_particle_positions]); + std::unique_ptr<int[]> newArrayIndexes(new int[myTotalNewNbParticles*size_particle_positions]); + std::vector<std::unique_ptr<double[]>> newArrayRhs(in_nb_rhs); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + newArrayRhs[idx_rhs].reset(new double[myTotalNewNbParticles*size_particle_positions]); + } + + if(nbNewFromLow){ + memcpy(&newArray[0], &newParticlesLow[0], sizeof(double)*nbNewFromLow*size_particle_positions); + memcpy(&newArrayIndexes[0], &newParticlesLowIndexes[0], sizeof(int)*nbNewFromLow); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&newArrayRhs[idx_rhs][0], &newParticlesLowRhs[idx_rhs][0], sizeof(double)*nbNewFromLow); + } + } + + memcpy(&newArray[nbNewFromLow*size_particle_positions], &(*inout_positions_particles)[nbOutLower*size_particle_positions], sizeof(double)*nbOldParticlesInside*size_particle_positions); + memcpy(&newArrayIndexes[nbNewFromLow], &(*inout_positions_particles)[nbOutLower], sizeof(int)*nbOldParticlesInside); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&newArrayRhs[idx_rhs][nbNewFromLow], &inout_positions_particles[idx_rhs][nbOutLower], sizeof(double)*nbOldParticlesInside); + } + + if(nbNewFromUp){ + memcpy(&newArray[(nbNewFromLow+nbOldParticlesInside)*size_particle_positions], &newParticlesUp[0], sizeof(double)*nbNewFromUp*size_particle_positions); + memcpy(&newArrayIndexes[(nbNewFromLow+nbOldParticlesInside)], &newParticlesUpIndexes[0], sizeof(int)*nbNewFromUp); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&newArrayRhs[idx_rhs][(nbNewFromLow+nbOldParticlesInside)], &newParticlesUpRhs[idx_rhs][0], sizeof(double)*nbNewFromUp); + } + } + + (*inout_positions_particles) = std::move(newArray); + (*inout_index_particles) = std::move(newArrayIndexes); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + inout_rhs_particles[idx_rhs] = std::move(newArrayRhs[idx_rhs]); + } + + // not needed myTotalNbParticlesAllocated = myTotalNewNbParticles; + } + else if(nbOutLower < nbNewFromLow){ + DEBUG_MSG("[%d] A array\n", my_rank); + // Less low send thant received from low + const int nbLowToMoveBack = nbNewFromLow-nbOutLower; + // Copy received from low in two part + if(nbNewFromLow){ + memcpy(&(*inout_positions_particles)[0], &newParticlesLow[0], sizeof(double)*nbOutLower*size_particle_positions); + memcpy(&(*inout_index_particles)[0], &newParticlesLowIndexes[0], sizeof(int)*nbOutLower); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&inout_rhs_particles[idx_rhs][0], &newParticlesLowRhs[idx_rhs][0], sizeof(double)*nbOutLower); + } + } + if(nbNewFromLow){ + memcpy(&(*inout_positions_particles)[(nbOutLower+nbOldParticlesInside)*size_particle_positions], &newParticlesLow[nbOutLower*size_particle_positions], sizeof(double)*nbLowToMoveBack*size_particle_positions); + memcpy(&(*inout_index_particles)[(nbOutLower+nbOldParticlesInside)], &newParticlesLowIndexes[nbOutLower], sizeof(int)*nbLowToMoveBack); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&inout_rhs_particles[idx_rhs][(nbOutLower+nbOldParticlesInside)], &newParticlesLowRhs[idx_rhs][nbOutLower], sizeof(double)*nbLowToMoveBack); + } + } + if(nbNewFromUp){ + memcpy(&(*inout_positions_particles)[(nbNewFromLow+nbOldParticlesInside)*size_particle_positions], &newParticlesUp[0], sizeof(double)*nbNewFromUp*size_particle_positions); + memcpy(&(*inout_index_particles)[(nbNewFromLow+nbOldParticlesInside)], &newParticlesUpIndexes[0], sizeof(int)*nbNewFromUp); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&inout_rhs_particles[idx_rhs][(nbNewFromLow+nbOldParticlesInside)], &newParticlesUpRhs[0], sizeof(double)*nbNewFromUp); + } + } + } + else{ + const int nbUpToMoveBegin = nbOutLower - nbNewFromLow; + if(nbUpToMoveBegin <= nbNewFromUp){ + DEBUG_MSG("[%d] B array\n", my_rank); + if(nbNewFromLow){ + memcpy(&(*inout_positions_particles)[0], &newParticlesLow[0], sizeof(double)*nbNewFromLow*size_particle_positions); + memcpy(&(*inout_index_particles)[0], &newParticlesLowIndexes[0], sizeof(int)*nbNewFromLow); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&inout_rhs_particles[idx_rhs][0], &newParticlesLowRhs[idx_rhs][0], sizeof(double)*nbNewFromLow); + } + } + if(nbNewFromUp){ + memcpy(&(*inout_positions_particles)[nbNewFromLow*size_particle_positions], &newParticlesUp[0], sizeof(double)*nbUpToMoveBegin*size_particle_positions); + memcpy(&(*inout_index_particles)[nbNewFromLow], &newParticlesUpIndexes[0], sizeof(int)*nbUpToMoveBegin); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&inout_rhs_particles[idx_rhs][0], &newParticlesLowRhs[idx_rhs][0], sizeof(double)*nbNewFromLow); + } + } + if(nbNewFromUp){ + memcpy(&(*inout_positions_particles)[(nbOutLower+nbOldParticlesInside)*size_particle_positions], &newParticlesUp[nbUpToMoveBegin*size_particle_positions], + sizeof(double)*(nbNewFromUp-nbUpToMoveBegin)*size_particle_positions); + memcpy(&(*inout_index_particles)[(nbOutLower+nbOldParticlesInside)], &newParticlesUpIndexes[nbUpToMoveBegin], + sizeof(int)*(nbNewFromUp-nbUpToMoveBegin)); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&inout_rhs_particles[idx_rhs][(nbOutLower+nbOldParticlesInside)], &newParticlesUpRhs[idx_rhs][nbUpToMoveBegin], + sizeof(double)*(nbNewFromUp-nbUpToMoveBegin)); + } + } + } + else{ + DEBUG_MSG("[%d] C array\n", my_rank); + if(nbNewFromLow){ + memcpy(&(*inout_positions_particles)[0], &newParticlesLow[0], sizeof(double)*nbNewFromLow*size_particle_positions); + memcpy(&(*inout_index_particles)[0], &newParticlesLowIndexes[0], sizeof(int)*nbNewFromLow); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&inout_rhs_particles[idx_rhs][0], &newParticlesLowRhs[idx_rhs][0], sizeof(double)*nbNewFromLow); + } + } + if(nbNewFromUp){ + memcpy(&(*inout_positions_particles)[0], &newParticlesUp[0], sizeof(double)*nbNewFromUp*size_particle_positions); + memcpy(&(*inout_index_particles)[0], &newParticlesUpIndexes[0], sizeof(int)*nbNewFromUp); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&inout_rhs_particles[idx_rhs][0], &newParticlesUpRhs[idx_rhs][0], sizeof(double)*nbNewFromUp); + } + } + const int padding = nbOutLower - nbNewFromLow+nbNewFromUp; + memcpy(&(*inout_positions_particles)[(nbNewFromLow+nbNewFromUp)*size_particle_positions], + &(*inout_positions_particles)[(nbOutLower+nbOldParticlesInside-padding)*size_particle_positions], + sizeof(double)*padding*size_particle_positions); + memcpy(&(*inout_index_particles)[(nbNewFromLow+nbNewFromUp)], + &(*inout_index_particles)[(nbOutLower+nbOldParticlesInside-padding)], + sizeof(int)*padding); + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + memcpy(&inout_rhs_particles[idx_rhs][(nbNewFromLow+nbNewFromUp)], + &inout_rhs_particles[idx_rhs][(nbOutLower+nbOldParticlesInside-padding)], + sizeof(double)*padding); + } + } + } + myTotalNbParticles = myTotalNewNbParticles; + } + + { + TIMEZONE("apply_pbc_xy"); + apply_pbc_xy((*inout_positions_particles).get(), nbNewFromUp+nbNewFromLow); + } + + // Partitions all particles + { + TIMEZONE("repartition"); + particles_utils::partition_extra_z<size_particle_positions>(&(*inout_positions_particles)[0], + myTotalNbParticles,current_partition_size, + current_my_nb_particles_per_partition, current_offset_particles_for_partition.get(), + [&](const int idxPartition){ + return (idxPartition+1)*spatialPartitionWidth + mySpatialLowLimit; + }, + [&](const int idx1, const int idx2){ + for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ + std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); + } + + for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + std::swap(inout_rhs_particles[idx_rhs][idx1*size_particle_rhs + idx_val], + inout_rhs_particles[idx_rhs][idx2*size_particle_rhs + idx_val]); + } + } + }); + + {// TODO remove + for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ + assert(current_my_nb_particles_per_partition[idxPartition] == + current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]); + const double limitPartition = (idxPartition+1)*spatialPartitionWidth + mySpatialLowLimit; + for(int idx = 0 ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ + assert((*inout_positions_particles)[idx*3+2] < limitPartition); + } + for(int idx = current_offset_particles_for_partition[idxPartition+1] ; idx < myTotalNbParticles ; ++idx){ + assert((*inout_positions_particles)[idx*3+2] >= limitPartition); + } + } + } + } + (*nb_particles) = myTotalNbParticles; + + assert(mpiRequests.size() == 0); + } + + virtual void apply_pbc_z_new_particles(double* newParticlesLow, const int nbNewFromLow) const = 0; + virtual void apply_pbc_xy(double* inout_positions_particles, const int nbNew) const = 0; + + //////////////////////////////////////////////////////////////////////////// + + virtual void move_particles(double particles_positions[], + const int nb_particles, + const std::unique_ptr<double[]> particles_current_rhs[], + const int nb_rhs, const double dt) const = 0; +}; + +#endif diff --git a/bfps/cpp/particles/abstract_particles_input.hpp b/bfps/cpp/particles/abstract_particles_input.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d3d2e276d2fb73ade85d52503e2ea3e0513bce9c --- /dev/null +++ b/bfps/cpp/particles/abstract_particles_input.hpp @@ -0,0 +1,21 @@ +#ifndef ABSTRACT_PARTICLES_INPUT_HPP +#define ABSTRACT_PARTICLES_INPUT_HPP + +#include <tuple> + + +class abstract_particles_input { +public: + virtual ~abstract_particles_input(){} + + virtual int getTotalNbParticles() = 0; + virtual int getLocalNbParticles() = 0; + virtual int getNbRhs() = 0; + + virtual std::unique_ptr<double[]> getMyParticles() = 0; + virtual std::unique_ptr<int[]> getMyParticlesIndexes() = 0; + virtual std::vector<std::unique_ptr<double[]>> getMyRhs() = 0; +}; + + +#endif diff --git a/bfps/cpp/particles/abstract_particles_output.hpp b/bfps/cpp/particles/abstract_particles_output.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2f80704dc7735e5756cac650083aad072ef81f6f --- /dev/null +++ b/bfps/cpp/particles/abstract_particles_output.hpp @@ -0,0 +1,171 @@ +#ifndef ABSTRACT_PARTICLES_OUTPUT +#define ABSTRACT_PARTICLES_OUTPUT + +#include <memory> +#include <vector> +#include <cassert> +#include <algorithm> +#include <cstddef> + +#include "base.hpp" +#include "particles_utils.hpp" +#include "alltoall_exchanger.hpp" +#include "scope_timer.hpp" + +#ifndef AssertMpi +#define AssertMpi(X) if(MPI_SUCCESS != (X)) { printf("MPI Error at line %d\n",__LINE__); fflush(stdout) ; throw std::runtime_error("Stop from from mpi erro"); } +#endif + + +template <int size_particle_positions, int size_particle_rhs> +class abstract_particles_output { + struct movable_particle{ + int global_idx; + double positions[size_particle_positions]; + double rhs[size_particle_rhs]; + }; + + void create_movable_mpi_data_type(){ + /** Type in order in the struct */ + MPI_Datatype type[3] = { MPI_INT, MPI_DOUBLE, MPI_DOUBLE }; + /** Number of occurence of each type */ + int blocklen[3] = { 1, size_particle_positions, size_particle_rhs }; + /** Position offset from struct starting address */ + MPI_Aint disp[3]; + disp[0] = offsetof(movable_particle,global_idx); + disp[1] = offsetof(movable_particle,positions); + disp[2] = offsetof(movable_particle,rhs); + /** Create the type */ + AssertMpi( MPI_Type_create_struct(3, blocklen, disp, type, &mpi_movable_particle_type) ); + /** Commit it*/ + AssertMpi( MPI_Type_commit(&mpi_movable_particle_type) ); + } + + MPI_Datatype mpi_movable_particle_type; + + MPI_Comm mpi_com; + + int my_rank; + int nb_processes; + + std::unique_ptr<movable_particle[]> buffer_particles_send; + int nb_particles_allocated_send; + const int total_nb_particles; + + std::unique_ptr<movable_particle[]> buffer_particles_recv; + int nb_particles_allocated_recv; + +protected: + MPI_Comm& getCom(){ + return mpi_com; + } + + int getTotalNbParticles() const { + return total_nb_particles; + } + +public: + abstract_particles_output(MPI_Comm in_mpi_com, const int inTotalNbParticles) + : mpi_com(in_mpi_com), my_rank(-1), nb_processes(-1), + nb_particles_allocated_send(-1), total_nb_particles(inTotalNbParticles), + nb_particles_allocated_recv(-1){ + + AssertMpi(MPI_Comm_rank(mpi_com, &my_rank)); + AssertMpi(MPI_Comm_size(mpi_com, &nb_processes)); + + create_movable_mpi_data_type(); + } + + virtual ~abstract_particles_output(){ + MPI_Type_free(&mpi_movable_particle_type); + } + + void releaseMemory(){ + buffer_particles_send.release(); + nb_particles_allocated_send = -1; + buffer_particles_recv.release(); + nb_particles_allocated_recv = -1; + } + + void save(const double input_particles_positions[], const double input_particles_rhs[], + const int index_particles[], const int nb_particles, const int idx_time_step){ + TIMEZONE("abstract_particles_output::save"); + assert(total_nb_particles != -1); + DEBUG_MSG("[%d] total_nb_particles %d \n", my_rank, total_nb_particles); + DEBUG_MSG("[%d] nb_particles %d to distribute for saving \n", my_rank, nb_particles); + + { + TIMEZONE("sort"); + + if(nb_particles_allocated_send < nb_particles){ + buffer_particles_send.reset(new movable_particle[nb_particles]); + nb_particles_allocated_send = nb_particles; + } + + for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + buffer_particles_send[idx_part].global_idx = index_particles[idx_part]; + for(int idx_val = 0 ; idx_val < size_particle_positions ; ++idx_val){ + buffer_particles_send[idx_part].positions[idx_val] = input_particles_positions[idx_part*size_particle_positions + idx_val]; + } + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + buffer_particles_send[idx_part].rhs[idx_val] = input_particles_rhs[idx_part*size_particle_rhs + idx_val]; + } + } + + std::sort(&buffer_particles_send[0], &buffer_particles_send[nb_particles], [](const movable_particle& p1, const movable_particle& p2){ + return p1.global_idx < p2.global_idx; + }); + } + + const particles_utils::IntervalSplitter<int> particles_splitter(total_nb_particles, nb_processes, my_rank); + DEBUG_MSG("[%d] nb_particles_per_proc %d for saving\n", my_rank, particles_splitter.getMySize()); + + std::vector<int> nb_particles_to_send(nb_processes, 0); + for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + nb_particles_to_send[particles_splitter.getOwner(buffer_particles_send[idx_part].global_idx)] += 1; + } + + alltoall_exchanger exchanger(mpi_com, std::move(nb_particles_to_send)); + // nb_particles_to_send is invalid after here + + const int nb_to_receive = exchanger.getTotalToRecv(); + + if(nb_particles_allocated_recv < nb_to_receive){ + buffer_particles_recv.reset(new movable_particle[nb_to_receive]); + nb_particles_allocated_recv = nb_to_receive; + } + + exchanger.alltoallv(buffer_particles_send.get(), buffer_particles_recv.get(), mpi_movable_particle_type); + + // Trick re-use the buffer to have only double + + if(nb_particles_allocated_send < nb_to_receive){ + buffer_particles_send.reset(new movable_particle[nb_to_receive]); + nb_particles_allocated_send = nb_to_receive; + } + + double* buffer_positions_dptr = reinterpret_cast<double*>(buffer_particles_recv.get()); + double* buffer_rhs_dptr = reinterpret_cast<double*>(buffer_particles_send.get()); + { + TIMEZONE("copy"); + for(int idx_part = 0 ; idx_part < nb_to_receive ; ++idx_part){ + for(int idx_val = 0 ; idx_val < size_particle_positions ; ++idx_val){ + buffer_positions_dptr[idx_part*size_particle_positions + idx_val] + = buffer_particles_recv[idx_part].positions[idx_val]; + } + // Can be done here or before "positions" copy + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + buffer_rhs_dptr[idx_part*size_particle_rhs + idx_val] + = buffer_particles_recv[idx_part].rhs[idx_val]; + } + } + } + + write(idx_time_step, buffer_positions_dptr, buffer_rhs_dptr, nb_to_receive, particles_splitter.getMyOffset()); + } + + virtual void write(const int idx_time_step, const double* positions, const double* rhs, + const int nb_particles, const int particles_idx_offset) = 0; +}; + +#endif diff --git a/bfps/cpp/particles/alltoall_exchanger.hpp b/bfps/cpp/particles/alltoall_exchanger.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5644a8b4d0b8f76b6ffbc5dff2ea5915ee3c65b5 --- /dev/null +++ b/bfps/cpp/particles/alltoall_exchanger.hpp @@ -0,0 +1,124 @@ +#ifndef ALLTOALL_EXCHANGER_HPP +#define ALLTOALL_EXCHANGER_HPP + +#include <mpi.h> +#include <cassert> + +#include "base.hpp" +#include "particles_utils.hpp" +#include "scope_timer.hpp" + +#ifndef AssertMpi +#define AssertMpi(X) if(MPI_SUCCESS != (X)) { printf("MPI Error at line %d\n",__LINE__); fflush(stdout) ; throw std::runtime_error("Stop from from mpi erro"); } +#endif + +class GetMpiType{ + const MPI_Datatype type; +public: + explicit GetMpiType(const int&) : type(MPI_INT){} + explicit GetMpiType(const double&) : type(MPI_DOUBLE){} + explicit GetMpiType(const float&) : type(MPI_FLOAT){} + explicit GetMpiType(const char&) : type(MPI_CHAR){} + explicit GetMpiType(const long&) : type(MPI_LONG){} + + /*do not make it explicit*/ operator MPI_Datatype() const { return type; } +}; + +class alltoall_exchanger { + const MPI_Comm mpi_com; + + int my_rank; + int nb_processes; + + const std::vector<int> nb_items_to_send; + + std::vector<int> offset_items_to_send; + + std::vector<int> nb_items_to_sendrecv_all; + std::vector<int> nb_items_to_recv; + std::vector<int> offset_items_to_recv; + + int total_to_recv; + +public: + alltoall_exchanger(const MPI_Comm& in_mpi_com, std::vector<int>/*no ref to move here*/ in_nb_items_to_send) + :mpi_com(in_mpi_com), nb_items_to_send(std::move(in_nb_items_to_send)), total_to_recv(0){ + + AssertMpi(MPI_Comm_rank(mpi_com, &my_rank)); + AssertMpi(MPI_Comm_size(mpi_com, &nb_processes)); + + assert(nb_items_to_send.size() == nb_processes); + + offset_items_to_send.resize(nb_processes+1, 0); + for(int idx_proc = 0 ; idx_proc < nb_processes ; ++idx_proc){ + offset_items_to_send[idx_proc+1] = offset_items_to_send[idx_proc] + + nb_items_to_send[idx_proc]; + } + + nb_items_to_sendrecv_all.resize(nb_processes*nb_processes); + AssertMpi(MPI_Allgather(const_cast<int*>(nb_items_to_send.data()), nb_processes, MPI_INT, + nb_items_to_sendrecv_all.data(), nb_processes, MPI_INT, + mpi_com)); + + nb_items_to_recv.resize(nb_processes, 0); + offset_items_to_recv.resize(nb_processes+1, 0); + for(int idx_proc = 0 ; idx_proc < nb_processes ; ++idx_proc){ + const int nbrecv = nb_items_to_sendrecv_all[idx_proc*nb_processes + my_rank]; + total_to_recv += nbrecv; + nb_items_to_recv[idx_proc] = nbrecv; + offset_items_to_recv[idx_proc+1] = nb_items_to_recv[idx_proc] + + offset_items_to_recv[idx_proc]; + } + } + + int getTotalToRecv() const{ + return total_to_recv; + } + + template <class ItemType> + void alltoallv(const ItemType in_to_send[], + ItemType out_to_recv[], const MPI_Datatype& in_type) const { + TIMEZONE("alltoallv"); + AssertMpi(MPI_Alltoallv(const_cast<ItemType*>(in_to_send), const_cast<int*>(nb_items_to_send.data()), + const_cast<int*>(offset_items_to_send.data()), in_type, out_to_recv, + const_cast<int*>(nb_items_to_recv.data()), const_cast<int*>(offset_items_to_recv.data()), in_type, + mpi_com)); + } + + template <class ItemType> + void alltoallv(const ItemType in_to_send[], + ItemType out_to_recv[]) const { + alltoallv<ItemType>(in_to_send, out_to_recv, GetMpiType(ItemType())); + } + + template <class ItemType> + void alltoallv(const ItemType in_to_send[], + ItemType out_to_recv[], const MPI_Datatype& in_type, const int in_nb_values_per_item) const { + TIMEZONE("alltoallv"); + std::vector<int> nb_items_to_send_tmp = nb_items_to_send; + particles_utils::transform(nb_items_to_send_tmp.begin(), nb_items_to_send_tmp.end(), nb_items_to_send_tmp.begin(), + [&](const int val) -> int { return val * in_nb_values_per_item ;}); + std::vector<int> offset_items_to_send_tmp = offset_items_to_send; + particles_utils::transform(offset_items_to_send_tmp.begin(), offset_items_to_send_tmp.end(), offset_items_to_send_tmp.begin(), + [&](const int val) -> int { return val * in_nb_values_per_item ;}); + std::vector<int> nb_items_to_recv_tmp = nb_items_to_recv; + particles_utils::transform(nb_items_to_recv_tmp.begin(), nb_items_to_recv_tmp.end(), nb_items_to_recv_tmp.begin(), + [&](const int val) -> int { return val * in_nb_values_per_item ;}); + std::vector<int> offset_items_to_recv_tmp = offset_items_to_recv; + particles_utils::transform(offset_items_to_recv_tmp.begin(), offset_items_to_recv_tmp.end(), offset_items_to_recv_tmp.begin(), + [&](const int val) -> int { return val * in_nb_values_per_item ;}); + + AssertMpi(MPI_Alltoallv(const_cast<ItemType*>(in_to_send), const_cast<int*>(nb_items_to_send_tmp.data()), + const_cast<int*>(offset_items_to_send_tmp.data()), in_type, out_to_recv, + const_cast<int*>(nb_items_to_recv_tmp.data()), const_cast<int*>(offset_items_to_recv_tmp.data()), in_type, + mpi_com)); + } + + template <class ItemType> + void alltoallv(const ItemType in_to_send[], + ItemType out_to_recv[], const int in_nb_values_per_item) const { + alltoallv<ItemType>(in_to_send, out_to_recv, GetMpiType(ItemType()), in_nb_values_per_item); + } +}; + +#endif diff --git a/bfps/cpp/particles/field_accessor.hpp b/bfps/cpp/particles/field_accessor.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e8937d31d79c6d7fc2ed65de3cf2d93b4468d7e1 --- /dev/null +++ b/bfps/cpp/particles/field_accessor.hpp @@ -0,0 +1,46 @@ +#ifndef FIELD_ACCESSOR_HPP +#define FIELD_ACCESSOR_HPP + +#include <algorithm> +#include <array> + + +class field_accessor { + static const int nb_dim = 3; + + const double* field_date; + std::array<size_t,3> local_field_dims; + std::array<size_t,3> dim_offset; + +public: + field_accessor(const double* in_field_date, const std::array<size_t,3>& in_dims, + const std::array<size_t,3>& in_dim_offset) + : field_date(in_field_date), local_field_dims(in_dims), + dim_offset(in_dim_offset){ + } + + ~field_accessor(){} + + const double& getValue(const size_t in_index, const int in_dim) const { + assert(in_index < local_field_dims[0]*local_field_dims[1]*local_field_dims[2]); + return field_date[in_index*nb_dim + in_dim]; + } + + size_t getIndexFromGlobalPosition(const size_t in_global_x, const size_t in_global_y, const size_t in_global_z) const { + return getIndexFromLocalPosition(in_global_x - dim_offset[0], + in_global_y - dim_offset[1], + in_global_z - dim_offset[2]); + } + + size_t getIndexFromLocalPosition(const size_t in_local_x, const size_t in_local_y, const size_t in_local_z) const { + assert(in_local_x < local_field_dims[0]); + assert(in_local_y < local_field_dims[1]); + assert(in_local_z < local_field_dims[2]); + return (((in_local_z)*local_field_dims[1] + + in_local_y)*(local_field_dims[2]) + // TODO there was a +2 on local_field_dims[2] + in_local_x); + } +}; + + +#endif diff --git a/bfps/cpp/particles/main_tocompile.cpp b/bfps/cpp/particles/main_tocompile.cpp new file mode 100644 index 0000000000000000000000000000000000000000..38db2ec7bd4d9f1cf0d0fcc8b8cacfdd011652a6 --- /dev/null +++ b/bfps/cpp/particles/main_tocompile.cpp @@ -0,0 +1,177 @@ + +#include <mpi.h> +#include <memory> +#include <cassert> +#include <cmath> +#include <cfenv> + +#include "particles_system.hpp" +#include "particles_interp_spline.hpp" +#include "abstract_particles_input.hpp" +#include "particles_input_hdf5.hpp" + +class random_particles : public abstract_particles_input { + const int nb_particles; + const double box_width; + const double lower_limit; + const double upper_limit; + int my_rank; + int nb_processes; + +public: + random_particles(const int in_nb_particles, const double in_box_width, + const double in_lower_limit, const double in_upper_limit, + const int in_my_rank, const int in_nb_processes) + : nb_particles(in_nb_particles), box_width(in_box_width), + lower_limit(in_lower_limit), upper_limit(in_upper_limit), + my_rank(in_my_rank), nb_processes(in_nb_processes){ + } + + int getTotalNbParticles() final{ + return nb_processes*nb_particles; + } + + int getLocalNbParticles() final{ + return nb_particles; + } + + int getNbRhs() final{ + return 1; + } + + std::unique_ptr<double[]> getMyParticles() final { + std::unique_ptr<double[]> particles(new double[nb_particles*3]); + + for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + particles[idx_part*3+0] = drand48() * box_width; + particles[idx_part*3+1] = drand48() * box_width; + particles[idx_part*3+2] = (drand48() * (upper_limit-lower_limit)) + + lower_limit; + } + + return std::move(particles); + } + + std::unique_ptr<int[]> getMyParticlesIndexes() final { + std::unique_ptr<int[]> indexes(new int[nb_particles]); + + for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + indexes[idx_part] = idx_part + my_rank*nb_particles; + } + + return std::move(indexes); + } + + std::vector<std::unique_ptr<double[]>> getMyRhs() final { + std::vector<std::unique_ptr<double[]>> rhs(1); + rhs[0].reset(new double[nb_particles*3]); + std::fill(&rhs[0][0], &rhs[0][nb_particles*3], 0); + + return std::move(rhs); + } +}; + +int myrank; + +int main(int argc, char** argv){ + feenableexcept(FE_INVALID | FE_OVERFLOW); + + MPI_Init(&argc, &argv); + { + int my_rank; + int nb_processes; + AssertMpi(MPI_Comm_rank(MPI_COMM_WORLD, &my_rank)); + AssertMpi(MPI_Comm_size(MPI_COMM_WORLD, &nb_processes)); + myrank = my_rank; // global bfps variable + + const int InterpNbNeighbors = 5; + + const std::array<size_t,3> field_grid_dim{100, 100, 100}; + assert(my_rank < field_grid_dim[0]); + assert(my_rank < field_grid_dim[1]); + assert(my_rank < field_grid_dim[2]); + + const double partitionIntervalSize = double(field_grid_dim[2])/double(nb_processes); + const int myPartitionInterval[2] = { int(partitionIntervalSize*my_rank), (my_rank==nb_processes-1?field_grid_dim[2]:int(partitionIntervalSize*(my_rank+1)))}; + + + const std::array<double,3> spatial_box_width{10., 10., 10.}; + const double spatial_partition_width = spatial_box_width[2]/double(field_grid_dim[2]); + const double my_spatial_low_limit = myPartitionInterval[0]*spatial_partition_width; + const double my_spatial_up_limit = myPartitionInterval[1]*spatial_partition_width; + + if(my_rank == 0){ + std::cout << "spatial_box_width = " << spatial_box_width[0] << " " << spatial_box_width[1] << " " << spatial_box_width[2] << std::endl; + std::cout << "spatial_partition_width = " << spatial_partition_width << std::endl; + std::cout << "my_spatial_low_limit = " << my_spatial_low_limit << std::endl; + std::cout << "my_spatial_up_limit = " << my_spatial_up_limit << std::endl; + } + + std::array<size_t,3> local_field_dims{ field_grid_dim[0], field_grid_dim[1], myPartitionInterval[1]-myPartitionInterval[0]}; + std::array<size_t,3> local_field_offset{ 0, 0, myPartitionInterval[0]}; + + std::unique_ptr<double[]> field_data(new double[local_field_dims[0]*local_field_dims[1]*local_field_dims[2]]); + particles_utils::memzero(field_data.get(), local_field_dims[0]*local_field_dims[1]*local_field_dims[2]); + + + particles_system<particles_interp_spline<InterpNbNeighbors,0>, InterpNbNeighbors> part_sys(field_grid_dim, + spatial_box_width, + spatial_partition_width, + my_spatial_low_limit, + my_spatial_up_limit, + field_data.get(), + local_field_dims, + local_field_offset, + MPI_COMM_WORLD); + + int total_nb_particles; + { + //const int nb_part_to_generate = 1000; + //random_particles generator(nb_part_to_generate, spatial_box_width, my_spatial_low_limit, + // my_spatial_up_limit, my_rank, nb_processes); + std::vector<double> spatial_interval_per_proc(nb_processes+1); + for(int idx_proc = 0 ; idx_proc < nb_processes ; ++idx_proc){ + spatial_interval_per_proc[idx_proc] = partitionIntervalSize*spatial_partition_width*idx_proc; + std::cout << "spatial_interval_per_proc[idx_proc] " << spatial_interval_per_proc[idx_proc] << std::endl; + } + spatial_interval_per_proc[nb_processes] = spatial_box_width[2]; + assert(my_spatial_low_limit == spatial_interval_per_proc[my_rank] || fabs((spatial_interval_per_proc[my_rank]-my_spatial_low_limit)/my_spatial_low_limit) < 1e-13); + assert(my_spatial_up_limit == spatial_interval_per_proc[my_rank+1] || fabs((spatial_interval_per_proc[my_rank+1]-my_spatial_up_limit)/my_spatial_up_limit) < 1e-13); + + particles_input_hdf5<3,3> generator(MPI_COMM_WORLD, "/home/bbramas/Projects/bfps_runs/particles2/N0288_ptest_1e5_particles.h5", + "tracers0", spatial_interval_per_proc); + + total_nb_particles = generator.getTotalNbParticles(); + part_sys.init(generator); + // generator might be in a modified state here. + } + + + particles_output_hdf5<3,3> particles_output_writer(MPI_COMM_WORLD, "/tmp/res.hdf5", total_nb_particles); + particles_output_mpiio<3,3> particles_output_writer_mpi(MPI_COMM_WORLD, "/tmp/res.mpiio", total_nb_particles); + + + part_sys.completeLoop(0.1); + particles_output_writer.save(part_sys.getParticlesPositions(), + part_sys.getParticlesCurrentRhs(), + part_sys.getParticlesIndexes(), + part_sys.getLocalNbParticles(), 1); + particles_output_writer_mpi.save(part_sys.getParticlesPositions(), + part_sys.getParticlesCurrentRhs(), + part_sys.getParticlesIndexes(), + part_sys.getLocalNbParticles(), 1); + + part_sys.completeLoop(0.1); + particles_output_writer.save(part_sys.getParticlesPositions(), + part_sys.getParticlesCurrentRhs(), + part_sys.getParticlesIndexes(), + part_sys.getLocalNbParticles(), 2); + particles_output_writer_mpi.save(part_sys.getParticlesPositions(), + part_sys.getParticlesCurrentRhs(), + part_sys.getParticlesIndexes(), + part_sys.getLocalNbParticles(), 2); + } + MPI_Finalize(); + + return 0; +} diff --git a/bfps/cpp/particles/particles_adams_bashforth.hpp b/bfps/cpp/particles/particles_adams_bashforth.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f326111d506f8fe43696978e16b8947c61499e60 --- /dev/null +++ b/bfps/cpp/particles/particles_adams_bashforth.hpp @@ -0,0 +1,98 @@ +#ifndef PARTICLES_ADAMS_BASHFORTH_HPP +#define PARTICLES_ADAMS_BASHFORTH_HPP + +#include <stdexcept> +#include "scope_timer.hpp" + +template <int size_particle_positions = 3, int size_particle_rhs = 3> +class particles_adams_bashforth { +public: + static const int Max_steps = 6; + + void move_particles(double particles_positions[], + const int nb_particles, + const std::unique_ptr<double[]> particles_rhs[], + const int nb_rhs, const double dt) const{ + TIMEZONE("particles_adams_bashforth::move_particles"); + // TODO full unroll + blocking + switch (nb_rhs){ + case 1: + for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ + // dt × [0] + particles_positions[idx_part*size_particle_positions + idx_dim] + += dt * particles_rhs[0][idx_part*size_particle_rhs + idx_dim]; + } + } + break; + case 2: + for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ + // dt × (3[0] - [1])/2 + particles_positions[idx_part*size_particle_positions + idx_dim] + += dt * (3.*particles_rhs[0][idx_part*size_particle_rhs + idx_dim] + - particles_rhs[1][idx_part*size_particle_rhs + idx_dim])/2.; + } + } + break; + case 3: + for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ + // dt × (23[0] - 16[1] + 5[2])/12 + particles_positions[idx_part*size_particle_positions + idx_dim] + += dt * (23.*particles_rhs[0][idx_part*size_particle_rhs + idx_dim] + - 16.*particles_rhs[1][idx_part*size_particle_rhs + idx_dim] + + 5.*particles_rhs[2][idx_part*size_particle_rhs + idx_dim])/12.; + } + } + break; + case 4: + for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ + // dt × (55[0] - 59[1] + 37[2] - 9[3])/24 + particles_positions[idx_part*size_particle_positions + idx_dim] + += dt * (55.*particles_rhs[0][idx_part*size_particle_rhs + idx_dim] + - 59.*particles_rhs[1][idx_part*size_particle_rhs + idx_dim] + + 37.*particles_rhs[2][idx_part*size_particle_rhs + idx_dim] + - 9.*particles_rhs[3][idx_part*size_particle_rhs + idx_dim])/24.; + } + } + break; + case 5: + for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ + // dt × (1901[0] - 2774[1] + 2616[2] - 1274[3] + 251[4])/720 + particles_positions[idx_part*size_particle_positions + idx_dim] + += dt * (1901.*particles_rhs[0][idx_part*size_particle_rhs + idx_dim] + - 2774.*particles_rhs[1][idx_part*size_particle_rhs + idx_dim] + + 2616.*particles_rhs[2][idx_part*size_particle_rhs + idx_dim] + - 1274.*particles_rhs[3][idx_part*size_particle_rhs + idx_dim] + + 251.*particles_rhs[4][idx_part*size_particle_rhs + idx_dim])/720.; + } + } + break; + case 6: + for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + for(int idx_dim = 0 ; idx_dim < size_particle_positions ; ++idx_dim){ + // dt × (4277[0] - 7923[1] + 9982[2] - 7298[3] + 2877[4] - 475[5])/1440 + particles_positions[idx_part*size_particle_positions + idx_dim] + += dt * (4277.*particles_rhs[0][idx_part*size_particle_rhs + idx_dim] + - 7923.*particles_rhs[1][idx_part*size_particle_rhs + idx_dim] + + 9982.*particles_rhs[2][idx_part*size_particle_rhs + idx_dim] + - 7298.*particles_rhs[3][idx_part*size_particle_rhs + idx_dim] + + 2877.*particles_rhs[4][idx_part*size_particle_rhs + idx_dim] + - 475.*particles_rhs[5][idx_part*size_particle_rhs + idx_dim])/1440.; + } + } + break; + default: + throw std::runtime_error("Error, in bfps particles_adams_bashforth.\n" + "Step in particles_adams_bashforth is too large," + "you must add formulation up this number or limit the number of steps."); + } + } +}; + + + +#endif diff --git a/bfps/cpp/particles/particles_field_computer.hpp b/bfps/cpp/particles/particles_field_computer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1baac156b20005fb6e6c91a1bc0523bfd31c9550 --- /dev/null +++ b/bfps/cpp/particles/particles_field_computer.hpp @@ -0,0 +1,218 @@ +#ifndef PARTICLES_FIELD_COMPUTER_HPP +#define PARTICLES_FIELD_COMPUTER_HPP + +#include <array> +#include <utility> + +#include "abstract_particles_distr.hpp" +#include "scope_timer.hpp" + +template <class interpolator_class, class field_class, int interp_neighbours, class positions_updater_class > +class particles_field_computer : public abstract_particles_distr<3,3,1> { + const std::array<size_t,3> field_grid_dim; + const std::pair<int,int> current_partition_interval; + + const interpolator_class& interpolator; + const field_class& field; + + const positions_updater_class positions_updater; + + const std::array<double,3> spatial_box_width; + const double box_step_width; + const double my_spatial_low_limit; + const double my_spatial_up_limit; + + int deriv[3]; + + //////////////////////////////////////////////////////////////////////// + /// Computation related + //////////////////////////////////////////////////////////////////////// + + virtual void init_result_array(double particles_current_rhs[], + const int nb_particles) final{ + // Set values to zero initialy + std::fill(particles_current_rhs, particles_current_rhs+nb_particles*3, 0); + } + + virtual void apply_computation(const double particles_positions[], + double particles_current_rhs[], + const int nb_particles) const final{ + TIMEZONE("particles_field_computer::apply_computation"); + for(int idxPart = 0 ; idxPart < nb_particles ; ++idxPart){ + double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2]; + interpolator.compute_beta(deriv[0], particles_positions[idxPart*3], bx); + interpolator.compute_beta(deriv[1], particles_positions[idxPart*3+1], by); + interpolator.compute_beta(deriv[2], particles_positions[idxPart*3+2], bz); + + const int partGridIdx_x = int(particles_positions[idxPart*3]/box_step_width); + const int partGridIdx_y = int(particles_positions[idxPart*3+1]/box_step_width); + const int partGridIdx_z = int(particles_positions[idxPart*3+2]/box_step_width); + + assert(0 <= partGridIdx_z && partGridIdx_z < field_grid_dim[2]); + assert(0 <= partGridIdx_x && partGridIdx_x < field_grid_dim[0]); + assert(0 <= partGridIdx_y && partGridIdx_y < field_grid_dim[1]); + + const int interp_limit_mx = partGridIdx_x-interp_neighbours; + const int interp_limit_x = partGridIdx_x+interp_neighbours+1; + const int interp_limit_my = partGridIdx_y-interp_neighbours; + const int interp_limit_y = partGridIdx_y+interp_neighbours+1; + + int interp_limit_mz[2]; + int interp_limit_z[2]; + int nb_z_intervals; + + if((partGridIdx_z-interp_neighbours) < 0){ + assert(partGridIdx_z+interp_neighbours+1 < field_grid_dim[2]); + interp_limit_mz[0] = ((partGridIdx_z-interp_neighbours)+field_grid_dim[2])%field_grid_dim[2]; + interp_limit_z[0] = current_partition_interval.second-1; + + interp_limit_mz[1] = std::max(0, current_partition_interval.first);// max is not really needed here + interp_limit_z[1] = std::min(partGridIdx_z+interp_neighbours+1, current_partition_interval.second-1); + + nb_z_intervals = 2; + } + else if(field_grid_dim[2] <= (partGridIdx_z+interp_neighbours+1)){ + interp_limit_mz[0] = std::max(current_partition_interval.first, partGridIdx_z-interp_neighbours); + interp_limit_z[0] = std::min(int(field_grid_dim[2])-1,current_partition_interval.second-1);// max is not really needed here + + interp_limit_mz[1] = std::max(0, current_partition_interval.first); + interp_limit_z[1] = std::min(int((partGridIdx_z+interp_neighbours+1+field_grid_dim[2])%field_grid_dim[2]), current_partition_interval.second-1); + + nb_z_intervals = 2; + } + else{ + interp_limit_mz[0] = std::max(partGridIdx_z-interp_neighbours, current_partition_interval.first); + interp_limit_z[0] = std::min(partGridIdx_z+interp_neighbours+1, current_partition_interval.second-1); + nb_z_intervals = 1; + } + + for(int idx_inter = 0 ; idx_inter < nb_z_intervals ; ++idx_inter){ + for(int idx_z = interp_limit_mz[idx_inter] ; idx_z <= interp_limit_z[idx_inter] ; ++idx_z ){ + const int idx_z_pbc = (idx_z + field_grid_dim[2])%field_grid_dim[2]; + assert(current_partition_interval.first <= idx_z_pbc && idx_z_pbc < current_partition_interval.second); + assert(idx_z-interp_limit_mz[idx_inter] < interp_neighbours*2+2); + + for(int idx_x = interp_limit_mx ; idx_x <= interp_limit_x ; ++idx_x ){ + const int idx_x_pbc = (idx_x + field_grid_dim[0])%field_grid_dim[0]; + assert(idx_x-interp_limit_mx < interp_neighbours*2+2); + + for(int idx_y = interp_limit_my ; idx_y <= interp_limit_y ; ++idx_y ){ + const int idx_y_pbc = (idx_y + field_grid_dim[1])%field_grid_dim[1]; + assert(idx_y-interp_limit_my < interp_neighbours*2+2); + + const double coef = (bz[idx_z-interp_limit_mz[idx_inter]] + * by[idx_y-interp_limit_my] + * bx[idx_x-interp_limit_mx]); + + const ptrdiff_t tindex = field.getIndexFromGlobalPosition(idx_x_pbc, idx_y_pbc, idx_z_pbc); + + particles_current_rhs[idxPart*3+0] += field.getValue(tindex,0)*coef; + particles_current_rhs[idxPart*3+1] += field.getValue(tindex,1)*coef; + particles_current_rhs[idxPart*3+2] += field.getValue(tindex,2)*coef; + } + } + } + } + } + } + + virtual void reduce_particles(const double /*particles_positions*/[], + double particles_current_rhs[], + const double extra_particles_current_rhs[], + const int nb_particles) const final{ + TIMEZONE("particles_field_computer::reduce_particles"); + // Simply sum values + for(int idxPart = 0 ; idxPart < nb_particles ; ++idxPart){ + particles_current_rhs[idxPart*3+0] += extra_particles_current_rhs[idxPart*3+0]; + particles_current_rhs[idxPart*3+1] += extra_particles_current_rhs[idxPart*3+1]; + particles_current_rhs[idxPart*3+2] += extra_particles_current_rhs[idxPart*3+2]; + } + } + + + //////////////////////////////////////////////////////////////////////// + /// Re-distribution related + //////////////////////////////////////////////////////////////////////// + + void apply_pbc_xy(double* inout_particles, const int size) const final { + TIMEZONE("particles_field_computer::apply_pbc_xy"); + for(int idxPart = 0 ; idxPart < size ; ++idxPart){ + // Consider it will never move for more than one box repeatition + for(int idxDim = 0 ; idxDim < 2 ; ++idxDim){ + if(inout_particles[idxPart*3+idxDim] < 0) inout_particles[idxPart*3+idxDim] += spatial_box_width[idxDim]; + else if(spatial_box_width[idxDim] <= inout_particles[idxPart*3+idxDim]) inout_particles[idxPart*3+idxDim] -= spatial_box_width[idxDim]; + assert(0 <= inout_particles[idxPart*3+idxDim] && inout_particles[idxPart*3+idxDim] < spatial_box_width[idxDim]); + } + } + } + + void apply_pbc_z_new_particles(double* values, const int size) const final { + TIMEZONE("particles_field_computer::apply_pbc_z_new_particles"); + if(my_rank == 0){ + const int idxDim = 2; + for(int idxPart = 0 ; idxPart < size ; ++idxPart){ + assert(values[idxPart*3+idxDim] < my_spatial_up_limit || spatial_box_width[idxDim] <= values[idxPart*3+idxDim]); + assert(my_spatial_low_limit <= values[idxPart*3+idxDim]); + + if(spatial_box_width[idxDim] <= values[idxPart*3+idxDim]) values[idxPart*3+idxDim] -= spatial_box_width[idxDim]; + + assert(0 <= values[idxPart*3+idxDim] && values[idxPart*3+idxDim] < spatial_box_width[idxDim]); + assert(my_spatial_low_limit <= values[idxPart*3+idxDim] && values[idxPart*3+idxDim] < my_spatial_up_limit); + } + } + else if(my_rank == nb_processes - 1){ + const int idxDim = 2; + for(int idxPart = 0 ; idxPart < size ; ++idxPart){ + assert(my_spatial_low_limit <= values[idxPart*3+idxDim] || values[idxPart*3+idxDim] < 0); + assert(values[idxPart*3+idxDim] < spatial_box_width[idxDim]); + + if(values[idxPart*3+idxDim] < 0) values[idxPart*3+idxDim] += spatial_box_width[idxDim]; + + assert(0 <= values[idxPart*3+idxDim] && values[idxPart*3+idxDim] < spatial_box_width[idxDim]); + assert(my_spatial_low_limit <= values[idxPart*3+idxDim] && values[idxPart*3+idxDim] < my_spatial_up_limit); + } + } + else{ + const int idxDim = 2; + for(int idxPart = 0 ; idxPart < size ; ++idxPart){ + assert(my_spatial_low_limit <= values[idxPart*3+idxDim] && values[idxPart*3+idxDim] < my_spatial_up_limit); + } + } + } + +public: + + particles_field_computer(MPI_Comm in_current_com, const std::array<size_t,3>& in_field_grid_dim, + const std::pair<int,int>& in_current_partitions, + const interpolator_class& in_interpolator, + const field_class& in_field, + const std::array<double,3>& in_spatial_box_width, + const double in_box_step_width, const double in_my_spatial_low_limit, + const double in_my_spatial_up_limit) + : abstract_particles_distr(in_current_com, in_current_partitions), + field_grid_dim(in_field_grid_dim), current_partition_interval(in_current_partitions), + interpolator(in_interpolator), field(in_field), positions_updater(), + spatial_box_width(in_spatial_box_width), box_step_width(in_box_step_width), + my_spatial_low_limit(in_my_spatial_low_limit), my_spatial_up_limit(in_my_spatial_up_limit){ + deriv[0] = 0; + deriv[1] = 0; + deriv[2] = 0; + } + + //////////////////////////////////////////////////////////////////////// + /// Update position + //////////////////////////////////////////////////////////////////////// + + void move_particles(double particles_positions[], + const int nb_particles, + const std::unique_ptr<double[]> particles_current_rhs[], + const int nb_rhs, const double dt) const final{ + TIMEZONE("particles_field_computer::move_particles"); + positions_updater.move_particles(particles_positions, nb_particles, + particles_current_rhs, nb_rhs, dt); + } + +}; + + +#endif diff --git a/bfps/cpp/particles/particles_input_hdf5.hpp b/bfps/cpp/particles/particles_input_hdf5.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c3b377a300e610d369938fdc8fd642f4ec7429c3 --- /dev/null +++ b/bfps/cpp/particles/particles_input_hdf5.hpp @@ -0,0 +1,309 @@ +#ifndef PARTICLES_INPUT_HDF5_HPP +#define PARTICLES_INPUT_HDF5_HPP + +#include <tuple> +#include <mpi.h> +#include <hdf5.h> +#include <cassert> +#include <vector> + +#include "abstract_particles_input.hpp" +#include "base.hpp" +#include "alltoall_exchanger.hpp" +#include "particles_utils.hpp" +#include "scope_timer.hpp" + +#ifndef AssertMpi +#define AssertMpi(X) if(MPI_SUCCESS != (X)) { printf("MPI Error at line %d\n",__LINE__); fflush(stdout) ; throw std::runtime_error("Stop from from mpi erro"); } +#endif + + +template <int size_particle_positions, int size_particle_rhs> +class particles_input_hdf5 : public abstract_particles_input { + const std::string filename; + const std::string dataname; + + MPI_Comm mpi_comm; + int my_rank; + int nb_processes; + + hsize_t nb_total_particles; + hsize_t nb_rhs; + int nb_particles_for_me; + + std::unique_ptr<double[]> my_particles_positions; + std::unique_ptr<int[]> my_particles_indexes; + std::vector<std::unique_ptr<double[]>> my_particles_rhs; + + static std::vector<double> BuildLimitsAllProcesses(MPI_Comm mpi_comm, + const double my_spatial_low_limit, const double my_spatial_up_limit){ + int my_rank; + int nb_processes; + + AssertMpi(MPI_Comm_rank(mpi_comm, &my_rank)); + AssertMpi(MPI_Comm_size(mpi_comm, &nb_processes)); + + std::vector<double> spatial_limit_per_proc(nb_processes*2); + + double intervalToSend[2] = {my_spatial_low_limit, my_spatial_up_limit}; + AssertMpi(MPI_Allgather(intervalToSend, 2, MPI_DOUBLE, + spatial_limit_per_proc.data(), 2, MPI_DOUBLE, mpi_comm)); + + for(int idx_proc = 0; idx_proc < nb_processes-1 ; ++idx_proc){ + assert(spatial_limit_per_proc[idx_proc*2] <= spatial_limit_per_proc[idx_proc*2+1]); + assert(spatial_limit_per_proc[idx_proc*2+1] == spatial_limit_per_proc[(idx_proc+1)*2]); + spatial_limit_per_proc[idx_proc+1] = spatial_limit_per_proc[idx_proc*2+1]; + } + spatial_limit_per_proc[nb_processes] = spatial_limit_per_proc[(nb_processes-1)*2+1]; + spatial_limit_per_proc.resize(nb_processes+1); + + return spatial_limit_per_proc; + } + +public: + particles_input_hdf5(const MPI_Comm in_mpi_comm,const std::string& inFilename, const std::string& inDataname, + const double my_spatial_low_limit, const double my_spatial_up_limit) + : particles_input_hdf5(in_mpi_comm, inFilename, inDataname, + BuildLimitsAllProcesses(in_mpi_comm, my_spatial_low_limit, my_spatial_up_limit)){ + } + + particles_input_hdf5(const MPI_Comm in_mpi_comm,const std::string& inFilename, const std::string& inDataname, + const std::vector<double>& in_spatial_limit_per_proc) + : filename(inFilename), dataname(inDataname), + mpi_comm(in_mpi_comm), my_rank(-1), nb_processes(-1), nb_total_particles(0), + nb_particles_for_me(-1){ + TIMEZONE("particles_input_hdf5"); + + AssertMpi(MPI_Comm_rank(mpi_comm, &my_rank)); + AssertMpi(MPI_Comm_size(mpi_comm, &nb_processes)); + assert(in_spatial_limit_per_proc.size() == nb_processes+1); + + hid_t plist_id_par = H5Pcreate(H5P_FILE_ACCESS); + assert(plist_id_par >= 0); + { + int retTest = H5Pset_fapl_mpio(plist_id_par, mpi_comm, MPI_INFO_NULL); + assert(retTest >= 0); + } + + hid_t particle_file = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, plist_id_par); + assert(particle_file >= 0); + + //hid_t dset, prop_list, dspace; + hid_t hdf5_group_id = H5Gopen(particle_file, dataname.c_str(), H5P_DEFAULT); + assert(hdf5_group_id >= 0); + + { + TIMEZONE("state"); + hid_t dset = H5Dopen(hdf5_group_id, "state", H5P_DEFAULT); + assert(dset >= 0); + + hid_t dspace = H5Dget_space(dset); // copy? + assert(dspace >= 0); + + hid_t space_dim = H5Sget_simple_extent_ndims(dspace); + assert(space_dim >= 2); + + std::vector<hsize_t> state_dim_array(space_dim); + int hdfret = H5Sget_simple_extent_dims(dspace, &state_dim_array[0], NULL); + assert(hdfret >= 0); + // Last value is the position dim of the particles + assert(state_dim_array.back() == size_particle_positions); + + nb_total_particles = 1; + for (size_t idx_dim = 0; idx_dim < state_dim_array.size()-1; ++idx_dim){ + nb_total_particles *= state_dim_array[idx_dim]; + } + + hdfret = H5Sclose(dspace); + assert(hdfret >= 0); + hdfret = H5Dclose(dset); + assert(hdfret >= 0); + } + { + TIMEZONE("rhs"); + hid_t dset = H5Dopen(hdf5_group_id, "rhs", H5P_DEFAULT); + assert(dset >= 0); + hid_t dspace = H5Dget_space(dset); // copy? + assert(dspace >= 0); + + hid_t rhs_dim = H5Sget_simple_extent_ndims(dspace); + assert(rhs_dim == 4); + std::vector<hsize_t> rhs_dim_array(rhs_dim); + + int hdfret = H5Sget_simple_extent_dims(dspace, &rhs_dim_array[0], NULL); + assert(hdfret >= 0); + assert(rhs_dim_array.back() == size_particle_rhs); + nb_rhs = rhs_dim_array[0]; + + hdfret = H5Sclose(dspace); + assert(hdfret >= 0); + hdfret = H5Dclose(dset); + assert(hdfret >= 0); + } + + particles_utils::IntervalSplitter<hsize_t> load_splitter(nb_total_particles, nb_processes, my_rank); + + DEBUG_MSG("nb_total_particles %lu\n", nb_total_particles); + DEBUG_MSG("load_splitter.getMyOffset() %lu\n", load_splitter.getMyOffset()); + DEBUG_MSG("load_splitter.getMySize() %lu\n", load_splitter.getMySize()); + + /// Load the data + std::unique_ptr<double[]> split_particles_positions(new double[load_splitter.getMySize()*size_particle_positions]); + { + TIMEZONE("state-read"); + hid_t dset = H5Dopen(hdf5_group_id, "state", H5P_DEFAULT); + assert(dset >= 0); + + hid_t rspace = H5Dget_space(dset); + assert(rspace >= 0); + + hsize_t offset[3] = {0, load_splitter.getMyOffset(), 0}; + hsize_t mem_dims[3] = {1, load_splitter.getMySize(), 3}; + + hid_t mspace = H5Screate_simple( 3, &mem_dims[0], NULL); + assert(mspace >= 0); + + int rethdf = H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, + NULL, mem_dims, NULL); + assert(rethdf >= 0); + rethdf = H5Dread(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, split_particles_positions.get()); + assert(rethdf >= 0); + + rethdf = H5Sclose(rspace); + assert(rethdf >= 0); + rethdf = H5Dclose(dset); + assert(rethdf >= 0); + } + std::vector<std::unique_ptr<double[]>> split_particles_rhs(nb_rhs); + { + TIMEZONE("rhs-read"); + hid_t dset = H5Dopen(hdf5_group_id, "rhs", H5P_DEFAULT); + assert(dset >= 0); + + hid_t rspace = H5Dget_space(dset); + assert(rspace >= 0); + + for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ + split_particles_rhs[idx_rhs].reset(new double[load_splitter.getMySize()*size_particle_rhs]); + + hsize_t offset[4] = {0, idx_rhs, load_splitter.getMyOffset(), 0}; + hsize_t mem_dims[4] = {1, 1, load_splitter.getMySize(), 3}; + + hid_t mspace = H5Screate_simple( 4, &mem_dims[0], NULL); + assert(mspace >= 0); + + int rethdf = H5Sselect_hyperslab( rspace, H5S_SELECT_SET, offset, + NULL, mem_dims, NULL); + assert(rethdf >= 0); + rethdf = H5Dread(dset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, split_particles_rhs[idx_rhs].get()); + assert(rethdf >= 0); + + rethdf = H5Sclose(mspace); + assert(rethdf >= 0); + + rethdf = H5Sclose(rspace); + assert(rethdf >= 0); + } + int rethdf = H5Dclose(dset); + assert(rethdf >= 0); + } + + std::unique_ptr<int[]> split_particles_indexes(new int[load_splitter.getMySize()]); + for(int idx_part = 0 ; idx_part < load_splitter.getMySize() ; ++idx_part){ + split_particles_indexes[idx_part] = idx_part + load_splitter.getMyOffset(); + } + + // Permute + std::vector<int> nb_particles_per_proc(nb_processes); + { + TIMEZONE("partition"); + int previousOffset = 0; + for(int idx_proc = 0 ; idx_proc < nb_processes-1 ; ++idx_proc){ + const double limitPartition = in_spatial_limit_per_proc[idx_proc+1]; + const int localOffset = particles_utils::partition_extra<size_particle_positions>( + &split_particles_positions[previousOffset*size_particle_positions], + load_splitter.getMySize()-previousOffset, + [&](const double val[]){ + return val[2] < limitPartition; + }, + [&](const int idx1, const int idx2){ + std::swap(split_particles_indexes[idx1], split_particles_indexes[idx2]); + for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ + std::swap(split_particles_rhs[idx_rhs][idx1*size_particle_rhs + idx_val], + split_particles_rhs[idx_rhs][idx2*size_particle_rhs + idx_val]); + } + } + }); + + nb_particles_per_proc[idx_proc] = localOffset; + previousOffset += localOffset; + } + nb_particles_per_proc[nb_processes-1] = load_splitter.getMySize() - previousOffset; + } + + { + TIMEZONE("exchanger"); + alltoall_exchanger exchanger(mpi_comm, std::move(nb_particles_per_proc)); + // nb_particles_per_processes cannot be used after due to move + DEBUG_MSG("exchanger.getTotalToRecv() %lu\n", exchanger.getTotalToRecv()); + nb_particles_for_me = exchanger.getTotalToRecv(); + + my_particles_positions.reset(new double[exchanger.getTotalToRecv()*size_particle_positions]); + exchanger.alltoallv<double>(split_particles_positions.get(), my_particles_positions.get(), size_particle_positions); + split_particles_positions.release(); + + my_particles_indexes.reset(new int[exchanger.getTotalToRecv()]); + exchanger.alltoallv<int>(split_particles_indexes.get(), my_particles_indexes.get()); + split_particles_indexes.release(); + + my_particles_rhs.resize(nb_rhs); + for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ + my_particles_rhs[idx_rhs].reset(new double[exchanger.getTotalToRecv()*size_particle_rhs]); + exchanger.alltoallv<double>(split_particles_rhs[idx_rhs].get(), my_particles_rhs[idx_rhs].get(), size_particle_rhs); + } + } + + { + TIMEZONE("close"); + int hdfret = H5Gclose(hdf5_group_id); + assert(hdfret >= 0); + hdfret = H5Fclose(particle_file); + assert(hdfret >= 0); + hdfret = H5Pclose(plist_id_par); + assert(hdfret >= 0); + } + } + + ~particles_input_hdf5(){ + } + + int getTotalNbParticles() final{ + return nb_total_particles; + } + + int getLocalNbParticles() final{ + return int(nb_particles_for_me); + } + + int getNbRhs() final{ + return int(nb_rhs); + } + + std::unique_ptr<double[]> getMyParticles() final { + assert(my_particles_positions != nullptr); + return std::move(my_particles_positions); + } + + std::vector<std::unique_ptr<double[]>> getMyRhs() final { + assert(my_particles_rhs.size() == nb_rhs); + return std::move(my_particles_rhs); + } + + std::unique_ptr<int[]> getMyParticlesIndexes() final { + assert(my_particles_indexes != nullptr); + return std::move(my_particles_indexes); + } +}; + +#endif diff --git a/bfps/cpp/particles/particles_interp_spline.hpp b/bfps/cpp/particles/particles_interp_spline.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5eb69d20dff75de443480eaaeeb48709fffce542 --- /dev/null +++ b/bfps/cpp/particles/particles_interp_spline.hpp @@ -0,0 +1,165 @@ +#ifndef PARTICLES_INTER_SPLINE_HPP +#define PARTICLES_INTER_SPLINE_HPP + +template <int interp_neighbours, int mode> +class particles_interp_spline; + +#include "spline_n1.hpp" + +template <> +class particles_interp_spline<1,0>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n1_m0(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<1,1>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n1_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<1,2>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n1_m2(in_derivative, in_part_val, poly_val); + } +}; + +#include "spline_n2.hpp" + +template <> +class particles_interp_spline<2,0>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n2_m0(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<2,1>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n2_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<2,2>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n2_m2(in_derivative, in_part_val, poly_val); + } +}; + +#include "spline_n3.hpp" + +template <> +class particles_interp_spline<3,0>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n3_m0(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<3,1>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n3_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<3,2>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n3_m2(in_derivative, in_part_val, poly_val); + } +}; + +#include "spline_n4.hpp" + +template <> +class particles_interp_spline<4,0>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n4_m0(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<4,1>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n4_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<4,2>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n4_m2(in_derivative, in_part_val, poly_val); + } +}; + +#include "spline_n5.hpp" + +template <> +class particles_interp_spline<5,0>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n5_m0(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<5,1>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n5_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<5,2>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n5_m2(in_derivative, in_part_val, poly_val); + } +}; + +#include "spline_n6.hpp" + +template <> +class particles_interp_spline<6,0>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n6_m0(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<6,1>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n6_m1(in_derivative, in_part_val, poly_val); + } +}; + +template <> +class particles_interp_spline<6,2>{ +public: + void compute_beta(const int in_derivative, const double in_part_val, double poly_val[]) const { + beta_n6_m2(in_derivative, in_part_val, poly_val); + } +}; + + + +#endif diff --git a/bfps/cpp/particles/particles_output_hdf5.hpp b/bfps/cpp/particles/particles_output_hdf5.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f60bd80ba203723fed3f194cf34d67d9a86b9ffa --- /dev/null +++ b/bfps/cpp/particles/particles_output_hdf5.hpp @@ -0,0 +1,126 @@ +#ifndef PARTICLES_OUTPUT_HDF5_HPP +#define PARTICLES_OUTPUT_HDF5_HPP + +#include <memory> +#include <vector> +#include <hdf5.h> + +#include "abstract_particles_output.hpp" +#include "scope_timer.hpp" + +template <int size_particle_positions, int size_particle_rhs> +class particles_output_hdf5 : public abstract_particles_output<size_particle_positions, size_particle_rhs>{ + using Parent = abstract_particles_output<size_particle_positions, size_particle_rhs>; + + const std::string filename; + + hid_t file_id; + const int total_nb_particles; + +public: + particles_output_hdf5(MPI_Comm in_mpi_com, const std::string in_filename, const int inTotalNbParticles) + : abstract_particles_output<size_particle_positions, size_particle_rhs>(in_mpi_com, inTotalNbParticles), + filename(in_filename), + file_id(0), total_nb_particles(inTotalNbParticles){ + + TIMEZONE("particles_output_hdf5::H5Pcreate"); + hid_t plist_id_par = H5Pcreate(H5P_FILE_ACCESS); + assert(plist_id_par >= 0); + int retTest = H5Pset_fapl_mpio(plist_id_par, Parent::getCom(), MPI_INFO_NULL); + assert(retTest >= 0); + + // Parallel HDF5 write + file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC | H5F_ACC_DEBUG/*H5F_ACC_EXCL*/, H5P_DEFAULT/*H5F_ACC_RDWR*/, plist_id_par); + assert(file_id >= 0); + H5Pclose(plist_id_par); + } + + ~particles_output_hdf5(){ + TIMEZONE("particles_output_hdf5::H5Dclose"); + int rethdf = H5Fclose(file_id); + assert(rethdf >= 0); + } + + void write(const int idx_time_step, const double* particles_positions, const double* particles_rhs, + const int nb_particles, const int particles_idx_offset) final{ + TIMEZONE("particles_output_hdf5::write"); + + assert(particles_idx_offset < Parent::getTotalNbParticles()); + assert(particles_idx_offset+nb_particles <= Parent::getTotalNbParticles()); + + hid_t dset_id = H5Gcreate(file_id, ("dataset"+std::to_string(idx_time_step)).c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + assert(dset_id >= 0); + + { + const hsize_t datacount[3] = {1, total_nb_particles, size_particle_positions}; + hid_t dataspace = H5Screate_simple(3, datacount, NULL); + assert(dataspace >= 0); + + hid_t dataset_id = H5Dcreate( dset_id, "state", H5T_NATIVE_DOUBLE, dataspace, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + assert(dataset_id >= 0); + + const hsize_t count[3] = {1, nb_particles, size_particle_positions}; + const hsize_t offset[3] = {0, particles_idx_offset, 0}; + hid_t memspace = H5Screate_simple(3, count, NULL); + assert(memspace >= 0); + + hid_t filespace = H5Dget_space(dataset_id); + int rethdf = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL); + assert(rethdf >= 0); + + hid_t plist_id = H5Pcreate(H5P_DATASET_XFER); + assert(plist_id >= 0); + rethdf = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); + assert(rethdf >= 0); + + herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, memspace, filespace, + plist_id, particles_positions); + assert(status >= 0); + rethdf = H5Sclose(memspace); + assert(rethdf >= 0); + rethdf = H5Dclose(dataset_id); + assert(rethdf >= 0); + rethdf = H5Sclose(filespace); + assert(rethdf >= 0); + } + { + const hsize_t datacount[3] = {1, total_nb_particles, size_particle_rhs}; + hid_t dataspace = H5Screate_simple(3, datacount, NULL); + assert(dataspace >= 0); + + hid_t dataset_id = H5Dcreate( dset_id, "rhs", H5T_NATIVE_DOUBLE, dataspace, H5P_DEFAULT, + H5P_DEFAULT, H5P_DEFAULT); + assert(dataset_id >= 0); + + const hsize_t count[3] = {1, nb_particles, size_particle_rhs}; + const hsize_t offset[3] = {0, particles_idx_offset, 0}; + hid_t memspace = H5Screate_simple(3, count, NULL); + assert(memspace >= 0); + + hid_t filespace = H5Dget_space(dataset_id); + assert(filespace >= 0); + int rethdf = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL); + assert(rethdf >= 0); + + hid_t plist_id = H5Pcreate(H5P_DATASET_XFER); + assert(plist_id >= 0); + rethdf = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); + assert(rethdf >= 0); + + herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, memspace, filespace, + plist_id, particles_rhs); + assert(status >= 0); + rethdf = H5Sclose(memspace); + assert(rethdf >= 0); + rethdf = H5Dclose(dataset_id); + assert(rethdf >= 0); + rethdf = H5Sclose(filespace); + assert(rethdf >= 0); + } + int rethdf = H5Gclose(dset_id); + assert(rethdf >= 0); + } +}; + +#endif diff --git a/bfps/cpp/particles/particles_output_mpiio.hpp b/bfps/cpp/particles/particles_output_mpiio.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5b9934052e51e13afda41280bfb19885fd260f37 --- /dev/null +++ b/bfps/cpp/particles/particles_output_mpiio.hpp @@ -0,0 +1,83 @@ +#ifndef PARTICLES_OUTPUT_MPIIO +#define PARTICLES_OUTPUT_MPIIO + +#include <memory> +#include <vector> +#include <string> +#include <cassert> + +#include "abstract_particles_output.hpp" +#include "scope_timer.hpp" + +#ifndef AssertMpi +#define AssertMpi(X) if(MPI_SUCCESS != (X)) { printf("MPI Error at line %d\n",__LINE__); fflush(stdout) ; throw std::runtime_error("Stop from from mpi erro"); } +#endif + + +template <int size_particle_positions, int size_particle_rhs> +class particles_output_mpiio : public abstract_particles_output<size_particle_positions, size_particle_rhs>{ + using Parent = abstract_particles_output<size_particle_positions, size_particle_rhs>; + + const std::string filename; + const int nb_step_prealloc; + + MPI_File mpi_file; + +public: + particles_output_mpiio(MPI_Comm in_mpi_com, const std::string in_filename, const int inTotalNbParticles, + const int in_nb_step_prealloc = -1) + : abstract_particles_output<size_particle_positions, size_particle_rhs>(in_mpi_com, inTotalNbParticles), + filename(in_filename), nb_step_prealloc(in_nb_step_prealloc){ + { + TIMEZONE("particles_output_mpiio::MPI_File_open"); + AssertMpi(MPI_File_open(Parent::getCom(), const_cast<char*>(filename.c_str()), + MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_file)); + } + if(nb_step_prealloc != -1){ + TIMEZONE("particles_output_mpiio::MPI_File_set_size"); + AssertMpi(MPI_File_set_size(mpi_file, + nb_step_prealloc*Parent::getTotalNbParticles()*sizeof(double)*(size_particle_positions+size_particle_rhs))); + } + } + + ~particles_output_mpiio(){ + TIMEZONE("particles_output_mpiio::MPI_File_close"); + AssertMpi(MPI_File_close(&mpi_file)); + } + + void write(const int time_step, const double* particles_positions, const double* particles_rhs, + const int nb_particles, const int particles_idx_offset) final{ + TIMEZONE("particles_output_mpiio::write"); + + assert(nb_step_prealloc == -1 || time_step < nb_step_prealloc); + assert(particles_idx_offset < Parent::getTotalNbParticles()); + assert(particles_idx_offset+nb_particles <= Parent::getTotalNbParticles()); + + if(nb_step_prealloc == -1){ + TIMEZONE("particles_output_mpiio::write::MPI_File_set_size"); + AssertMpi(MPI_File_set_size(mpi_file, + (time_step+1)*Parent::getTotalNbParticles()*sizeof(double)*(size_particle_positions+size_particle_rhs))); + } + + const MPI_Offset globalParticlesOffset = time_step*Parent::getTotalNbParticles()*(size_particle_positions+size_particle_rhs) + + nb_particles*size_particle_positions; + + const MPI_Offset writingOffset = globalParticlesOffset * sizeof(double); + + AssertMpi(MPI_File_write_at(mpi_file, writingOffset, + const_cast<double*>(particles_positions), nb_particles*size_particle_positions, MPI_DOUBLE, + MPI_STATUS_IGNORE)); + + const MPI_Offset globalParticlesOffsetOutput = time_step*Parent::getTotalNbParticles()*(size_particle_positions+size_particle_rhs) + + Parent::getTotalNbParticles()*size_particle_positions + + nb_particles*size_particle_rhs; + + const MPI_Offset writingOffsetOutput = globalParticlesOffsetOutput * sizeof(double); + + AssertMpi(MPI_File_write_at(mpi_file, writingOffsetOutput, + const_cast<double*>(particles_rhs), nb_particles*size_particle_rhs, MPI_DOUBLE, + MPI_STATUS_IGNORE)); + } +}; + +#endif diff --git a/bfps/cpp/particles/particles_system.hpp b/bfps/cpp/particles/particles_system.hpp new file mode 100644 index 0000000000000000000000000000000000000000..52240591d1de54330b8f9a2ed95b020b48dfa70c --- /dev/null +++ b/bfps/cpp/particles/particles_system.hpp @@ -0,0 +1,196 @@ +#ifndef PARTICLES_SYSTEM_HPP +#define PARTICLES_SYSTEM_HPP + +#include <array> + +#include "particles_output_hdf5.hpp" +#include "particles_output_mpiio.hpp" +#include "particles_field_computer.hpp" +#include "field_accessor.hpp" +#include "abstract_particles_input.hpp" +#include "particles_adams_bashforth.hpp" +#include "scope_timer.hpp" + +template <class interpolator_class, int interp_neighbours> +class particles_system { + MPI_Comm mpi_com; + + const std::pair<int,int> current_partition_interval; + const int partition_interval_size; + + field_accessor field; + + interpolator_class interpolator; + + particles_field_computer<interpolator_class, field_accessor, interp_neighbours, particles_adams_bashforth<3,3>> computer; + + std::unique_ptr<int[]> current_my_nb_particles_per_partition; + std::unique_ptr<int[]> current_offset_particles_for_partition; + + const std::array<double,3> spatial_box_width; + const double spatial_partition_width; + const double my_spatial_low_limit; + const double my_spatial_up_limit; + + std::unique_ptr<double[]> my_particles_positions; + std::unique_ptr<int[]> my_particles_positions_indexes; + int my_nb_particles; + std::vector<std::unique_ptr<double[]>> my_particles_rhs; + + int step_idx; + +public: + particles_system(const std::array<size_t,3>& field_grid_dim, const std::array<double,3>& in_spatial_box_width, + const double in_spatial_partition_width, + const double in_my_spatial_low_limit, const double in_my_spatial_up_limit, + const double* in_field_data, const std::array<size_t,3>& in_local_field_dims, + const std::array<size_t,3>& in_local_field_offset, + MPI_Comm in_mpi_com) + : mpi_com(in_mpi_com), + current_partition_interval({in_local_field_offset[2], in_local_field_offset[2] + in_local_field_dims[2]}), + partition_interval_size(current_partition_interval.second - current_partition_interval.first), + field(in_field_data, in_local_field_dims, in_local_field_offset), + interpolator(), + computer(in_mpi_com, field_grid_dim, current_partition_interval, + interpolator, field, in_spatial_box_width, in_spatial_partition_width, + in_my_spatial_low_limit, in_my_spatial_up_limit), + spatial_box_width(in_spatial_box_width), spatial_partition_width(in_spatial_partition_width), + my_spatial_low_limit(in_my_spatial_low_limit), my_spatial_up_limit(in_my_spatial_up_limit), + my_nb_particles(0), step_idx(1){ + + current_my_nb_particles_per_partition.reset(new int[partition_interval_size]); + current_offset_particles_for_partition.reset(new int[partition_interval_size+1]); + } + + ~particles_system(){ + } + + void init(abstract_particles_input& particles_input){ + TIMEZONE("particles_system::init"); + + my_particles_positions = particles_input.getMyParticles(); + my_particles_positions_indexes = particles_input.getMyParticlesIndexes(); + my_particles_rhs = particles_input.getMyRhs(); + my_nb_particles = particles_input.getLocalNbParticles(); + + for(int idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me + assert(my_particles_positions[idx_part*3+2] >= my_spatial_low_limit); + assert(my_particles_positions[idx_part*3+2] < my_spatial_up_limit); + } + + particles_utils::partition_extra_z<3>(&my_particles_positions[0], my_nb_particles, partition_interval_size, + current_my_nb_particles_per_partition.get(), current_offset_particles_for_partition.get(), + [&](const int idxPartition){ + const double limitPartition = (idxPartition+1)*spatial_partition_width + my_spatial_low_limit; + return limitPartition; + }, + [&](const int idx1, const int idx2){ + std::swap(my_particles_positions_indexes[idx1], my_particles_positions_indexes[idx2]); + for(int idx_rhs = 0 ; idx_rhs < my_particles_rhs.size() ; ++idx_rhs){ + for(int idx_val = 0 ; idx_val < 3 ; ++idx_val){ + std::swap(my_particles_rhs[idx_rhs][idx1*3 + idx_val], + my_particles_rhs[idx_rhs][idx2*3 + idx_val]); + } + } + }); + + {// TODO remove + for(int idxPartition = 0 ; idxPartition < partition_interval_size ; ++idxPartition){ + assert(current_my_nb_particles_per_partition[idxPartition] == + current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]); + const double limitPartition = (idxPartition+1)*spatial_partition_width + my_spatial_low_limit; + for(int idx = 0 ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ + assert(my_particles_positions[idx*3+2] < limitPartition); + } + for(int idx = current_offset_particles_for_partition[idxPartition+1] ; idx < my_nb_particles ; ++idx){ + assert(my_particles_positions[idx*3+2] >= limitPartition); + } + } + } + } + + + void compute(){ + TIMEZONE("particles_system::compute"); + computer.compute_distr(current_my_nb_particles_per_partition.get(), + my_particles_positions.get(), + my_particles_rhs.front().get(), + interp_neighbours); + } + + void move(const double dt){ + TIMEZONE("particles_system::move"); + computer.move_particles(my_particles_positions.get(), my_nb_particles, + my_particles_rhs.data(), std::min(step_idx,int(my_particles_rhs.size())), + dt); + } + + void redistribute(){ + TIMEZONE("particles_system::redistribute"); + computer.redistribute(current_my_nb_particles_per_partition.get(), + &my_nb_particles, + &my_particles_positions, + my_particles_rhs.data(), my_particles_rhs.size(), + &my_particles_positions_indexes, + my_spatial_low_limit, + my_spatial_up_limit, + spatial_partition_width); + } + + void inc_step_idx(){ + step_idx += 1; + } + + void shift_rhs_vectors(){ + if(my_particles_rhs.size()){ + std::unique_ptr<double[]> next_current(std::move(my_particles_rhs.back())); + for(int idx_rhs = my_particles_rhs.size()-1 ; idx_rhs > 0 ; --idx_rhs){ + my_particles_rhs[idx_rhs] = std::move(my_particles_rhs[idx_rhs-1]); + } + my_particles_rhs[0] = std::move(next_current); + particles_utils::memzero(my_particles_rhs[0], 3*my_nb_particles); + } + } + + void completeLoop(const double dt){ + TIMEZONE("particles_system::completeLoop"); + compute(); + move(dt); + redistribute(); + inc_step_idx(); + shift_rhs_vectors(); + } + + const double* getParticlesPositions() const{ + return my_particles_positions.get(); + } + + const double* getParticlesCurrentRhs() const{ + return my_particles_rhs.front().get(); + } + + const int* getParticlesIndexes() const{ + return my_particles_positions_indexes.get(); + } + + int getLocalNbParticles() const{ + return my_nb_particles; + } + + void checkNan() const { // TODO remove + for(int idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me + assert(std::isnan(my_particles_positions[idx_part*3+0]) == false); + assert(std::isnan(my_particles_positions[idx_part*3+1]) == false); + assert(std::isnan(my_particles_positions[idx_part*3+2]) == false); + + for(int idx_rhs = 0 ; idx_rhs < my_particles_rhs.size() ; ++idx_rhs){ + assert(std::isnan(my_particles_rhs[idx_rhs][idx_part*3+0]) == false); + assert(std::isnan(my_particles_rhs[idx_rhs][idx_part*3+1]) == false); + assert(std::isnan(my_particles_rhs[idx_rhs][idx_part*3+2]) == false); + } + } + } +}; + + +#endif diff --git a/bfps/cpp/particles/particles_utils.hpp b/bfps/cpp/particles/particles_utils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..eb3d22ab15e718026f53cf78bb1ea659e60bc6d1 --- /dev/null +++ b/bfps/cpp/particles/particles_utils.hpp @@ -0,0 +1,217 @@ +#ifndef PARTICLES_UTILS_HPP +#define PARTICLES_UTILS_HPP + +#include <cassert> +#include <stack> + + +namespace particles_utils { + +template <int nb_values, class Predicate> +inline int partition(double* array, const int size, Predicate pdc) +{ + if(size == 0) return 0; + if(size == 1) return (pdc(&array[0])?1:0); + + int idxInsert = 0; + + for(int idx = 0 ; idx < size && pdc(&array[idx*nb_values]); ++idx){ + idxInsert += 1; + } + + for(int idx = idxInsert ; idx < size ; ++idx){ + if(pdc(&array[idx*nb_values])){ + for(int idxVal = 0 ; idxVal < nb_values ; ++idxVal){ + std::iter_swap(array[idx*nb_values + idxVal], array[idxInsert*nb_values + idxVal]); + } + idxInsert += 1; + } + } + + return idxInsert; +} + + +template <int nb_values, class Predicate1, class Predicate2> +inline int partition_extra(double* array, const int size, Predicate1 pdc, Predicate2 pdcswap, const int offset_idx_swap = 0) +{ + if(size == 0) return 0; + if(size == 1) return (pdc(&array[0])?1:0); + + int idxInsert = 0; + + for(int idx = 0 ; idx < size && pdc(&array[idx*nb_values]); ++idx){ + idxInsert += 1; + } + + for(int idx = idxInsert ; idx < size ; ++idx){ + if(pdc(&array[idx*nb_values])){ + for(int idxVal = 0 ; idxVal < nb_values ; ++idxVal){ + std::swap(array[idx*nb_values + idxVal], array[idxInsert*nb_values + idxVal]); + } + pdcswap(idx+offset_idx_swap, idxInsert+offset_idx_swap); + idxInsert += 1; + } + } + + return idxInsert; +} + +template <int nb_values, class Predicate1, class Predicate2> +inline void partition_extra_z(double* array, const int size, const int nb_partitions, + int partitions_size[], int partitions_offset[], + Predicate1 partitions_limits, Predicate2 pdcswap) +{ + if(nb_partitions == 0){ + return ; + } + + partitions_offset[0] = 0; + partitions_offset[nb_partitions] = size; + + if(nb_partitions == 1){ + partitions_size[0] = 0; + return; + } + + if(nb_partitions == 2){ + const double limit = partitions_limits(0); + const int size_current = partition_extra<nb_values>(array, size, + [&](const double inval[]){ + return inval[nb_values-1] < limit; + }, pdcswap); + partitions_size[0] = size_current; + partitions_size[1] = size-size_current; + partitions_offset[1] = size_current; + return; + } + + std::stack<std::pair<int,int>> toproceed; + + toproceed.push({0, nb_partitions}); + + while(toproceed.size()){ + const std::pair<int,int> current_part = toproceed.top(); + toproceed.pop(); + + assert(current_part.second-current_part.first >= 1); + + if(current_part.second-current_part.first == 1){ + partitions_size[current_part.first] = partitions_offset[current_part.first+1] - partitions_offset[current_part.first]; + } + else{ + const int idx_middle = (current_part.second-current_part.first)/2 + current_part.first - 1; + + const int size_unpart = partitions_offset[current_part.second]- partitions_offset[current_part.first]; + + const double limit = partitions_limits(idx_middle); + const int size_current = partition_extra<nb_values>(&array[partitions_offset[current_part.first]*nb_values], + size_unpart, + [&](const double inval[]){ + return inval[nb_values-1] < limit; + }, pdcswap, partitions_offset[current_part.first]); + + partitions_offset[idx_middle+1] = size_current + partitions_offset[current_part.first]; + + toproceed.push({current_part.first, idx_middle+1}); + + toproceed.push({idx_middle+1, current_part.second}); + } + } +} + +template <int nb_values, class Predicate1, class Predicate2> +inline std::pair<std::vector<int>,std::vector<int>> partition_extra_z(double* array, const int size, + const int nb_partitions, Predicate1 partitions_limits, + Predicate2 pdcswap){ + + std::vector<int> partitions_size(nb_partitions); + std::vector<int> partitions_offset(nb_partitions+1); + partition_extra_z<nb_values, Predicate1, Predicate2>(array, size, nb_partitions, + partitions_size.data(), partitions_offset.data(), + partitions_limits, pdcswap); + return {std::move(partitions_size), std::move(partitions_offset)}; +} + + +template <class NumType = int> +class IntervalSplitter { + const NumType nb_items; + const NumType nb_intervals; + const NumType my_idx; + + double step_split; + NumType offset_mine; + NumType size_mine; +public: + IntervalSplitter(const NumType in_nb_items, + const NumType in_nb_intervals, + const NumType in_my_idx) + : nb_items(in_nb_items), nb_intervals(in_nb_intervals), my_idx(in_my_idx), + step_split(0), offset_mine(0), size_mine(0){ + if(nb_items <= nb_intervals){ + step_split = 1; + if(my_idx < nb_intervals){ + offset_mine = my_idx; + size_mine = 1; + } + else{ + offset_mine = nb_intervals; + size_mine = 0; + } + } + else{ + step_split = double(nb_items)/double(nb_intervals); + offset_mine = NumType(step_split*double(my_idx)); + size_mine = NumType(step_split*double(my_idx+1)-step_split*double(my_idx)); + assert(my_idx != nb_intervals-1 || (offset_mine+size_mine) == nb_items); + } + } + + NumType getMySize() const { + return size_mine; + } + + NumType getMyOffset() const { + return offset_mine; + } + + NumType getSizeOther(const NumType in_idx_other) const { + return IntervalSplitter<NumType>(nb_items, nb_intervals, in_idx_other).getMySize(); + } + + NumType getOffsetOther(const NumType in_idx_other) const { + return IntervalSplitter<NumType>(nb_items, nb_intervals, in_idx_other).getMyOffset(); + } + + NumType getOwner(const NumType in_item_idx) const { + return NumType(double(in_item_idx)/step_split); + } +}; + +// http://en.cppreference.com/w/cpp/algorithm/transform +template<class InputIt, class OutputIt, class UnaryOperation> +OutputIt transform(InputIt first1, InputIt last1, OutputIt d_first, + UnaryOperation unary_op) +{ + while (first1 != last1) { + *d_first++ = unary_op(*first1++); + } + return d_first; +} + + +template <class NumType> +void memzero(NumType* array, size_t size){ + memset(array, 0, size*sizeof(NumType)); +} + +template <class NumType> +void memzero(std::unique_ptr<NumType[]>& array, size_t size){ + memset(array.get(), 0, size*sizeof(NumType)); +} + + +} + +#endif