diff --git a/bfps/NSVorticityEquation.py b/bfps/NSVorticityEquation.py index 21615f351ad3072bb263ffd8b86409fab283a763..3cf530c67b6d6d8bc19070af9f144cf149e90293 100644 --- a/bfps/NSVorticityEquation.py +++ b/bfps/NSVorticityEquation.py @@ -138,11 +138,11 @@ class NSVorticityEquation(_fluid_particle_base): "current fname is %s\\n and iteration is %d", fs->get_current_fname().c_str(), fs->iteration); - std::unique_ptr<abstract_particles_system<double>> ps = particles_system_builder( + std::unique_ptr<abstract_particles_system<long long int, double>> ps = particles_system_builder( fs->cvelocity, // (field object) fs->kk, // (kspace object, contains dkx, dky, dkz) tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) - nparticles, // to check coherency between parameters and hdf input file + (long long int)nparticles, // to check coherency between parameters and hdf input file fs->get_current_fname(), // particles input filename std::string("/tracers0/state/") + std::to_string(fs->iteration), // dataset name for initial input std::string("/tracers0/rhs/") + std::to_string(fs->iteration), // dataset name for initial input @@ -150,7 +150,7 @@ class NSVorticityEquation(_fluid_particle_base): tracers0_smoothness, // parameter MPI_COMM_WORLD, fs->iteration+1); - particles_output_hdf5<double,3,3> particles_output_writer_mpi( + particles_output_hdf5<long long int, double,3,3> particles_output_writer_mpi( MPI_COMM_WORLD, "tracers0", nparticles, diff --git a/bfps/_base.py b/bfps/_base.py index 14b42b6044fc77edeaba887f4aea8d236614545c..4b48ee73b646b0f684b7ecf1bfd627000322a737 100644 --- a/bfps/_base.py +++ b/bfps/_base.py @@ -56,7 +56,9 @@ class _base(object): key = sorted(list(parameters.keys())) src_txt = '' for i in range(len(key)): - if type(parameters[key[i]]) == int: + if (type(parameters[key[i]]) == int and parameters[key[i]] >= 1<<30): + src_txt += 'long long int ' + key[i] + ';\n' + elif type(parameters[key[i]]) == int: src_txt += 'int ' + key[i] + ';\n' elif type(parameters[key[i]]) == str: src_txt += 'char ' + key[i] + '[{0}];\n'.format(self.string_length) @@ -99,7 +101,9 @@ class _base(object): for i in range(len(key)): src_txt += 'dset = H5Dopen(parameter_file, "/{0}/{1}", H5P_DEFAULT);\n'.format( file_group, key[i]) - if type(parameters[key[i]]) == int: + if (type(parameters[key[i]]) == int and parameters[key[i]] >= 1<<30): + src_txt += 'H5Dread(dset, H5T_NATIVE_LLONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key[i]) + elif type(parameters[key[i]]) == int: src_txt += 'H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &{0});\n'.format(key[i]) elif type(parameters[key[i]]) == str: src_txt += ('space = H5Dget_space(dset);\n' + diff --git a/bfps/cpp/full_code/NSVEp.cpp b/bfps/cpp/full_code/NSVEp.cpp index 9de13469470c61e7c4115c79a032ec8a66ade3fa..8ce8478c635161690fc0dd2313e7e4b09267a16c 100644 --- a/bfps/cpp/full_code/NSVEp.cpp +++ b/bfps/cpp/full_code/NSVEp.cpp @@ -63,7 +63,7 @@ int NSVEp<rnumber>::initialize(void) fs->cvelocity, // (field object) fs->kk, // (kspace object, contains dkx, dky, dkz) tracers0_integration_steps, // to check coherency between parameters and hdf input file (nb rhs) - nparticles, // to check coherency between parameters and hdf input file + (long long int)nparticles, // to check coherency between parameters and hdf input file fs->get_current_fname(), // particles input filename std::string("/tracers0/state/") + std::to_string(fs->iteration), // dataset name for initial input std::string("/tracers0/rhs/") + std::to_string(fs->iteration), // dataset name for initial input @@ -71,7 +71,7 @@ int NSVEp<rnumber>::initialize(void) tracers0_smoothness, // parameter this->comm, fs->iteration+1); - this->particles_output_writer_mpi = new particles_output_hdf5<double,3,3>( + this->particles_output_writer_mpi = new particles_output_hdf5<long long int,double,3,3>( MPI_COMM_WORLD, "tracers0", nparticles, diff --git a/bfps/cpp/full_code/NSVEp.hpp b/bfps/cpp/full_code/NSVEp.hpp index 31f61f6d7f83a898f9ef0e3a4b6a4c32b2e66d98..2b3d97ef505a59f857ce49b1ac16323007105ec3 100644 --- a/bfps/cpp/full_code/NSVEp.hpp +++ b/bfps/cpp/full_code/NSVEp.hpp @@ -63,8 +63,8 @@ class NSVEp: public direct_numerical_simulation vorticity_equation<rnumber, FFTW> *fs; field<rnumber, FFTW, THREE> *tmp_vec_field; field<rnumber, FFTW, ONE> *tmp_scal_field; - std::unique_ptr<abstract_particles_system<double>> ps; - particles_output_hdf5<double,3,3> *particles_output_writer_mpi; + std::unique_ptr<abstract_particles_system<long long int, double>> ps; + particles_output_hdf5<long long int, double,3,3> *particles_output_writer_mpi; NSVEp( diff --git a/bfps/cpp/particles/abstract_particles_distr.hpp b/bfps/cpp/particles/abstract_particles_distr.hpp index 6e1776868510142ebedeb80d0751aeaa8bf31d3d..b012f824eb659b46062b30c5b1773f5fbe9acca3 100644 --- a/bfps/cpp/particles/abstract_particles_distr.hpp +++ b/bfps/cpp/particles/abstract_particles_distr.hpp @@ -14,7 +14,7 @@ #include "particles_utils.hpp" -template <class real_number, int size_particle_positions, int size_particle_rhs, int size_particle_index> +template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs, int size_particle_index> class abstract_particles_distr { protected: static const int MaxNbRhs = 100; @@ -45,8 +45,8 @@ protected: struct NeighborDescriptor{ int nbPartitionsToSend; int nbPartitionsToRecv; - int nbParticlesToSend; - int nbParticlesToRecv; + partsize_t nbParticlesToSend; + partsize_t nbParticlesToRecv; int destProc; int rankDiff; bool isLower; @@ -83,7 +83,7 @@ protected: std::unique_ptr<int[]> partition_interval_size_per_proc; std::unique_ptr<int[]> partition_interval_offset_per_proc; - std::unique_ptr<int[]> current_offset_particles_for_partition; + std::unique_ptr<partsize_t[]> current_offset_particles_for_partition; std::vector<std::pair<Action,int>> whatNext; std::vector<MPI_Request> mpiRequests; @@ -116,7 +116,7 @@ public: partition_interval_offset_per_proc[idxProc+1] = partition_interval_offset_per_proc[idxProc] + partition_interval_size_per_proc[idxProc]; } - current_offset_particles_for_partition.reset(new int[current_partition_size+1]); + current_offset_particles_for_partition.reset(new partsize_t[current_partition_size+1]); nb_processes_involved = nb_processes; while(nb_processes_involved != 0 && partition_interval_size_per_proc[nb_processes_involved-1] == 0){ @@ -134,7 +134,7 @@ public: //////////////////////////////////////////////////////////////////////////// - void compute_distr(const int current_my_nb_particles_per_partition[], + void compute_distr(const partsize_t current_my_nb_particles_per_partition[], const real_number particles_positions[], real_number particles_current_rhs[], const int interpolation_size){ @@ -146,7 +146,7 @@ public: } current_offset_particles_for_partition[0] = 0; - int myTotalNbParticles = 0; + partsize_t myTotalNbParticles = 0; for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ myTotalNbParticles += current_my_nb_particles_per_partition[idxPartition]; current_offset_particles_for_partition[idxPartition+1] = current_offset_particles_for_partition[idxPartition] + current_my_nb_particles_per_partition[idxPartition]; @@ -178,11 +178,11 @@ public: const int nbPartitionsToSend = std::min(current_partition_size, interpolation_size-(idxLower-1)); assert(nbPartitionsToSend >= 0); - const int nbParticlesToSend = current_offset_particles_for_partition[nbPartitionsToSend] - current_offset_particles_for_partition[0]; + const partsize_t nbParticlesToSend = current_offset_particles_for_partition[nbPartitionsToSend] - current_offset_particles_for_partition[0]; const int nbPartitionsToRecv = std::min(partition_interval_size_per_proc[destProc], (interpolation_size+1)-(idxLower-1)); assert(nbPartitionsToRecv > 0); - const int nbParticlesToRecv = -1; + const partsize_t nbParticlesToRecv = -1; NeighborDescriptor descriptor; descriptor.destProc = destProc; @@ -196,7 +196,7 @@ public: neigDescriptors.emplace_back(std::move(descriptor)); } - nbProcToRecvLower = neigDescriptors.size(); + nbProcToRecvLower = int(neigDescriptors.size()); nextDestProc = my_rank; for(int idxUpper = 1 ; idxUpper <= interpolation_size+1 ; idxUpper += partition_interval_size_per_proc[nextDestProc]){ @@ -211,11 +211,11 @@ public: const int nbPartitionsToSend = std::min(current_partition_size, (interpolation_size+1)-(idxUpper-1)); assert(nbPartitionsToSend > 0); - const int nbParticlesToSend = current_offset_particles_for_partition[current_partition_size] - current_offset_particles_for_partition[current_partition_size-nbPartitionsToSend]; + const partsize_t nbParticlesToSend = current_offset_particles_for_partition[current_partition_size] - current_offset_particles_for_partition[current_partition_size-nbPartitionsToSend]; const int nbPartitionsToRecv = std::min(partition_interval_size_per_proc[destProc], interpolation_size-(idxUpper-1)); assert(nbPartitionsToSend >= 0); - const int nbParticlesToRecv = -1; + const partsize_t nbParticlesToRecv = -1; NeighborDescriptor descriptor; descriptor.destProc = destProc; @@ -230,7 +230,7 @@ public: neigDescriptors.emplace_back(std::move(descriptor)); } } - const int nbProcToRecvUpper = neigDescriptors.size()-nbProcToRecvLower; + const int nbProcToRecvUpper = int(neigDescriptors.size())-nbProcToRecvLower; const int nbProcToRecv = nbProcToRecvUpper + nbProcToRecvLower; assert(int(neigDescriptors.size()) == nbProcToRecv); @@ -241,20 +241,23 @@ public: if(descriptor.nbPartitionsToSend > 0){ whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(const_cast<int*>(&descriptor.nbParticlesToSend), 1, MPI_INT, descriptor.destProc, TAG_LOW_UP_NB_PARTICLES, - current_com, &mpiRequests.back())); + AssertMpi(MPI_Isend(const_cast<partsize_t*>(&descriptor.nbParticlesToSend), 1, particles_utils::GetMpiType(partsize_t()), + descriptor.destProc, TAG_LOW_UP_NB_PARTICLES, + current_com, &mpiRequests.back())); if(descriptor.nbParticlesToSend){ whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[0]), descriptor.nbParticlesToSend*size_particle_positions, particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_LOW_UP_PARTICLES, + assert(descriptor.nbParticlesToSend*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[0]), int(descriptor.nbParticlesToSend*size_particle_positions), particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_LOW_UP_PARTICLES, current_com, &mpiRequests.back())); assert(descriptor.toRecvAndMerge == nullptr); descriptor.toRecvAndMerge.reset(new real_number[descriptor.nbParticlesToSend*size_particle_rhs]); whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend*size_particle_rhs, particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_UP_LOW_RESULTS, + assert(descriptor.nbParticlesToSend*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), int(descriptor.nbParticlesToSend*size_particle_rhs), particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_UP_LOW_RESULTS, current_com, &mpiRequests.back())); } } @@ -263,27 +266,32 @@ public: whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr}); mpiRequests.emplace_back(); AssertMpi(MPI_Irecv(&descriptor.nbParticlesToRecv, - 1, MPI_INT, descriptor.destProc, TAG_UP_LOW_NB_PARTICLES, + 1, particles_utils::GetMpiType(partsize_t()), descriptor.destProc, TAG_UP_LOW_NB_PARTICLES, current_com, &mpiRequests.back())); } else{ assert(descriptor.nbPartitionsToSend); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(const_cast<int*>(&descriptor.nbParticlesToSend), 1, MPI_INT, descriptor.destProc, TAG_UP_LOW_NB_PARTICLES, - current_com, &mpiRequests.back())); + AssertMpi(MPI_Isend(const_cast<partsize_t*>(&descriptor.nbParticlesToSend), 1, particles_utils::GetMpiType(partsize_t()), + descriptor.destProc, TAG_UP_LOW_NB_PARTICLES, + current_com, &mpiRequests.back())); if(descriptor.nbParticlesToSend){ whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); - mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[(current_offset_particles_for_partition[current_partition_size-descriptor.nbPartitionsToSend])*size_particle_positions]), descriptor.nbParticlesToSend*size_particle_positions, particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_UP_LOW_PARTICLES, - current_com, &mpiRequests.back())); + mpiRequests.emplace_back(); + assert(descriptor.nbParticlesToSend*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(const_cast<real_number*>(&particles_positions[(current_offset_particles_for_partition[current_partition_size-descriptor.nbPartitionsToSend])*size_particle_positions]), + int(descriptor.nbParticlesToSend*size_particle_positions), particles_utils::GetMpiType(real_number()), + descriptor.destProc, TAG_UP_LOW_PARTICLES, + current_com, &mpiRequests.back())); assert(descriptor.toRecvAndMerge == nullptr); descriptor.toRecvAndMerge.reset(new real_number[descriptor.nbParticlesToSend*size_particle_rhs]); whatNext.emplace_back(std::pair<Action,int>{MERGE_PARTICLES, idxDescr}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), descriptor.nbParticlesToSend*size_particle_rhs, particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_LOW_UP_RESULTS, + assert(descriptor.nbParticlesToSend*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toRecvAndMerge.get(), int(descriptor.nbParticlesToSend*size_particle_rhs), particles_utils::GetMpiType(real_number()), descriptor.destProc, TAG_LOW_UP_RESULTS, current_com, &mpiRequests.back())); } @@ -291,8 +299,8 @@ public: whatNext.emplace_back(std::pair<Action,int>{RECV_PARTICLES, idxDescr}); mpiRequests.emplace_back(); AssertMpi(MPI_Irecv(&descriptor.nbParticlesToRecv, - 1, MPI_INT, descriptor.destProc, TAG_LOW_UP_NB_PARTICLES, - current_com, &mpiRequests.back())); + 1, particles_utils::GetMpiType(partsize_t()), descriptor.destProc, TAG_LOW_UP_NB_PARTICLES, + current_com, &mpiRequests.back())); } } } @@ -307,10 +315,10 @@ public: while(mpiRequests.size()){ assert(mpiRequests.size() == whatNext.size()); - int idxDone = mpiRequests.size(); + int idxDone = int(mpiRequests.size()); { TIMEZONE("wait"); - AssertMpi(MPI_Waitany(mpiRequests.size(), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); + AssertMpi(MPI_Waitany(int(mpiRequests.size()), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); } const std::pair<Action, int> releasedAction = whatNext[idxDone]; std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]); @@ -328,30 +336,34 @@ public: //const int idxLower = descriptor.idxLowerUpper; const int destProc = descriptor.destProc; //const int nbPartitionsToRecv = descriptor.nbPartitionsToRecv; - const int NbParticlesToReceive = descriptor.nbParticlesToRecv; + const partsize_t NbParticlesToReceive = descriptor.nbParticlesToRecv; assert(NbParticlesToReceive != -1); assert(descriptor.toCompute == nullptr); if(NbParticlesToReceive){ descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]); whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(descriptor.toCompute.get(), NbParticlesToReceive*size_particle_positions, particles_utils::GetMpiType(real_number()), destProc, TAG_UP_LOW_PARTICLES, - current_com, &mpiRequests.back())); + assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toCompute.get(), int(NbParticlesToReceive*size_particle_positions), + particles_utils::GetMpiType(real_number()), destProc, TAG_UP_LOW_PARTICLES, + current_com, &mpiRequests.back())); } } else{ //const int idxUpper = descriptor.idxLowerUpper; const int destProc = descriptor.destProc; //const int nbPartitionsToRecv = descriptor.nbPartitionsToRecv; - const int NbParticlesToReceive = descriptor.nbParticlesToRecv; + const partsize_t NbParticlesToReceive = descriptor.nbParticlesToRecv; assert(NbParticlesToReceive != -1); assert(descriptor.toCompute == nullptr); if(NbParticlesToReceive){ descriptor.toCompute.reset(new real_number[NbParticlesToReceive*size_particle_positions]); whatNext.emplace_back(std::pair<Action,int>{COMPUTE_PARTICLES, releasedAction.second}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(descriptor.toCompute.get(), NbParticlesToReceive*size_particle_positions, particles_utils::GetMpiType(real_number()), destProc, TAG_LOW_UP_PARTICLES, - current_com, &mpiRequests.back())); + assert(NbParticlesToReceive*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(descriptor.toCompute.get(), int(NbParticlesToReceive*size_particle_positions), + particles_utils::GetMpiType(real_number()), destProc, TAG_LOW_UP_PARTICLES, + current_com, &mpiRequests.back())); } } } @@ -361,7 +373,7 @@ public: ////////////////////////////////////////////////////////////////////// if(releasedAction.first == COMPUTE_PARTICLES){ NeighborDescriptor& descriptor = neigDescriptors[releasedAction.second]; - const int NbParticlesToReceive = descriptor.nbParticlesToRecv; + const partsize_t NbParticlesToReceive = descriptor.nbParticlesToRecv; assert(descriptor.toCompute != nullptr); descriptor.results.reset(new real_number[NbParticlesToReceive*size_particle_rhs]); @@ -375,8 +387,8 @@ public: NeighborDescriptor* ptr_descriptor = &descriptor; #pragma omp taskgroup { - for(int idxPart = 0 ; idxPart < NbParticlesToReceive ; idxPart += 300){ - const int sizeToDo = std::min(300, NbParticlesToReceive-idxPart); + for(partsize_t idxPart = 0 ; idxPart < NbParticlesToReceive ; idxPart += 300){ + const partsize_t sizeToDo = std::min(partsize_t(300), NbParticlesToReceive-idxPart); #pragma omp task default(shared) firstprivate(ptr_descriptor, idxPart, sizeToDo) priority(10) \ TIMEZONE_OMP_PRAGMA_TASK_KEY(timeZoneTaskKey) { @@ -391,8 +403,9 @@ public: const int destProc = descriptor.destProc; whatNext.emplace_back(std::pair<Action,int>{RELEASE_BUFFER_PARTICLES, releasedAction.second}); mpiRequests.emplace_back(); - const int tag = descriptor.isLower? TAG_LOW_UP_RESULTS : TAG_UP_LOW_RESULTS; - AssertMpi(MPI_Isend(descriptor.results.get(), NbParticlesToReceive*size_particle_rhs, particles_utils::GetMpiType(real_number()), destProc, tag, + const int tag = descriptor.isLower? TAG_LOW_UP_RESULTS : TAG_UP_LOW_RESULTS; + assert(NbParticlesToReceive*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(descriptor.results.get(), int(NbParticlesToReceive*size_particle_rhs), particles_utils::GetMpiType(real_number()), destProc, tag, current_com, &mpiRequests.back())); } ////////////////////////////////////////////////////////////////////// @@ -431,10 +444,10 @@ public: { // Do for all partitions except the first and last one for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ - for(int idxPart = current_offset_particles_for_partition[idxPartition] ; + for(partsize_t idxPart = current_offset_particles_for_partition[idxPartition] ; idxPart < current_offset_particles_for_partition[idxPartition+1] ; idxPart += 300){ - const int sizeToDo = std::min(300, current_offset_particles_for_partition[idxPartition+1]-idxPart); + const partsize_t sizeToDo = std::min(partsize_t(300), current_offset_particles_for_partition[idxPartition+1]-idxPart); // Low priority to help master thread when possible #pragma omp task default(shared) firstprivate(idxPart, sizeToDo) priority(0) TIMEZONE_OMP_PRAGMA_TASK_KEY(timeZoneTaskKey) @@ -487,21 +500,21 @@ public: //////////////////////////////////////////////////////////////////////////// virtual void init_result_array(real_number particles_current_rhs[], - const int nb_particles) const = 0; + const partsize_t nb_particles) const = 0; virtual void apply_computation(const real_number particles_positions[], real_number particles_current_rhs[], - const int nb_particles) const = 0; + const partsize_t nb_particles) const = 0; virtual void reduce_particles_rhs(real_number particles_current_rhs[], const real_number extra_particles_current_rhs[], - const int nb_particles) const = 0; + const partsize_t nb_particles) const = 0; //////////////////////////////////////////////////////////////////////////// - void redistribute(int current_my_nb_particles_per_partition[], - int* nb_particles, + void redistribute(partsize_t current_my_nb_particles_per_partition[], + partsize_t* nb_particles, std::unique_ptr<real_number[]>* inout_positions_particles, std::unique_ptr<real_number[]> inout_rhs_particles[], const int in_nb_rhs, - std::unique_ptr<int[]>* inout_index_particles){ + std::unique_ptr<partsize_t[]>* inout_index_particles){ TIMEZONE("redistribute"); // Some latest processes might not be involved @@ -510,7 +523,7 @@ public: } current_offset_particles_for_partition[0] = 0; - int myTotalNbParticles = 0; + partsize_t myTotalNbParticles = 0; for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ myTotalNbParticles += current_my_nb_particles_per_partition[idxPartition]; current_offset_particles_for_partition[idxPartition+1] = current_offset_particles_for_partition[idxPartition] + current_my_nb_particles_per_partition[idxPartition]; @@ -518,7 +531,7 @@ public: assert((*nb_particles) == myTotalNbParticles); // Find particles outside my interval - const int nbOutLower = particles_utils::partition_extra<size_particle_positions>(&(*inout_positions_particles)[0], current_my_nb_particles_per_partition[0], + const partsize_t nbOutLower = particles_utils::partition_extra<partsize_t, size_particle_positions>(&(*inout_positions_particles)[0], current_my_nb_particles_per_partition[0], [&](const real_number val[]){ const int partition_level = pbc_field_layer(val[IDX_Z], IDX_Z); assert(partition_level == current_partition_interval.first @@ -527,7 +540,7 @@ public: const bool isLower = partition_level == (current_partition_interval.first-1+int(field_grid_dim[IDX_Z]))%int(field_grid_dim[IDX_Z]); return isLower; }, - [&](const int idx1, const int idx2){ + [&](const partsize_t idx1, const partsize_t idx2){ for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); } @@ -539,9 +552,9 @@ public: } } }); - const int offesetOutLow = (current_partition_size==1? nbOutLower : 0); + const partsize_t offesetOutLow = (current_partition_size==1? nbOutLower : 0); - const int nbOutUpper = current_my_nb_particles_per_partition[current_partition_size-1] - offesetOutLow - particles_utils::partition_extra<size_particle_positions>( + const partsize_t nbOutUpper = current_my_nb_particles_per_partition[current_partition_size-1] - offesetOutLow - particles_utils::partition_extra<partsize_t, size_particle_positions>( &(*inout_positions_particles)[(current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow)*size_particle_positions], myTotalNbParticles - (current_offset_particles_for_partition[current_partition_size-1]+offesetOutLow), [&](const real_number val[]){ @@ -552,7 +565,7 @@ public: const bool isUpper = (partition_level == ((current_partition_interval.second-1)+1)%int(field_grid_dim[IDX_Z])); return !isUpper; }, - [&](const int idx1, const int idx2){ + [&](const partsize_t idx1, const partsize_t idx2){ for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); } @@ -567,12 +580,12 @@ public: // Exchange number int eventsBeforeWaitall = 0; - int nbNewFromLow = 0; - int nbNewFromUp = 0; + partsize_t nbNewFromLow = 0; + partsize_t nbNewFromUp = 0; std::unique_ptr<real_number[]> newParticlesLow; std::unique_ptr<real_number[]> newParticlesUp; - std::unique_ptr<int[]> newParticlesLowIndexes; - std::unique_ptr<int[]> newParticlesUpIndexes; + std::unique_ptr<partsize_t[]> newParticlesLowIndexes; + std::unique_ptr<partsize_t[]> newParticlesUpIndexes; std::vector<std::unique_ptr<real_number[]>> newParticlesLowRhs(in_nb_rhs); std::vector<std::unique_ptr<real_number[]>> newParticlesUpRhs(in_nb_rhs); @@ -582,68 +595,82 @@ public: whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_NB_LOW, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&nbNewFromLow, 1, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_NB_PARTICLES, - MPI_COMM_WORLD, &mpiRequests.back())); + AssertMpi(MPI_Irecv(&nbNewFromLow, 1, particles_utils::GetMpiType(partsize_t()), + (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); eventsBeforeWaitall += 1; whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(const_cast<int*>(&nbOutLower), 1, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_NB_PARTICLES, - MPI_COMM_WORLD, &mpiRequests.back())); + AssertMpi(MPI_Isend(const_cast<partsize_t*>(&nbOutLower), 1, particles_utils::GetMpiType(partsize_t()), + (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); if(nbOutLower){ whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); - mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&(*inout_positions_particles)[0], nbOutLower*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, + mpiRequests.emplace_back(); + assert(nbOutLower*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&(*inout_positions_particles)[0], int(nbOutLower*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, MPI_COMM_WORLD, &mpiRequests.back())); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&(*inout_index_particles)[0], nbOutLower, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + assert(nbOutLower < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&(*inout_index_particles)[0], int(nbOutLower), particles_utils::GetMpiType(partsize_t()), + (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][0], nbOutLower*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, + assert(nbOutLower*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][0], int(nbOutLower*size_particle_rhs), particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, MPI_COMM_WORLD, &mpiRequests.back())); } } whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_NB_UP, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&nbNewFromUp, 1, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_NB_PARTICLES, - MPI_COMM_WORLD, &mpiRequests.back())); + AssertMpi(MPI_Irecv(&nbNewFromUp, 1, particles_utils::GetMpiType(partsize_t()), (my_rank+1)%nb_processes_involved, + TAG_LOW_UP_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); eventsBeforeWaitall += 1; whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(const_cast<int*>(&nbOutUpper), 1, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_NB_PARTICLES, - MPI_COMM_WORLD, &mpiRequests.back())); + AssertMpi(MPI_Isend(const_cast<partsize_t*>(&nbOutUpper), 1, particles_utils::GetMpiType(partsize_t()), + (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_NB_PARTICLES, + MPI_COMM_WORLD, &mpiRequests.back())); if(nbOutUpper){ whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&(*inout_positions_particles)[(myTotalNbParticles-nbOutUpper)*size_particle_positions], nbOutUpper*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, + assert(nbOutUpper*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&(*inout_positions_particles)[(myTotalNbParticles-nbOutUpper)*size_particle_positions], + int(nbOutUpper*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, MPI_COMM_WORLD, &mpiRequests.back())); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&(*inout_index_particles)[(myTotalNbParticles-nbOutUpper)], nbOutUpper, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, + assert(nbOutUpper < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&(*inout_index_particles)[(myTotalNbParticles-nbOutUpper)], int(nbOutUpper), + particles_utils::GetMpiType(partsize_t()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][(myTotalNbParticles-nbOutUpper)*size_particle_rhs], nbOutUpper*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, + assert(nbOutUpper*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Isend(&inout_rhs_particles[idx_rhs][(myTotalNbParticles-nbOutUpper)*size_particle_rhs], + int(nbOutUpper*size_particle_rhs), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, MPI_COMM_WORLD, &mpiRequests.back())); } } while(mpiRequests.size() && eventsBeforeWaitall){ - int idxDone = mpiRequests.size(); + int idxDone = int(mpiRequests.size()); { TIMEZONE("waitany_move"); - AssertMpi(MPI_Waitany(mpiRequests.size(), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); + AssertMpi(MPI_Waitany(int(mpiRequests.size()), mpiRequests.data(), &idxDone, MPI_STATUSES_IGNORE)); } const std::pair<Action, int> releasedAction = whatNext[idxDone]; std::swap(mpiRequests[idxDone], mpiRequests[mpiRequests.size()-1]); @@ -657,20 +684,25 @@ public: newParticlesLow.reset(new real_number[nbNewFromLow*size_particle_positions]); whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_LOW, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesLow[0], nbNewFromLow*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, + assert(nbNewFromLow*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesLow[0], int(nbNewFromLow*size_particle_positions), particles_utils::GetMpiType(real_number()), + (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES, MPI_COMM_WORLD, &mpiRequests.back())); - newParticlesLowIndexes.reset(new int[nbNewFromLow]); + newParticlesLowIndexes.reset(new partsize_t[nbNewFromLow]); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesLowIndexes[0], nbNewFromLow, MPI_INT, (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, + assert(nbNewFromLow < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesLowIndexes[0], int(nbNewFromLow), particles_utils::GetMpiType(partsize_t()), + (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ newParticlesLowRhs[idx_rhs].reset(new real_number[nbNewFromLow*size_particle_rhs]); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesLowRhs[idx_rhs][0], nbNewFromLow*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, + assert(nbNewFromLow*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesLowRhs[idx_rhs][0], int(nbNewFromLow*size_particle_rhs), particles_utils::GetMpiType(real_number()), (my_rank-1+nb_processes_involved)%nb_processes_involved, TAG_UP_LOW_MOVED_PARTICLES_RHS+idx_rhs, MPI_COMM_WORLD, &mpiRequests.back())); } } @@ -682,20 +714,24 @@ public: newParticlesUp.reset(new real_number[nbNewFromUp*size_particle_positions]); whatNext.emplace_back(std::pair<Action,int>{RECV_MOVE_UP, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesUp[0], nbNewFromUp*size_particle_positions, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, + assert(nbNewFromUp*size_particle_positions < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesUp[0], int(nbNewFromUp*size_particle_positions), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES, MPI_COMM_WORLD, &mpiRequests.back())); - newParticlesUpIndexes.reset(new int[nbNewFromUp]); + newParticlesUpIndexes.reset(new partsize_t[nbNewFromUp]); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesUpIndexes[0], nbNewFromUp, MPI_INT, (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, + assert(nbNewFromUp < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesUpIndexes[0], int(nbNewFromUp), particles_utils::GetMpiType(partsize_t()), + (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_INDEXES, MPI_COMM_WORLD, &mpiRequests.back())); for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ newParticlesUpRhs[idx_rhs].reset(new real_number[nbNewFromUp*size_particle_rhs]); whatNext.emplace_back(std::pair<Action,int>{NOTHING_TODO, -1}); mpiRequests.emplace_back(); - AssertMpi(MPI_Irecv(&newParticlesUpRhs[idx_rhs][0], nbNewFromUp*size_particle_rhs, particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, + assert(nbNewFromUp*size_particle_rhs < std::numeric_limits<int>::max()); + AssertMpi(MPI_Irecv(&newParticlesUpRhs[idx_rhs][0], int(nbNewFromUp*size_particle_rhs), particles_utils::GetMpiType(real_number()), (my_rank+1)%nb_processes_involved, TAG_LOW_UP_MOVED_PARTICLES_RHS+idx_rhs, MPI_COMM_WORLD, &mpiRequests.back())); } } @@ -706,7 +742,7 @@ public: if(mpiRequests.size()){ // TODO Proceed when received TIMEZONE("waitall-move"); - AssertMpi(MPI_Waitall(mpiRequests.size(), mpiRequests.data(), MPI_STATUSES_IGNORE)); + AssertMpi(MPI_Waitall(int(mpiRequests.size()), mpiRequests.data(), MPI_STATUSES_IGNORE)); mpiRequests.clear(); whatNext.clear(); } @@ -715,11 +751,11 @@ public: // Realloc an merge { TIMEZONE("realloc_copy"); - const int nbOldParticlesInside = myTotalNbParticles - nbOutLower - nbOutUpper; - const int myTotalNewNbParticles = nbOldParticlesInside + nbNewFromLow + nbNewFromUp; + const partsize_t nbOldParticlesInside = myTotalNbParticles - nbOutLower - nbOutUpper; + const partsize_t myTotalNewNbParticles = nbOldParticlesInside + nbNewFromLow + nbNewFromUp; std::unique_ptr<real_number[]> newArray(new real_number[myTotalNewNbParticles*size_particle_positions]); - std::unique_ptr<int[]> newArrayIndexes(new int[myTotalNewNbParticles]); + std::unique_ptr<partsize_t[]> newArrayIndexes(new partsize_t[myTotalNewNbParticles]); std::vector<std::unique_ptr<real_number[]>> newArrayRhs(in_nb_rhs); for(int idx_rhs = 0 ; idx_rhs < in_nb_rhs ; ++idx_rhs){ newArrayRhs[idx_rhs].reset(new real_number[myTotalNewNbParticles*size_particle_rhs]); @@ -767,7 +803,7 @@ public: // Partitions all particles { TIMEZONE("repartition"); - particles_utils::partition_extra_z<size_particle_positions>(&(*inout_positions_particles)[0], + particles_utils::partition_extra_z<partsize_t, size_particle_positions>(&(*inout_positions_particles)[0], myTotalNbParticles,current_partition_size, current_my_nb_particles_per_partition, current_offset_particles_for_partition.get(), [&](const real_number& z_pos){ @@ -775,7 +811,7 @@ public: assert(current_partition_interval.first <= partition_level && partition_level < current_partition_interval.second); return partition_level - current_partition_interval.first; }, - [&](const int idx1, const int idx2){ + [&](const partsize_t idx1, const partsize_t idx2){ for(int idx_val = 0 ; idx_val < size_particle_index ; ++idx_val){ std::swap((*inout_index_particles)[idx1], (*inout_index_particles)[idx2]); } @@ -792,7 +828,7 @@ public: for(int idxPartition = 0 ; idxPartition < current_partition_size ; ++idxPartition){ assert(current_my_nb_particles_per_partition[idxPartition] == current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]); - for(int idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ + for(partsize_t idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ assert(pbc_field_layer((*inout_positions_particles)[idx*3+IDX_Z], IDX_Z)-current_partition_interval.first == idxPartition); } } @@ -808,7 +844,7 @@ public: //////////////////////////////////////////////////////////////////////////// virtual void move_particles(real_number particles_positions[], - const int nb_particles, + const partsize_t nb_particles, const std::unique_ptr<real_number[]> particles_current_rhs[], const int nb_rhs, const real_number dt) const = 0; }; diff --git a/bfps/cpp/particles/abstract_particles_input.hpp b/bfps/cpp/particles/abstract_particles_input.hpp index bb295c40717085e58126530e8403c3ff5b71a014..77dcbc638903a668ce6e2a0084815832b0580495 100644 --- a/bfps/cpp/particles/abstract_particles_input.hpp +++ b/bfps/cpp/particles/abstract_particles_input.hpp @@ -3,17 +3,17 @@ #include <tuple> -template <class real_number> +template <class partsize_t, class real_number> class abstract_particles_input { public: virtual ~abstract_particles_input(){} - virtual int getTotalNbParticles() = 0; - virtual int getLocalNbParticles() = 0; + virtual partsize_t getTotalNbParticles() = 0; + virtual partsize_t getLocalNbParticles() = 0; virtual int getNbRhs() = 0; virtual std::unique_ptr<real_number[]> getMyParticles() = 0; - virtual std::unique_ptr<int[]> getMyParticlesIndexes() = 0; + virtual std::unique_ptr<partsize_t[]> getMyParticlesIndexes() = 0; virtual std::vector<std::unique_ptr<real_number[]>> getMyRhs() = 0; }; diff --git a/bfps/cpp/particles/abstract_particles_output.hpp b/bfps/cpp/particles/abstract_particles_output.hpp index ce841294b133734ad6bd6485351d340fb0a175fb..dfc8ec37a39cf719242a15acfc125f634c0c1538 100644 --- a/bfps/cpp/particles/abstract_particles_output.hpp +++ b/bfps/cpp/particles/abstract_particles_output.hpp @@ -13,7 +13,7 @@ #include "scope_timer.hpp" #include "env_utils.hpp" -template <class real_number, int size_particle_positions, int size_particle_rhs> +template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs> class abstract_particles_output { MPI_Comm mpi_com; MPI_Comm mpi_com_writer; @@ -21,31 +21,31 @@ class abstract_particles_output { int my_rank; int nb_processes; - const int total_nb_particles; + const partsize_t total_nb_particles; const int nb_rhs; - std::unique_ptr<std::pair<int,int>[]> buffer_indexes_send; + std::unique_ptr<std::pair<partsize_t,partsize_t>[]> buffer_indexes_send; std::unique_ptr<real_number[]> buffer_particles_positions_send; std::vector<std::unique_ptr<real_number[]>> buffer_particles_rhs_send; - int size_buffers_send; + partsize_t size_buffers_send; std::unique_ptr<real_number[]> buffer_particles_positions_recv; std::vector<std::unique_ptr<real_number[]>> buffer_particles_rhs_recv; - std::unique_ptr<int[]> buffer_indexes_recv; - int size_buffers_recv; + std::unique_ptr<partsize_t[]> buffer_indexes_recv; + partsize_t size_buffers_recv; int nb_processes_involved; bool current_is_involved; - int particles_chunk_per_process; - int particles_chunk_current_size; - int particles_chunk_current_offset; + partsize_t particles_chunk_per_process; + partsize_t particles_chunk_current_size; + partsize_t particles_chunk_current_offset; protected: MPI_Comm& getComWriter(){ return mpi_com_writer; } - int getTotalNbParticles() const { + partsize_t getTotalNbParticles() const { return total_nb_particles; } @@ -62,13 +62,13 @@ protected: } public: - abstract_particles_output(MPI_Comm in_mpi_com, const int inTotalNbParticles, const int in_nb_rhs) + abstract_particles_output(MPI_Comm in_mpi_com, const partsize_t inTotalNbParticles, const int in_nb_rhs) throw() : mpi_com(in_mpi_com), my_rank(-1), nb_processes(-1), total_nb_particles(inTotalNbParticles), nb_rhs(in_nb_rhs), buffer_particles_rhs_send(in_nb_rhs), size_buffers_send(-1), buffer_particles_rhs_recv(in_nb_rhs), size_buffers_recv(-1), nb_processes_involved(0), current_is_involved(true), particles_chunk_per_process(0), - particles_chunk_current_size(0), particles_chunk_current_offset(0){ + particles_chunk_current_size(0), particles_chunk_current_offset(0) { AssertMpi(MPI_Comm_rank(mpi_com, &my_rank)); AssertMpi(MPI_Comm_size(mpi_com, &nb_processes)); @@ -87,13 +87,13 @@ public: extraChunkBytes += 1; } const size_t bytesPerProcess = (MinBytesPerProcess+extraChunkBytes*ChunkBytes); - particles_chunk_per_process = (bytesPerProcess+sizeof(real_number)*size_particle_positions-1)/(sizeof(real_number)*size_particle_positions); - nb_processes_involved = (total_nb_particles+particles_chunk_per_process-1)/particles_chunk_per_process; + particles_chunk_per_process = partsize_t((bytesPerProcess+sizeof(real_number)*size_particle_positions-1)/(sizeof(real_number)*size_particle_positions)); + nb_processes_involved = int((total_nb_particles+particles_chunk_per_process-1)/particles_chunk_per_process); } // else limit based on minBytesPerProcess else{ nb_processes_involved = std::max(1,std::min(MaxProcessesInvolved,int((totalBytesForPositions+MinBytesPerProcess-1)/MinBytesPerProcess))); - particles_chunk_per_process = (MinBytesPerProcess+sizeof(real_number)*size_particle_positions-1)/(sizeof(real_number)*size_particle_positions); + particles_chunk_per_process = partsize_t((MinBytesPerProcess+sizeof(real_number)*size_particle_positions-1)/(sizeof(real_number)*size_particle_positions)); } // Print out @@ -144,8 +144,8 @@ public: void save( const real_number input_particles_positions[], const std::unique_ptr<real_number[]> input_particles_rhs[], - const int index_particles[], - const int nb_particles, + const partsize_t index_particles[], + const partsize_t nb_particles, const int idx_time_step){ TIMEZONE("abstract_particles_output::save"); assert(total_nb_particles != -1); @@ -154,7 +154,7 @@ public: TIMEZONE("sort-to-distribute"); if(size_buffers_send < nb_particles && nb_particles){ - buffer_indexes_send.reset(new std::pair<int,int>[nb_particles]); + buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[nb_particles]); buffer_particles_positions_send.reset(new real_number[nb_particles*size_particle_positions]); for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ buffer_particles_rhs_send[idx_rhs].reset(new real_number[nb_particles*size_particle_rhs]); @@ -162,18 +162,19 @@ public: size_buffers_send = nb_particles; } - for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ buffer_indexes_send[idx_part].first = idx_part; buffer_indexes_send[idx_part].second = index_particles[idx_part]; } - std::sort(&buffer_indexes_send[0], &buffer_indexes_send[nb_particles], [](const std::pair<int,int>& p1, const std::pair<int,int>& p2){ + std::sort(&buffer_indexes_send[0], &buffer_indexes_send[nb_particles], [](const std::pair<partsize_t,partsize_t>& p1, + const std::pair<partsize_t,partsize_t>& p2){ return p1.second < p2.second; }); - for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ - const int src_idx = buffer_indexes_send[idx_part].first; - const int dst_idx = idx_part; + for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + const partsize_t src_idx = buffer_indexes_send[idx_part].first; + const partsize_t dst_idx = idx_part; for(int idx_val = 0 ; idx_val < size_particle_positions ; ++idx_val){ buffer_particles_positions_send[dst_idx*size_particle_positions + idx_val] @@ -188,10 +189,10 @@ public: } } - int* buffer_indexes_send_tmp = reinterpret_cast<int*>(buffer_indexes_send.get());// trick re-use buffer_indexes_send memory - std::vector<int> nb_particles_to_send(nb_processes, 0); - for(int idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ - const int dest_proc = buffer_indexes_send[idx_part].second/particles_chunk_per_process; + partsize_t* buffer_indexes_send_tmp = reinterpret_cast<partsize_t*>(buffer_indexes_send.get());// trick re-use buffer_indexes_send memory + std::vector<partsize_t> nb_particles_to_send(nb_processes, 0); + for(partsize_t idx_part = 0 ; idx_part < nb_particles ; ++idx_part){ + const int dest_proc = int(buffer_indexes_send[idx_part].second/particles_chunk_per_process); assert(dest_proc < nb_processes_involved); nb_particles_to_send[dest_proc] += 1; buffer_indexes_send_tmp[idx_part] = buffer_indexes_send[idx_part].second; @@ -204,7 +205,7 @@ public: assert(nb_to_receive == particles_chunk_current_size); if(size_buffers_recv < nb_to_receive && nb_to_receive){ - buffer_indexes_recv.reset(new int[nb_to_receive]); + buffer_indexes_recv.reset(new partsize_t[nb_to_receive]); buffer_particles_positions_recv.reset(new real_number[nb_to_receive*size_particle_positions]); for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ buffer_particles_rhs_recv[idx_rhs].reset(new real_number[nb_to_receive*size_particle_rhs]); @@ -215,7 +216,7 @@ public: { TIMEZONE("exchange"); // Could be done with multiple asynchronous coms - exchanger.alltoallv<int>(buffer_indexes_send_tmp, buffer_indexes_recv.get()); + exchanger.alltoallv<partsize_t>(buffer_indexes_send_tmp, buffer_indexes_recv.get()); exchanger.alltoallv<real_number>(buffer_particles_positions_send.get(), buffer_particles_positions_recv.get(), size_particle_positions); for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ exchanger.alltoallv<real_number>(buffer_particles_rhs_send[idx_rhs].get(), buffer_particles_rhs_recv[idx_rhs].get(), size_particle_rhs); @@ -229,7 +230,7 @@ public: } if(size_buffers_send < nb_to_receive && nb_to_receive){ - buffer_indexes_send.reset(new std::pair<int,int>[nb_to_receive]); + buffer_indexes_send.reset(new std::pair<partsize_t,partsize_t>[nb_to_receive]); buffer_particles_positions_send.reset(new real_number[nb_to_receive*size_particle_positions]); for(int idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ buffer_particles_rhs_send[idx_rhs].reset(new real_number[nb_to_receive*size_particle_rhs]); @@ -239,9 +240,9 @@ public: { TIMEZONE("copy-local-order"); - for(int idx_part = 0 ; idx_part < nb_to_receive ; ++idx_part){ - const int src_idx = idx_part; - const int dst_idx = buffer_indexes_recv[idx_part]-particles_chunk_current_offset; + for(partsize_t idx_part = 0 ; idx_part < nb_to_receive ; ++idx_part){ + const partsize_t src_idx = idx_part; + const partsize_t dst_idx = buffer_indexes_recv[idx_part]-particles_chunk_current_offset; assert(0 <= dst_idx); assert(dst_idx < particles_chunk_current_size); @@ -263,7 +264,7 @@ public: } virtual void write(const int idx_time_step, const real_number* positions, const std::unique_ptr<real_number[]>* rhs, - const int nb_particles, const int particles_idx_offset) = 0; + const partsize_t nb_particles, const partsize_t particles_idx_offset) = 0; }; #endif diff --git a/bfps/cpp/particles/abstract_particles_system.hpp b/bfps/cpp/particles/abstract_particles_system.hpp index 32510404b4fa69596a53385b470aea0d4136b08b..33a945130e2b94f3b266c29c34b31758b00c84c3 100644 --- a/bfps/cpp/particles/abstract_particles_system.hpp +++ b/bfps/cpp/particles/abstract_particles_system.hpp @@ -3,7 +3,7 @@ #include <memory> -template <class real_number> +template <class partsize_t, class real_number> class abstract_particles_system { public: virtual void compute() = 0; @@ -22,9 +22,9 @@ public: virtual const std::unique_ptr<real_number[]>* getParticlesRhs() const = 0; - virtual const int* getParticlesIndexes() const = 0; + virtual const partsize_t* getParticlesIndexes() const = 0; - virtual int getLocalNbParticles() const = 0; + virtual partsize_t getLocalNbParticles() const = 0; virtual int getNbRhs() const = 0; }; diff --git a/bfps/cpp/particles/alltoall_exchanger.hpp b/bfps/cpp/particles/alltoall_exchanger.hpp index 3c592011c9772afad1ab68555b179754f868624e..2beaf092e8e6c7a801efd492270d29c2d4dba398 100644 --- a/bfps/cpp/particles/alltoall_exchanger.hpp +++ b/bfps/cpp/particles/alltoall_exchanger.hpp @@ -24,7 +24,23 @@ class alltoall_exchanger { int total_to_recv; + template <class index_type> + static std::vector<int> ConvertVector(const std::vector<index_type>& vector){ + std::vector<int> resVector(vector.size()); + for(size_t idx = 0 ; idx < vector.size() ; ++idx){ + assert(vector[idx] <= std::numeric_limits<int>::max()); + resVector[idx] = int(vector[idx]); + } + return resVector; + } + public: + template <class index_type> + alltoall_exchanger(const MPI_Comm& in_mpi_com, const std::vector<index_type>& in_nb_items_to_send) + : alltoall_exchanger(in_mpi_com, ConvertVector(in_nb_items_to_send)){ + + } + alltoall_exchanger(const MPI_Comm& in_mpi_com, std::vector<int>/*no ref to move here*/ in_nb_items_to_send) :mpi_com(in_mpi_com), nb_items_to_send(std::move(in_nb_items_to_send)), total_to_recv(0){ TIMEZONE("alltoall_exchanger::constructor"); @@ -49,8 +65,10 @@ public: offset_items_to_recv.resize(nb_processes+1, 0); for(int idx_proc = 0 ; idx_proc < nb_processes ; ++idx_proc){ const int nbrecv = nb_items_to_sendrecv_all[idx_proc*nb_processes + my_rank]; + assert(static_cast<long long int>(total_to_recv) + static_cast<long long int>(nbrecv) <= std::numeric_limits<int>::max()); total_to_recv += nbrecv; nb_items_to_recv[idx_proc] = nbrecv; + assert(static_cast<long long int>(nb_items_to_recv[idx_proc]) + static_cast<long long int>(offset_items_to_recv[idx_proc]) <= std::numeric_limits<int>::max()); offset_items_to_recv[idx_proc+1] = nb_items_to_recv[idx_proc] + offset_items_to_recv[idx_proc]; } @@ -82,16 +100,20 @@ public: TIMEZONE("alltoallv"); std::vector<int> nb_items_to_send_tmp = nb_items_to_send; particles_utils::transform(nb_items_to_send_tmp.begin(), nb_items_to_send_tmp.end(), nb_items_to_send_tmp.begin(), - [&](const int val) -> int { return val * in_nb_values_per_item ;}); + [&](const int val) -> int { assert(static_cast<long long int>(val) * static_cast<long long int>(in_nb_values_per_item) <= std::numeric_limits<int>::max()); + return val * in_nb_values_per_item ;}); std::vector<int> offset_items_to_send_tmp = offset_items_to_send; particles_utils::transform(offset_items_to_send_tmp.begin(), offset_items_to_send_tmp.end(), offset_items_to_send_tmp.begin(), - [&](const int val) -> int { return val * in_nb_values_per_item ;}); + [&](const int val) -> int { assert(static_cast<long long int>(val) * static_cast<long long int>(in_nb_values_per_item) <= std::numeric_limits<int>::max()); + return val * in_nb_values_per_item ;}); std::vector<int> nb_items_to_recv_tmp = nb_items_to_recv; particles_utils::transform(nb_items_to_recv_tmp.begin(), nb_items_to_recv_tmp.end(), nb_items_to_recv_tmp.begin(), - [&](const int val) -> int { return val * in_nb_values_per_item ;}); + [&](const int val) -> int { assert(static_cast<long long int>(val) * static_cast<long long int>(in_nb_values_per_item) <= std::numeric_limits<int>::max()); + return val * in_nb_values_per_item ;}); std::vector<int> offset_items_to_recv_tmp = offset_items_to_recv; particles_utils::transform(offset_items_to_recv_tmp.begin(), offset_items_to_recv_tmp.end(), offset_items_to_recv_tmp.begin(), - [&](const int val) -> int { return val * in_nb_values_per_item ;}); + [&](const int val) -> int { assert(static_cast<long long int>(val) * static_cast<long long int>(in_nb_values_per_item) <= std::numeric_limits<int>::max()); + return val * in_nb_values_per_item ;}); AssertMpi(MPI_Alltoallv(const_cast<ItemType*>(in_to_send), const_cast<int*>(nb_items_to_send_tmp.data()), const_cast<int*>(offset_items_to_send_tmp.data()), in_type, out_to_recv, diff --git a/bfps/cpp/particles/particles_adams_bashforth.hpp b/bfps/cpp/particles/particles_adams_bashforth.hpp index f0c5647228c105a0e71b10e528c383b4069e2b48..2fb61462f7970d823acd6dc3405799e362fa15af 100644 --- a/bfps/cpp/particles/particles_adams_bashforth.hpp +++ b/bfps/cpp/particles/particles_adams_bashforth.hpp @@ -7,7 +7,7 @@ #include "scope_timer.hpp" #include "particles_utils.hpp" -template <class real_number, int size_particle_positions = 3, int size_particle_rhs = 3> +template <class partsize_t, class real_number, int size_particle_positions = 3, int size_particle_rhs = 3> class particles_adams_bashforth { static_assert(size_particle_positions == size_particle_rhs, "Not having the same dimension for positions and rhs looks like a bug," @@ -16,7 +16,7 @@ public: static const int Max_steps = 6; void move_particles(real_number*__restrict__ particles_positions, - const int nb_particles, + const partsize_t nb_particles, const std::unique_ptr<real_number[]> particles_rhs[], const int nb_rhs, const real_number dt) const{ TIMEZONE("particles_adams_bashforth::move_particles"); @@ -30,19 +30,19 @@ public: // Not needed: TIMEZONE_OMP_INIT_PREPARALLEL(omp_get_max_threads()) #pragma omp parallel default(shared) { - particles_utils::IntervalSplitter<int> interval(nb_particles, + particles_utils::IntervalSplitter<partsize_t> interval(nb_particles, omp_get_num_threads(), omp_get_thread_num()); - const int value_start = interval.getMyOffset()*size_particle_positions; - const int value_end = (interval.getMyOffset()+interval.getMySize())*size_particle_positions; + const partsize_t value_start = interval.getMyOffset()*size_particle_positions; + const partsize_t value_end = (interval.getMyOffset()+interval.getMySize())*size_particle_positions; // TODO full unroll + blocking switch (nb_rhs){ case 1: { const real_number* __restrict__ rhs_0 = particles_rhs[0].get(); - for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){ + for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){ // dt × [0] particles_positions[idx_value] += dt * rhs_0[idx_value]; } @@ -52,7 +52,7 @@ public: { const real_number* __restrict__ rhs_0 = particles_rhs[0].get(); const real_number* __restrict__ rhs_1 = particles_rhs[1].get(); - for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){ + for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){ // dt × (3[0] - [1])/2 particles_positions[idx_value] += dt * (3.*rhs_0[idx_value] @@ -65,7 +65,7 @@ public: const real_number* __restrict__ rhs_0 = particles_rhs[0].get(); const real_number* __restrict__ rhs_1 = particles_rhs[1].get(); const real_number* __restrict__ rhs_2 = particles_rhs[2].get(); - for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){ + for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){ // dt × (23[0] - 16[1] + 5[2])/12 particles_positions[idx_value] += dt * (23.*rhs_0[idx_value] @@ -80,7 +80,7 @@ public: const real_number* __restrict__ rhs_1 = particles_rhs[1].get(); const real_number* __restrict__ rhs_2 = particles_rhs[2].get(); const real_number* __restrict__ rhs_3 = particles_rhs[3].get(); - for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){ + for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){ // dt × (55[0] - 59[1] + 37[2] - 9[3])/24 particles_positions[idx_value] += dt * (55.*rhs_0[idx_value] @@ -97,7 +97,7 @@ public: const real_number* __restrict__ rhs_2 = particles_rhs[2].get(); const real_number* __restrict__ rhs_3 = particles_rhs[3].get(); const real_number* __restrict__ rhs_4 = particles_rhs[4].get(); - for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){ + for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){ // dt × (1901[0] - 2774[1] + 2616[2] - 1274[3] + 251[4])/720 particles_positions[idx_value] += dt * (1901.*rhs_0[idx_value] @@ -116,7 +116,7 @@ public: const real_number* __restrict__ rhs_3 = particles_rhs[3].get(); const real_number* __restrict__ rhs_4 = particles_rhs[4].get(); const real_number* __restrict__ rhs_5 = particles_rhs[5].get(); - for(int idx_value = value_start ; idx_value < value_end ; ++idx_value){ + for(partsize_t idx_value = value_start ; idx_value < value_end ; ++idx_value){ // dt × (4277[0] - 7923[1] + 9982[2] - 7298[3] + 2877[4] - 475[5])/1440 particles_positions[idx_value] += dt * (4277.*rhs_0[idx_value] diff --git a/bfps/cpp/particles/particles_field_computer.hpp b/bfps/cpp/particles/particles_field_computer.hpp index 8d91ac820ba83a5e76877848621f036712a9cc56..b7e60294d18bfb288564b981bb990bfb5ea7c5f4 100644 --- a/bfps/cpp/particles/particles_field_computer.hpp +++ b/bfps/cpp/particles/particles_field_computer.hpp @@ -8,15 +8,16 @@ #include "scope_timer.hpp" #include "particles_utils.hpp" -template <class real_number, +template <class partsize_t, + class real_number, class interpolator_class, class field_class, int interp_neighbours, class positions_updater_class > -class particles_field_computer : public abstract_particles_distr<real_number, 3,3,1> { - using Parent = abstract_particles_distr<real_number, 3,3,1>; +class particles_field_computer : public abstract_particles_distr<partsize_t, real_number, 3,3,1> { + using Parent = abstract_particles_distr<partsize_t, real_number, 3,3,1>; - const std::array<size_t,3> field_grid_dim; + const std::array<int,3> field_grid_dim; const std::pair<int,int> current_partition_interval; const interpolator_class& interpolator; @@ -35,7 +36,7 @@ class particles_field_computer : public abstract_particles_distr<real_number, 3, //////////////////////////////////////////////////////////////////////// virtual void init_result_array(real_number particles_current_rhs[], - const int nb_particles) const final{ + const partsize_t nb_particles) const final{ // Set values to zero initialy std::fill(particles_current_rhs, particles_current_rhs+nb_particles*3, @@ -54,10 +55,10 @@ class particles_field_computer : public abstract_particles_distr<real_number, 3, virtual void apply_computation(const real_number particles_positions[], real_number particles_current_rhs[], - const int nb_particles) const final{ + const partsize_t nb_particles) const final{ TIMEZONE("particles_field_computer::apply_computation"); //DEBUG_MSG("just entered particles_field_computer::apply_computation\n"); - for(int idxPart = 0 ; idxPart < nb_particles ; ++idxPart){ + for(partsize_t idxPart = 0 ; idxPart < nb_particles ; ++idxPart){ const real_number reltv_x = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_X], IDX_X); const real_number reltv_y = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_Y], IDX_Y); const real_number reltv_z = get_norm_pos_in_cell(particles_positions[idxPart*3+IDX_Z], IDX_Z); @@ -146,10 +147,10 @@ class particles_field_computer : public abstract_particles_distr<real_number, 3, virtual void reduce_particles_rhs(real_number particles_current_rhs[], const real_number extra_particles_current_rhs[], - const int nb_particles) const final{ + const partsize_t nb_particles) const final{ TIMEZONE("particles_field_computer::reduce_particles"); // Simply sum values - for(int idxPart = 0 ; idxPart < nb_particles ; ++idxPart){ + for(partsize_t idxPart = 0 ; idxPart < nb_particles ; ++idxPart){ particles_current_rhs[idxPart*3+IDX_X] += extra_particles_current_rhs[idxPart*3+IDX_X]; particles_current_rhs[idxPart*3+IDX_Y] += extra_particles_current_rhs[idxPart*3+IDX_Y]; particles_current_rhs[idxPart*3+IDX_Z] += extra_particles_current_rhs[idxPart*3+IDX_Z]; @@ -164,8 +165,8 @@ public: const field_class& in_field, const std::array<real_number,3>& in_spatial_box_width, const std::array<real_number,3>& in_spatial_box_offset, const std::array<real_number,3>& in_box_step_width) - : abstract_particles_distr<real_number, 3,3,1>(in_current_com, in_current_partitions, in_field_grid_dim), - field_grid_dim(in_field_grid_dim), current_partition_interval(in_current_partitions), + : abstract_particles_distr<partsize_t, real_number, 3,3,1>(in_current_com, in_current_partitions, in_field_grid_dim), + field_grid_dim({{int(in_field_grid_dim[0]),int(in_field_grid_dim[1]),int(in_field_grid_dim[2])}}), current_partition_interval(in_current_partitions), interpolator(in_interpolator), field(in_field), positions_updater(), spatial_box_width(in_spatial_box_width), spatial_box_offset(in_spatial_box_offset), box_step_width(in_box_step_width){ deriv[IDX_X] = 0; @@ -178,7 +179,7 @@ public: //////////////////////////////////////////////////////////////////////// void move_particles(real_number particles_positions[], - const int nb_particles, + const partsize_t nb_particles, const std::unique_ptr<real_number[]> particles_current_rhs[], const int nb_rhs, const real_number dt) const final{ TIMEZONE("particles_field_computer::move_particles"); diff --git a/bfps/cpp/particles/particles_input_hdf5.hpp b/bfps/cpp/particles/particles_input_hdf5.hpp index d08e3a986f80025c54b6fb4374f7d4fc6737e86c..32cfec05ad854cd7f3ffd88d771418d0552237d8 100644 --- a/bfps/cpp/particles/particles_input_hdf5.hpp +++ b/bfps/cpp/particles/particles_input_hdf5.hpp @@ -16,8 +16,8 @@ // why is "size_particle_rhs" a template parameter? // I think it's safe to assume this will always be 3. -template <class real_number, int size_particle_positions, int size_particle_rhs> -class particles_input_hdf5 : public abstract_particles_input<real_number> { +template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs> +class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_number> { const std::string filename; MPI_Comm mpi_comm; @@ -26,10 +26,10 @@ class particles_input_hdf5 : public abstract_particles_input<real_number> { hsize_t nb_total_particles; hsize_t nb_rhs; - int nb_particles_for_me; + partsize_t nb_particles_for_me; std::unique_ptr<real_number[]> my_particles_positions; - std::unique_ptr<int[]> my_particles_indexes; + std::unique_ptr<partsize_t[]> my_particles_indexes; std::vector<std::unique_ptr<real_number[]>> my_particles_rhs; static std::vector<real_number> BuildLimitsAllProcesses(MPI_Comm mpi_comm, @@ -70,7 +70,7 @@ public: const std::vector<real_number>& in_spatial_limit_per_proc) : filename(inFilename), mpi_comm(in_mpi_comm), my_rank(-1), nb_processes(-1), nb_total_particles(0), - nb_particles_for_me(-1){ + nb_particles_for_me(0){ TIMEZONE("particles_input_hdf5"); AssertMpi(MPI_Comm_rank(mpi_comm, &my_rank)); @@ -207,32 +207,32 @@ public: assert(rethdf >= 0); } - std::unique_ptr<int[]> split_particles_indexes(new int[load_splitter.getMySize()]); - for(int idx_part = 0 ; idx_part < int(load_splitter.getMySize()) ; ++idx_part){ - split_particles_indexes[idx_part] = idx_part + load_splitter.getMyOffset(); + std::unique_ptr<partsize_t[]> split_particles_indexes(new partsize_t[load_splitter.getMySize()]); + for(partsize_t idx_part = 0 ; idx_part < partsize_t(load_splitter.getMySize()) ; ++idx_part){ + split_particles_indexes[idx_part] = idx_part + partsize_t(load_splitter.getMyOffset()); } // Permute - std::vector<int> nb_particles_per_proc(nb_processes); + std::vector<partsize_t> nb_particles_per_proc(nb_processes); { TIMEZONE("partition"); const real_number spatial_box_offset = in_spatial_limit_per_proc[0]; const real_number spatial_box_width = in_spatial_limit_per_proc[nb_processes] - in_spatial_limit_per_proc[0]; - int previousOffset = 0; + partsize_t previousOffset = 0; for(int idx_proc = 0 ; idx_proc < nb_processes-1 ; ++idx_proc){ const real_number limitPartitionShifted = in_spatial_limit_per_proc[idx_proc+1]-spatial_box_offset; - const int localOffset = particles_utils::partition_extra<size_particle_positions>( + const partsize_t localOffset = particles_utils::partition_extra<partsize_t, size_particle_positions>( &split_particles_positions[previousOffset*size_particle_positions], - load_splitter.getMySize()-previousOffset, + partsize_t(load_splitter.getMySize())-previousOffset, [&](const real_number val[]){ const real_number shiftPos = val[IDX_Z]-spatial_box_offset; const real_number nbRepeat = floor(shiftPos/spatial_box_width); const real_number posInBox = shiftPos - (spatial_box_width*nbRepeat); return posInBox < limitPartitionShifted; }, - [&](const int idx1, const int idx2){ + [&](const partsize_t idx1, const partsize_t idx2){ std::swap(split_particles_indexes[idx1], split_particles_indexes[idx2]); for(int idx_rhs = 0 ; idx_rhs < int(nb_rhs) ; ++idx_rhs){ for(int idx_val = 0 ; idx_val < size_particle_rhs ; ++idx_val){ @@ -245,7 +245,7 @@ public: nb_particles_per_proc[idx_proc] = localOffset; previousOffset += localOffset; } - nb_particles_per_proc[nb_processes-1] = load_splitter.getMySize() - previousOffset; + nb_particles_per_proc[nb_processes-1] = partsize_t(load_splitter.getMySize()) - previousOffset; } { @@ -258,8 +258,8 @@ public: exchanger.alltoallv<real_number>(split_particles_positions.get(), my_particles_positions.get(), size_particle_positions); split_particles_positions.release(); - my_particles_indexes.reset(new int[exchanger.getTotalToRecv()]); - exchanger.alltoallv<int>(split_particles_indexes.get(), my_particles_indexes.get()); + my_particles_indexes.reset(new partsize_t[exchanger.getTotalToRecv()]); + exchanger.alltoallv<partsize_t>(split_particles_indexes.get(), my_particles_indexes.get()); split_particles_indexes.release(); my_particles_rhs.resize(nb_rhs); @@ -281,12 +281,12 @@ public: ~particles_input_hdf5(){ } - int getTotalNbParticles() final{ - return nb_total_particles; + partsize_t getTotalNbParticles() final{ + return partsize_t(nb_total_particles); } - int getLocalNbParticles() final{ - return int(nb_particles_for_me); + partsize_t getLocalNbParticles() final{ + return nb_particles_for_me; } int getNbRhs() final{ @@ -303,7 +303,7 @@ public: return std::move(my_particles_rhs); } - std::unique_ptr<int[]> getMyParticlesIndexes() final { + std::unique_ptr<partsize_t[]> getMyParticlesIndexes() final { assert(my_particles_indexes != nullptr); return std::move(my_particles_indexes); } diff --git a/bfps/cpp/particles/particles_output_hdf5.hpp b/bfps/cpp/particles/particles_output_hdf5.hpp index 567f466835dc0b554bb04af0ae1a73cf4d295c21..bc0a03690293668203dd78978680fdea03ab3a28 100644 --- a/bfps/cpp/particles/particles_output_hdf5.hpp +++ b/bfps/cpp/particles/particles_output_hdf5.hpp @@ -8,20 +8,23 @@ #include "abstract_particles_output.hpp" #include "scope_timer.hpp" -template <class real_number, +template <class partsize_t, + class real_number, int size_particle_positions, int size_particle_rhs> -class particles_output_hdf5 : public abstract_particles_output<real_number, +class particles_output_hdf5 : public abstract_particles_output<partsize_t, + real_number, size_particle_positions, size_particle_rhs>{ - using Parent = abstract_particles_output<real_number, + using Parent = abstract_particles_output<partsize_t, + real_number, size_particle_positions, size_particle_rhs>; const std::string particle_species_name; hid_t file_id; - const int total_nb_particles; + const partsize_t total_nb_particles; hid_t dset_id_state; hid_t dset_id_rhs; @@ -31,10 +34,11 @@ class particles_output_hdf5 : public abstract_particles_output<real_number, public: particles_output_hdf5(MPI_Comm in_mpi_com, const std::string ps_name, - const int inTotalNbParticles, + const partsize_t inTotalNbParticles, const int in_nb_rhs, const bool in_use_collective_io = false) - : abstract_particles_output<real_number, + : abstract_particles_output<partsize_t, + real_number, size_particle_positions, size_particle_rhs>( in_mpi_com, @@ -172,8 +176,8 @@ public: const int idx_time_step, const real_number* particles_positions, const std::unique_ptr<real_number[]>* particles_rhs, - const int nb_particles, - const int particles_idx_offset) final{ + const partsize_t nb_particles, + const partsize_t particles_idx_offset) final{ assert(Parent::isInvolved()); TIMEZONE("particles_output_hdf5::write"); diff --git a/bfps/cpp/particles/particles_output_mpiio.hpp b/bfps/cpp/particles/particles_output_mpiio.hpp index bd0eac41eef744c4f914391c1bcc4773090d1969..77dae6ca2f9441948ccf04f8a72e4a53d249894b 100644 --- a/bfps/cpp/particles/particles_output_mpiio.hpp +++ b/bfps/cpp/particles/particles_output_mpiio.hpp @@ -10,9 +10,9 @@ #include "scope_timer.hpp" #include "particles_utils.hpp" -template <class real_number, int size_particle_positions, int size_particle_rhs> -class particles_output_mpiio : public abstract_particles_output<real_number, size_particle_positions, size_particle_rhs>{ - using Parent = abstract_particles_output<real_number, size_particle_positions, size_particle_rhs>; +template <class partsize_t, class real_number, int size_particle_positions, int size_particle_rhs> +class particles_output_mpiio : public abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>{ + using Parent = abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>; const std::string filename; const int nb_step_prealloc; @@ -22,9 +22,9 @@ class particles_output_mpiio : public abstract_particles_output<real_number, siz MPI_File mpi_file; public: - particles_output_mpiio(MPI_Comm in_mpi_com, const std::string in_filename, const int inTotalNbParticles, + particles_output_mpiio(MPI_Comm in_mpi_com, const std::string in_filename, const partsize_t inTotalNbParticles, const int in_nb_rhs, const int in_nb_step_prealloc = -1) - : abstract_particles_output<real_number, size_particle_positions, size_particle_rhs>(in_mpi_com, inTotalNbParticles, in_nb_rhs), + : abstract_particles_output<partsize_t, real_number, size_particle_positions, size_particle_rhs>(in_mpi_com, inTotalNbParticles, in_nb_rhs), filename(in_filename), nb_step_prealloc(in_nb_step_prealloc), current_step_in_file(0){ if(Parent::isInvolved()){ { @@ -48,7 +48,7 @@ public: } void write(const int /*time_step*/, const real_number* particles_positions, const std::unique_ptr<real_number[]>* particles_rhs, - const int nb_particles, const int particles_idx_offset) final{ + const partsize_t nb_particles, const partsize_t particles_idx_offset) final{ assert(Parent::isInvolved()); TIMEZONE("particles_output_mpiio::write"); diff --git a/bfps/cpp/particles/particles_system.hpp b/bfps/cpp/particles/particles_system.hpp index 05466fc9313ad5f86c8c8279a407f467ec61d8b1..1b37dbce317b3d00e9ba1795727eee23396712f7 100644 --- a/bfps/cpp/particles/particles_system.hpp +++ b/bfps/cpp/particles/particles_system.hpp @@ -12,8 +12,8 @@ #include "particles_adams_bashforth.hpp" #include "scope_timer.hpp" -template <class real_number, class field_rnumber, class interpolator_class, int interp_neighbours> -class particles_system : public abstract_particles_system<real_number> { +template <class partsize_t, class real_number, class field_rnumber, class interpolator_class, int interp_neighbours> +class particles_system : public abstract_particles_system<partsize_t, real_number> { MPI_Comm mpi_com; const std::pair<int,int> current_partition_interval; @@ -23,10 +23,10 @@ class particles_system : public abstract_particles_system<real_number> { interpolator_class interpolator; - particles_field_computer<real_number, interpolator_class, field_accessor<field_rnumber>, interp_neighbours, particles_adams_bashforth<real_number, 3,3>> computer; + particles_field_computer<partsize_t, real_number, interpolator_class, field_accessor<field_rnumber>, interp_neighbours, particles_adams_bashforth<partsize_t, real_number, 3,3>> computer; - std::unique_ptr<int[]> current_my_nb_particles_per_partition; - std::unique_ptr<int[]> current_offset_particles_for_partition; + std::unique_ptr<partsize_t[]> current_my_nb_particles_per_partition; + std::unique_ptr<partsize_t[]> current_offset_particles_for_partition; const std::array<real_number,3> spatial_box_width; const std::array<real_number,3> spatial_partition_width; @@ -34,8 +34,8 @@ class particles_system : public abstract_particles_system<real_number> { const real_number my_spatial_up_limit; std::unique_ptr<real_number[]> my_particles_positions; - std::unique_ptr<int[]> my_particles_positions_indexes; - int my_nb_particles; + std::unique_ptr<partsize_t[]> my_particles_positions_indexes; + partsize_t my_nb_particles; std::vector<std::unique_ptr<real_number[]>> my_particles_rhs; int step_idx; @@ -61,14 +61,14 @@ public: my_spatial_low_limit(in_my_spatial_low_limit), my_spatial_up_limit(in_my_spatial_up_limit), my_nb_particles(0), step_idx(in_current_iteration){ - current_my_nb_particles_per_partition.reset(new int[partition_interval_size]); - current_offset_particles_for_partition.reset(new int[partition_interval_size+1]); + current_my_nb_particles_per_partition.reset(new partsize_t[partition_interval_size]); + current_offset_particles_for_partition.reset(new partsize_t[partition_interval_size+1]); } ~particles_system(){ } - void init(abstract_particles_input<real_number>& particles_input){ + void init(abstract_particles_input<partsize_t, real_number>& particles_input){ TIMEZONE("particles_system::init"); my_particles_positions = particles_input.getMyParticles(); @@ -76,20 +76,20 @@ public: my_particles_rhs = particles_input.getMyRhs(); my_nb_particles = particles_input.getLocalNbParticles(); - for(int idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me + for(partsize_t idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me const int partition_level = computer.pbc_field_layer(my_particles_positions[idx_part*3+IDX_Z], IDX_Z); assert(partition_level >= current_partition_interval.first); assert(partition_level < current_partition_interval.second); } - particles_utils::partition_extra_z<3>(&my_particles_positions[0], my_nb_particles, partition_interval_size, + particles_utils::partition_extra_z<partsize_t, 3>(&my_particles_positions[0], my_nb_particles, partition_interval_size, current_my_nb_particles_per_partition.get(), current_offset_particles_for_partition.get(), [&](const real_number& z_pos){ const int partition_level = computer.pbc_field_layer(z_pos, IDX_Z); assert(current_partition_interval.first <= partition_level && partition_level < current_partition_interval.second); return partition_level - current_partition_interval.first; }, - [&](const int idx1, const int idx2){ + [&](const partsize_t idx1, const partsize_t idx2){ std::swap(my_particles_positions_indexes[idx1], my_particles_positions_indexes[idx2]); for(int idx_rhs = 0 ; idx_rhs < int(my_particles_rhs.size()) ; ++idx_rhs){ for(int idx_val = 0 ; idx_val < 3 ; ++idx_val){ @@ -103,7 +103,7 @@ public: for(int idxPartition = 0 ; idxPartition < partition_interval_size ; ++idxPartition){ assert(current_my_nb_particles_per_partition[idxPartition] == current_offset_particles_for_partition[idxPartition+1] - current_offset_particles_for_partition[idxPartition]); - for(int idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ + for(partsize_t idx = current_offset_particles_for_partition[idxPartition] ; idx < current_offset_particles_for_partition[idxPartition+1] ; ++idx){ assert(computer.pbc_field_layer(my_particles_positions[idx*3+IDX_Z], IDX_Z)-current_partition_interval.first == idxPartition); } } @@ -131,7 +131,7 @@ public: computer.redistribute(current_my_nb_particles_per_partition.get(), &my_nb_particles, &my_particles_positions, - my_particles_rhs.data(), my_particles_rhs.size(), + my_particles_rhs.data(), int(my_particles_rhs.size()), &my_particles_positions_indexes); } @@ -142,7 +142,7 @@ public: void shift_rhs_vectors() final { if(my_particles_rhs.size()){ std::unique_ptr<real_number[]> next_current(std::move(my_particles_rhs.back())); - for(int idx_rhs = my_particles_rhs.size()-1 ; idx_rhs > 0 ; --idx_rhs){ + for(int idx_rhs = int(my_particles_rhs.size())-1 ; idx_rhs > 0 ; --idx_rhs){ my_particles_rhs[idx_rhs] = std::move(my_particles_rhs[idx_rhs-1]); } my_particles_rhs[0] = std::move(next_current); @@ -167,11 +167,11 @@ public: return my_particles_rhs.data(); } - const int* getParticlesIndexes() const final { + const partsize_t* getParticlesIndexes() const final { return my_particles_positions_indexes.get(); } - int getLocalNbParticles() const final { + partsize_t getLocalNbParticles() const final { return my_nb_particles; } @@ -180,7 +180,7 @@ public: } void checkNan() const { // TODO remove - for(int idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me + for(partsize_t idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me assert(std::isnan(my_particles_positions[idx_part*3+IDX_X]) == false); assert(std::isnan(my_particles_positions[idx_part*3+IDX_Y]) == false); assert(std::isnan(my_particles_positions[idx_part*3+IDX_Z]) == false); diff --git a/bfps/cpp/particles/particles_system_builder.hpp b/bfps/cpp/particles/particles_system_builder.hpp index ab895d4b2d53ab562dba28e17e404297ab225700..86adb3afbc945c170419aa9efefe78d2e6a2afdd 100644 --- a/bfps/cpp/particles/particles_system_builder.hpp +++ b/bfps/cpp/particles/particles_system_builder.hpp @@ -108,14 +108,14 @@ inline RetType evaluate(IterType1 value1, IterType2 value2, Args... args){ /// ////////////////////////////////////////////////////////////////////////////// -template <class field_rnumber, field_backend be, class particles_rnumber> +template <class partsize_t, class field_rnumber, field_backend be, class particles_rnumber> struct particles_system_build_container { template <const int interpolation_size, const int spline_mode> - static std::unique_ptr<abstract_particles_system<particles_rnumber>> instanciate( + static std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> instanciate( const field<field_rnumber, be, THREE>* fs_field, // (field object) const kspace<be, SMOOTH>* fs_kk, // (kspace object, contains dkx, dky, dkz) const int nsteps, // to check coherency between parameters and hdf input file (nb rhs) - const int nparticles, // to check coherency between parameters and hdf input file + const partsize_t nparticles, // to check coherency between parameters and hdf input file const std::string& fname_input, // particles input filename const std::string& inDatanameState, const std::string& inDatanameRhs, // input dataset names MPI_Comm mpi_comm, @@ -198,8 +198,8 @@ struct particles_system_build_container { const particles_rnumber my_spatial_up_limit_z = particles_rnumber(local_field_offset[IDX_Z]+local_field_dims[IDX_Z])*spatial_partition_width[IDX_Z]; // Create the particles system - particles_system<particles_rnumber, field_rnumber, particles_interp_spline<particles_rnumber, interpolation_size,spline_mode>, interpolation_size>* part_sys - = new particles_system<particles_rnumber, field_rnumber, particles_interp_spline<particles_rnumber, interpolation_size,spline_mode>, interpolation_size>(field_grid_dim, + particles_system<partsize_t, particles_rnumber, field_rnumber, particles_interp_spline<particles_rnumber, interpolation_size,spline_mode>, interpolation_size>* part_sys + = new particles_system<partsize_t, particles_rnumber, field_rnumber, particles_interp_spline<particles_rnumber, interpolation_size,spline_mode>, interpolation_size>(field_grid_dim, spatial_box_width, spatial_box_offset, spatial_partition_width, @@ -213,7 +213,7 @@ struct particles_system_build_container { in_current_iteration); // Load particles from hdf5 - particles_input_hdf5<particles_rnumber, 3,3> generator(mpi_comm, fname_input, + particles_input_hdf5<partsize_t, particles_rnumber, 3,3> generator(mpi_comm, fname_input, inDatanameState, inDatanameRhs, my_spatial_low_limit_z, my_spatial_up_limit_z); // Ensure parameters match the input file @@ -233,27 +233,27 @@ struct particles_system_build_container { assert(part_sys->getNbRhs() == nsteps); // Return the created particles system - return std::unique_ptr<abstract_particles_system<particles_rnumber>>(part_sys); + return std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>(part_sys); } }; -template <class field_rnumber, field_backend be, class particles_rnumber = double> -inline std::unique_ptr<abstract_particles_system<particles_rnumber>> particles_system_builder( +template <class partsize_t, class field_rnumber, field_backend be, class particles_rnumber = double> +inline std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>> particles_system_builder( const field<field_rnumber, be, THREE>* fs_field, // (field object) const kspace<be, SMOOTH>* fs_kk, // (kspace object, contains dkx, dky, dkz) const int nsteps, // to check coherency between parameters and hdf input file (nb rhs) - const int nparticles, // to check coherency between parameters and hdf input file + const partsize_t nparticles, // to check coherency between parameters and hdf input file const std::string& fname_input, // particles input filename const std::string& inDatanameState, const std::string& inDatanameRhs, // input dataset names const int interpolation_size, const int spline_mode, MPI_Comm mpi_comm, const int in_current_iteration){ - return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<particles_rnumber>>, + return Template_double_for_if::evaluate<std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>, int, 1, 7, 1, // interpolation_size int, 0, 3, 1, // spline_mode - particles_system_build_container<field_rnumber,be,particles_rnumber>>( + particles_system_build_container<partsize_t, field_rnumber,be,particles_rnumber>>( interpolation_size, // template iterator 1 spline_mode, // template iterator 2 fs_field,fs_kk, nsteps, nparticles, fname_input, inDatanameState, inDatanameRhs, mpi_comm, in_current_iteration); diff --git a/bfps/cpp/particles/particles_utils.hpp b/bfps/cpp/particles/particles_utils.hpp index ffef9f68f1d5ff8e1e06d768055e994c1daa99b8..146dc4399477b72c30329edff587d35d7b44d69d 100644 --- a/bfps/cpp/particles/particles_utils.hpp +++ b/bfps/cpp/particles/particles_utils.hpp @@ -36,29 +36,36 @@ namespace particles_utils { class GetMpiType{ const MPI_Datatype type; public: + explicit GetMpiType(const long long int&) : type(MPI_LONG_LONG_INT){} + explicit GetMpiType(const unsigned char&) : type(MPI_UNSIGNED_CHAR){} + explicit GetMpiType(const unsigned short&) : type(MPI_UNSIGNED_SHORT){} + explicit GetMpiType(const unsigned int&) : type(MPI_UNSIGNED){} + explicit GetMpiType(const unsigned long&) : type(MPI_UNSIGNED_LONG){} + explicit GetMpiType(const char&) : type(MPI_CHAR){} + explicit GetMpiType(const short&) : type(MPI_SHORT){} explicit GetMpiType(const int&) : type(MPI_INT){} + explicit GetMpiType(const long&) : type(MPI_LONG){} + explicit GetMpiType(const long double&) : type(MPI_LONG_DOUBLE){} explicit GetMpiType(const double&) : type(MPI_DOUBLE){} explicit GetMpiType(const float&) : type(MPI_FLOAT){} - explicit GetMpiType(const char&) : type(MPI_CHAR){} - explicit GetMpiType(const long&) : type(MPI_LONG){} /*do not make it explicit*/ operator MPI_Datatype() const { return type; } }; -template <int nb_values, class real_number, class Predicate> -inline int partition(real_number* array, const int size, Predicate pdc) +template <class partsize_t, int nb_values, class real_number, class Predicate> +inline partsize_t partition(real_number* array, const partsize_t size, Predicate pdc) { if(size == 0) return 0; if(size == 1) return (pdc(&array[0])?1:0); - int idxInsert = 0; + partsize_t idxInsert = 0; - for(int idx = 0 ; idx < size && pdc(&array[idx*nb_values]); ++idx){ + for(partsize_t idx = 0 ; idx < size && pdc(&array[idx*nb_values]); ++idx){ idxInsert += 1; } - for(int idx = idxInsert ; idx < size ; ++idx){ + for(partsize_t idx = idxInsert ; idx < size ; ++idx){ if(pdc(&array[idx*nb_values])){ for(int idxVal = 0 ; idxVal < nb_values ; ++idxVal){ std::swap(array[idx*nb_values + idxVal], array[idxInsert*nb_values + idxVal]); @@ -71,19 +78,19 @@ inline int partition(real_number* array, const int size, Predicate pdc) } -template <int nb_values, class real_number, class Predicate1, class Predicate2> -inline int partition_extra(real_number* array, const int size, Predicate1 pdc, Predicate2 pdcswap, const int offset_idx_swap = 0) +template <class partsize_t, int nb_values, class real_number, class Predicate1, class Predicate2> +inline partsize_t partition_extra(real_number* array, const partsize_t size, Predicate1 pdc, Predicate2 pdcswap, const partsize_t offset_idx_swap = 0) { if(size == 0) return 0; if(size == 1) return (pdc(&array[0])?1:0); - int idxInsert = 0; + partsize_t idxInsert = 0; - for(int idx = 0 ; idx < size && pdc(&array[idx*nb_values]); ++idx){ + for(partsize_t idx = 0 ; idx < size && pdc(&array[idx*nb_values]); ++idx){ idxInsert += 1; } - for(int idx = idxInsert ; idx < size ; ++idx){ + for(partsize_t idx = idxInsert ; idx < size ; ++idx){ if(pdc(&array[idx*nb_values])){ for(int idxVal = 0 ; idxVal < nb_values ; ++idxVal){ std::swap(array[idx*nb_values + idxVal], array[idxInsert*nb_values + idxVal]); @@ -96,9 +103,9 @@ inline int partition_extra(real_number* array, const int size, Predicate1 pdc, P return idxInsert; } -template <int nb_values, class real_number, class Predicate1, class Predicate2> -inline void partition_extra_z(real_number* array, const int size, const int nb_partitions, - int partitions_size[], int partitions_offset[], +template <class partsize_t, int nb_values, class real_number, class Predicate1, class Predicate2> +inline void partition_extra_z(real_number* array, const partsize_t size, const int nb_partitions, + partsize_t partitions_size[], partsize_t partitions_offset[], Predicate1 partitions_levels, Predicate2 pdcswap) { if(nb_partitions == 0){ @@ -114,7 +121,7 @@ inline void partition_extra_z(real_number* array, const int size, const int nb_p } if(nb_partitions == 2){ - const int size_current = partition_extra<nb_values>(array, size, + const partsize_t size_current = partition_extra<partsize_t, nb_values>(array, size, [&](const real_number inval[]){ return partitions_levels(inval[IDX_Z]) == 0; }, pdcswap); @@ -140,9 +147,9 @@ inline void partition_extra_z(real_number* array, const int size, const int nb_p else{ const int idx_middle = (current_part.second-current_part.first)/2 + current_part.first - 1; - const int size_unpart = partitions_offset[current_part.second]- partitions_offset[current_part.first]; + const partsize_t size_unpart = partitions_offset[current_part.second]- partitions_offset[current_part.first]; - const int size_current = partition_extra<nb_values>(&array[partitions_offset[current_part.first]*nb_values], + const partsize_t size_current = partition_extra<partsize_t, nb_values>(&array[partitions_offset[current_part.first]*nb_values], size_unpart, [&](const real_number inval[]){ return partitions_levels(inval[IDX_Z]) <= idx_middle; @@ -157,13 +164,13 @@ inline void partition_extra_z(real_number* array, const int size, const int nb_p } } -template <int nb_values, class real_number, class Predicate1, class Predicate2> -inline std::pair<std::vector<int>,std::vector<int>> partition_extra_z(real_number* array, const int size, +template <class partsize_t, int nb_values, class real_number, class Predicate1, class Predicate2> +inline std::pair<std::vector<partsize_t>,std::vector<partsize_t>> partition_extra_z(real_number* array, const partsize_t size, const int nb_partitions, Predicate1 partitions_levels, Predicate2 pdcswap){ - std::vector<int> partitions_size(nb_partitions); - std::vector<int> partitions_offset(nb_partitions+1); + std::vector<partsize_t> partitions_size(nb_partitions); + std::vector<partsize_t> partitions_offset(nb_partitions+1); partition_extra_z<nb_values, real_number, Predicate1, Predicate2>(array, size, nb_partitions, partitions_size.data(), partitions_offset.data(), partitions_levels, pdcswap); diff --git a/bfps/cpp/scope_timer.hpp b/bfps/cpp/scope_timer.hpp index e513e8e6e47a14d69fc0c695894fc2114a9b6058..2c48e2eda06ded74e668825181f0444eef22f647 100644 --- a/bfps/cpp/scope_timer.hpp +++ b/bfps/cpp/scope_timer.hpp @@ -374,10 +374,11 @@ public: if(myrank != 0){ const std::string strOutput = myResults.str(); - int sizeOutput = strOutput.length(); + assert(strOutput.length() <= std::numeric_limits<int>::max()); + int sizeOutput = int(strOutput.length()); retMpi = MPI_Send(&sizeOutput, 1, MPI_INT, 0, 99, inComm); assert(retMpi == MPI_SUCCESS); - retMpi = MPI_Send((void*)strOutput.data(), sizeOutput, MPI_CHAR, 0, 100, inComm); + retMpi = MPI_Send(const_cast<char*>(strOutput.data()), sizeOutput, MPI_CHAR, 0, 100, inComm); assert(retMpi == MPI_SUCCESS); } else{ @@ -445,7 +446,7 @@ public: if(myRank == 0){ nbEventsPerProc.reset(new int[nbProcess]); } - const int myNbEvents = myEvents.size(); + const int myNbEvents = int(myEvents.size()); retMpi = MPI_Gather(const_cast<int*>(&myNbEvents), 1, MPI_INT, nbEventsPerProc.get(), 1, MPI_INT, 0, inComm); @@ -463,13 +464,13 @@ public: diplsByte[0] = 0; for(int idx = 1 ; idx <= nbProcess ; ++idx){ dipls[idx] = dipls[idx-1] + nbEventsPerProc[idx-1]; - diplsByte[idx] = dipls[idx] * sizeof(SerializedEvent); - nbEventsPerProcByte[idx-1] = nbEventsPerProc[idx-1] * sizeof(SerializedEvent); + diplsByte[idx] = dipls[idx] * int(sizeof(SerializedEvent)); + nbEventsPerProcByte[idx-1] = nbEventsPerProc[idx-1] * int(sizeof(SerializedEvent)); } allEvents.reset(new SerializedEvent[dipls[nbProcess]]); } - retMpi = MPI_Gatherv(myEvents.data(), myNbEvents * sizeof(SerializedEvent), MPI_BYTE, + retMpi = MPI_Gatherv(myEvents.data(), myNbEvents * int(sizeof(SerializedEvent)), MPI_BYTE, allEvents.get(), nbEventsPerProcByte.get(), diplsByte.get(), MPI_BYTE, 0, inComm); assert(retMpi == MPI_SUCCESS); @@ -497,7 +498,7 @@ public: newEvent.totalTime = allEvents[idxEvent].totalTime; newEvent.minTime = allEvents[idxEvent].minTime; newEvent.maxTime = allEvents[idxEvent].maxTime; - newEvent.occurrence = allEvents[idxEvent].totalTime; + newEvent.occurrence = allEvents[idxEvent].occurrence; newEvent.nbProcess = 1; newEvent.minTimeProcess = allEvents[idxEvent].totalTime; newEvent.maxTimeProcess = allEvents[idxEvent].totalTime; @@ -638,10 +639,11 @@ public: if(myRank != 0){ const std::string strOutput = myResults.str(); - int sizeOutput = strOutput.length(); + assert(strOutput.length() <= std::numeric_limits<int>::max()); + int sizeOutput = int(strOutput.length()); retMpi = MPI_Send(&sizeOutput, 1, MPI_INT, 0, 99, inComm); assert(retMpi == MPI_SUCCESS); - retMpi = MPI_Send((void*)strOutput.data(), sizeOutput, MPI_CHAR, 0, 100, inComm); + retMpi = MPI_Send(const_cast<char*>(strOutput.data()), sizeOutput, MPI_CHAR, 0, 100, inComm); assert(retMpi == MPI_SUCCESS); } else{