diff --git a/bfps/cpp/distributed_particles.cpp b/bfps/cpp/distributed_particles.cpp index b0a445af03ab51d79dd1aded6ba230fc4b80adbf..3773b5f4e0c84811f456a7cef0cac7b750847381 100644 --- a/bfps/cpp/distributed_particles.cpp +++ b/bfps/cpp/distributed_particles.cpp @@ -39,7 +39,7 @@ extern int myrank, nprocs; -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_particles( const char *NAME, const hid_t data_file_id, @@ -56,21 +56,25 @@ distributed_particles<particle_type, rnumber, interp_neighbours>::distributed_pa this->vel = FIELD; this->rhs.resize(INTEGRATION_STEPS); this->integration_steps = INTEGRATION_STEPS; + this->state.reserve(2*this->nparticles / this->nprocs); + for (unsigned int i=0; i<this->rhs.size(); i++) + this->rhs[i].reserve(2*this->nparticles / this->nprocs); } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> distributed_particles<particle_type, rnumber, interp_neighbours>::~distributed_particles() { } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::sample( interpolator<rnumber, interp_neighbours> *field, + const std::unordered_map<int, single_particle_state<particle_type>> &x, std::unordered_map<int, single_particle_state<POINT3D>> &y) { double *yy = new double[3]; y.clear(); - for (auto &pp: this->state) + for (auto &pp: x) { (*field)(pp.second.data, yy); y[pp.first] = yy; @@ -78,39 +82,41 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::sample( delete[] yy; } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs( const std::unordered_map<int, single_particle_state<particle_type>> &x, std::unordered_map<int, single_particle_state<particle_type>> &y) { - double *yy = new double[this->ncomponents]; - y.clear(); - for (auto &pp: x) + std::unordered_map<int, single_particle_state<POINT3D>> yy; + switch(particle_type) { - (*this->vel)(pp.second.data, yy); - y[pp.first] = yy; + case VELOCITY_TRACER: + this->sample(this->vel, this->state, yy); + y.clear(); + for (auto &pp: x) + y[pp.first] = yy[pp.first].data; + break; } - delete[] yy; } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::sample( interpolator<rnumber, interp_neighbours> *field, const char *dset_name) { std::unordered_map<int, single_particle_state<POINT3D>> y; - this->sample(field, y); + this->sample(field, this->state, y); this->write(dset_name, y); } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs() { for (int i=this->integration_steps-2; i>=0; i--) rhs[i+1] = rhs[i]; } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute( std::unordered_map<int, single_particle_state<particle_type>> &x, std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals) @@ -168,7 +174,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib buffer_size = (buffer_size > npr[0])? buffer_size : npr[0]; buffer_size = (buffer_size > npr[1])? buffer_size : npr[1]; //DEBUG_MSG("buffer size is %d\n", buffer_size); - double *buffer = new double[buffer_size*this->ncomponents*(1+vals.size())]; + double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())]; for (rsrc = 0; rsrc<this->nprocs; rsrc++) for (int i=0; i<2; i++) { @@ -186,21 +192,21 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib for (int p: ps[i]) { std::copy(x[p].data, - x[p].data + this->ncomponents, - buffer + pcounter*(1+vals.size())*this->ncomponents); + x[p].data + state_dimension(particle_type), + buffer + pcounter*(1+vals.size())*state_dimension(particle_type)); x.erase(p); for (int tindex=0; tindex<vals.size(); tindex++) { std::copy(vals[tindex][p].data, - vals[tindex][p].data + this->ncomponents, - buffer + (pcounter*(1+vals.size()) + tindex+1)*this->ncomponents); + vals[tindex][p].data + state_dimension(particle_type), + buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type)); vals[tindex].erase(p); } pcounter++; } MPI_Send( buffer, - nps[i]*(1+vals.size())*this->ncomponents, + nps[i]*(1+vals.size())*state_dimension(particle_type), MPI_DOUBLE, rdst, 2*(rsrc*this->nprocs + rdst)+1, @@ -218,7 +224,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib MPI_STATUS_IGNORE); MPI_Recv( buffer, - npr[1-i]*(1+vals.size())*this->ncomponents, + npr[1-i]*(1+vals.size())*state_dimension(particle_type), MPI_DOUBLE, rsrc, 2*(rsrc*this->nprocs + rdst)+1, @@ -227,10 +233,10 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib int pcounter = 0; for (int p: pr[1-i]) { - x[p] = buffer + (pcounter*(1+vals.size()))*this->ncomponents; + x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type); for (int tindex=0; tindex<vals.size(); tindex++) { - vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*this->ncomponents; + vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type); } pcounter++; } @@ -255,13 +261,13 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::redistrib -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( const int nsteps) { this->get_rhs(this->state, this->rhs[0]); for (auto &pp: this->state) - for (int i=0; i<this->ncomponents; i++) + for (int i=0; i<state_dimension(particle_type); i++) switch(nsteps) { case 1: @@ -303,7 +309,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBash } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::step() { this->AdamsBashforth((this->iteration < this->integration_steps) ? @@ -313,10 +319,10 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::step() } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::read() { - double *temp = new double[this->chunk_size*this->ncomponents]; + double *temp = new double[this->chunk_size*state_dimension(particle_type)]; for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++) { //read state @@ -324,14 +330,14 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::read() this->read_state_chunk(cindex, temp); MPI_Bcast( temp, - this->chunk_size*this->ncomponents, + this->chunk_size*state_dimension(particle_type), MPI_DOUBLE, 0, this->comm); for (int p=0; p<this->chunk_size; p++) { - if (this->vel->get_rank(temp[this->ncomponents*p+2]) == this->myrank) - this->state[p+cindex*this->chunk_size] = temp + this->ncomponents*p; + if (this->vel->get_rank(temp[state_dimension(particle_type)*p+2]) == this->myrank) + this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; } //read rhs if (this->iteration > 0) @@ -341,7 +347,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::read() this->read_rhs_chunk(cindex, i, temp); MPI_Bcast( temp, - this->chunk_size*this->ncomponents, + this->chunk_size*state_dimension(particle_type), MPI_DOUBLE, 0, this->comm); @@ -349,7 +355,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::read() { auto pp = this->state.find(p+cindex*this->chunk_size); if (pp != this->state.end()) - this->rhs[i][p+cindex*this->chunk_size] = temp + this->ncomponents*p; + this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; } } } @@ -357,7 +363,7 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::read() delete[] temp; } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::write( const char *dset_name, std::unordered_map<int, single_particle_state<POINT3D>> &y) @@ -389,28 +395,28 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::write( delete[] data; } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void distributed_particles<particle_type, rnumber, interp_neighbours>::write( const bool write_rhs) { - double *temp0 = new double[this->chunk_size*this->ncomponents]; - double *temp1 = new double[this->chunk_size*this->ncomponents]; + double *temp0 = new double[this->chunk_size*state_dimension(particle_type)]; + double *temp1 = new double[this->chunk_size*state_dimension(particle_type)]; for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++) { //write state - std::fill_n(temp0, this->ncomponents*this->chunk_size, 0); + std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); for (int p=0; p<this->chunk_size; p++) { auto pp = this->state.find(p + cindex*this->chunk_size); if (pp != this->state.end()) std::copy(pp->second.data, - pp->second.data + this->ncomponents, - temp0 + pp->first*this->ncomponents); + pp->second.data + state_dimension(particle_type), + temp0 + pp->first*state_dimension(particle_type)); } MPI_Allreduce( temp0, temp1, - this->ncomponents*this->chunk_size, + state_dimension(particle_type)*this->chunk_size, MPI_DOUBLE, MPI_SUM, this->comm); @@ -420,19 +426,19 @@ void distributed_particles<particle_type, rnumber, interp_neighbours>::write( if (write_rhs) for (int i=0; i<this->integration_steps; i++) { - std::fill_n(temp0, this->ncomponents*this->chunk_size, 0); + std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); for (int p=0; p<this->chunk_size; p++) { auto pp = this->rhs[i].find(p + cindex*this->chunk_size); if (pp != this->rhs[i].end()) std::copy(pp->second.data, - pp->second.data + this->ncomponents, - temp0 + pp->first*this->ncomponents); + pp->second.data + state_dimension(particle_type), + temp0 + pp->first*state_dimension(particle_type)); } MPI_Allreduce( temp0, temp1, - this->ncomponents*this->chunk_size, + state_dimension(particle_type)*this->chunk_size, MPI_DOUBLE, MPI_SUM, this->comm); diff --git a/bfps/cpp/distributed_particles.hpp b/bfps/cpp/distributed_particles.hpp index e972f69943814823e8b5527422b9727ab7331a8d..cf6e124a7744c049b6fcf0c84c1618a0a214c30e 100644 --- a/bfps/cpp/distributed_particles.hpp +++ b/bfps/cpp/distributed_particles.hpp @@ -39,7 +39,7 @@ #define DISTRIBUTED_PARTICLES -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> class distributed_particles: public particles_io_base<particle_type> { private: @@ -74,6 +74,7 @@ class distributed_particles: public particles_io_base<particle_type> const char *dset_name); void sample( interpolator<rnumber, interp_neighbours> *field, + const std::unordered_map<int, single_particle_state<particle_type>> &x, std::unordered_map<int, single_particle_state<POINT3D>> &y); void get_rhs( const std::unordered_map<int, single_particle_state<particle_type>> &x, diff --git a/bfps/cpp/interpolator.hpp b/bfps/cpp/interpolator.hpp index d20ee6ee1b5023d6a5e177d15e8ddc92a339d7ca..7e56ebe159fd24ed7cf623f0a869e1d262d4aadb 100644 --- a/bfps/cpp/interpolator.hpp +++ b/bfps/cpp/interpolator.hpp @@ -42,6 +42,7 @@ class interpolator:public interpolator_base<rnumber, interp_neighbours> rnumber *field; public: + using interpolator_base<rnumber, interp_neighbours>::operator(); ptrdiff_t buffer_size; /* descriptor for buffered field */ @@ -67,17 +68,6 @@ class interpolator:public interpolator_base<rnumber, interp_neighbours> const double *__restrict__ x, double *__restrict__ y, const int *deriv = NULL); - /* interpolate 1 point */ - inline void operator()( - const double *__restrict__ x, - double *__restrict__ dest, - const int *deriv = NULL) - { - int xg[3]; - double xx[3]; - this->get_grid_coordinates(x, xg, xx); - (*this)(xg, xx, dest, deriv); - } void operator()( const int *__restrict__ xg, const double *__restrict__ xx, diff --git a/bfps/cpp/interpolator_base.hpp b/bfps/cpp/interpolator_base.hpp index 1f29a5c05e2afa2dda45f45e5f87a71a63d2b25d..7dda7fb08319bf2a044bcc220e204b748d6336d6 100644 --- a/bfps/cpp/interpolator_base.hpp +++ b/bfps/cpp/interpolator_base.hpp @@ -87,6 +87,18 @@ class interpolator_base const double *__restrict__ xx, double *__restrict__ dest, const int *deriv = NULL) = 0; + + /* interpolate 1 point */ + inline void operator()( + const double *__restrict__ x, + double *__restrict__ dest, + const int *deriv = NULL) + { + int xg[3]; + double xx[3]; + this->get_grid_coordinates(x, xg, xx); + (*this)(xg, xx, dest, deriv); + } }; #endif//INTERPOLATOR_BASE diff --git a/bfps/cpp/particles.cpp b/bfps/cpp/particles.cpp index 2182e769191d587aea2f5b174993ccae407bc109..459ea7a37847a6ce4f217bc937130f9304466ed1 100644 --- a/bfps/cpp/particles.cpp +++ b/bfps/cpp/particles.cpp @@ -39,7 +39,7 @@ extern int myrank, nprocs; -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> particles<particle_type, rnumber, interp_neighbours>::particles( const char *NAME, const hid_t data_file_id, @@ -55,7 +55,7 @@ particles<particle_type, rnumber, interp_neighbours>::particles( (INTEGRATION_STEPS >= 1)); this->vel = FIELD; this->integration_steps = INTEGRATION_STEPS; - this->array_size = this->nparticles * this->ncomponents; + this->array_size = this->nparticles * state_dimension(particle_type); this->state = new double[this->array_size]; std::fill_n(this->state, this->array_size, 0.0); for (int i=0; i < this->integration_steps; i++) @@ -65,7 +65,7 @@ particles<particle_type, rnumber, interp_neighbours>::particles( } } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> particles<particle_type, rnumber, interp_neighbours>::~particles() { delete[] this->state; @@ -75,18 +75,18 @@ particles<particle_type, rnumber, interp_neighbours>::~particles() } } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void particles<particle_type, rnumber, interp_neighbours>::get_rhs(double *x, double *y) { switch(particle_type) { case VELOCITY_TRACER: - this->vel->sample(this->nparticles, this->ncomponents, x, y); + this->vel->sample(this->nparticles, state_dimension(particle_type), x, y); break; } } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void particles<particle_type, rnumber, interp_neighbours>::roll_rhs() { for (int i=this->integration_steps-2; i>=0; i--) @@ -97,7 +97,7 @@ void particles<particle_type, rnumber, interp_neighbours>::roll_rhs() -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( const int nsteps) { @@ -107,26 +107,26 @@ void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( { case 1: for (int p=0; p<this->nparticles; p++) - for (int i=0; i<this->ncomponents; i++) + for (int i=0; i<state_dimension(particle_type); i++) { - ii = p*this->ncomponents+i; + ii = p*state_dimension(particle_type)+i; this->state[ii] += this->dt*this->rhs[0][ii]; } break; case 2: for (int p=0; p<this->nparticles; p++) - for (int i=0; i<this->ncomponents; i++) + for (int i=0; i<state_dimension(particle_type); i++) { - ii = p*this->ncomponents+i; + ii = p*state_dimension(particle_type)+i; this->state[ii] += this->dt*(3*this->rhs[0][ii] - this->rhs[1][ii])/2; } break; case 3: for (int p=0; p<this->nparticles; p++) - for (int i=0; i<this->ncomponents; i++) + for (int i=0; i<state_dimension(particle_type); i++) { - ii = p*this->ncomponents+i; + ii = p*state_dimension(particle_type)+i; this->state[ii] += this->dt*(23*this->rhs[0][ii] - 16*this->rhs[1][ii] + 5*this->rhs[2][ii])/12; @@ -134,9 +134,9 @@ void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( break; case 4: for (int p=0; p<this->nparticles; p++) - for (int i=0; i<this->ncomponents; i++) + for (int i=0; i<state_dimension(particle_type); i++) { - ii = p*this->ncomponents+i; + ii = p*state_dimension(particle_type)+i; this->state[ii] += this->dt*(55*this->rhs[0][ii] - 59*this->rhs[1][ii] + 37*this->rhs[2][ii] @@ -145,9 +145,9 @@ void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( break; case 5: for (int p=0; p<this->nparticles; p++) - for (int i=0; i<this->ncomponents; i++) + for (int i=0; i<state_dimension(particle_type); i++) { - ii = p*this->ncomponents+i; + ii = p*state_dimension(particle_type)+i; this->state[ii] += this->dt*(1901*this->rhs[0][ii] - 2774*this->rhs[1][ii] + 2616*this->rhs[2][ii] @@ -157,9 +157,9 @@ void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( break; case 6: for (int p=0; p<this->nparticles; p++) - for (int i=0; i<this->ncomponents; i++) + for (int i=0; i<state_dimension(particle_type); i++) { - ii = p*this->ncomponents+i; + ii = p*state_dimension(particle_type)+i; this->state[ii] += this->dt*(4277*this->rhs[0][ii] - 7923*this->rhs[1][ii] + 9982*this->rhs[2][ii] @@ -173,7 +173,7 @@ void particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void particles<particle_type, rnumber, interp_neighbours>::step() { this->AdamsBashforth((this->iteration < this->integration_steps) ? @@ -183,16 +183,16 @@ void particles<particle_type, rnumber, interp_neighbours>::step() } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void particles<particle_type, rnumber, interp_neighbours>::read() { if (this->myrank == 0) for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++) { - this->read_state_chunk(cindex, this->state+cindex*this->chunk_size*this->ncomponents); + this->read_state_chunk(cindex, this->state+cindex*this->chunk_size*state_dimension(particle_type)); if (this->iteration > 0) for (int i=0; i<this->integration_steps; i++) - this->read_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*this->ncomponents); + this->read_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*state_dimension(particle_type)); } MPI_Bcast( this->state, @@ -210,27 +210,27 @@ void particles<particle_type, rnumber, interp_neighbours>::read() this->comm); } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void particles<particle_type, rnumber, interp_neighbours>::write( const bool write_rhs) { if (this->myrank == 0) for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++) { - this->write_state_chunk(cindex, this->state+cindex*this->chunk_size*this->ncomponents); + this->write_state_chunk(cindex, this->state+cindex*this->chunk_size*state_dimension(particle_type)); if (write_rhs) for (int i=0; i<this->integration_steps; i++) - this->write_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*this->ncomponents); + this->write_rhs_chunk(cindex, i, this->rhs[i]+cindex*this->chunk_size*state_dimension(particle_type)); } } -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> void particles<particle_type, rnumber, interp_neighbours>::sample( interpolator_base<rnumber, interp_neighbours> *field, const char *dset_name) { double *y = new double[this->nparticles*3]; - field->sample(this->nparticles, this->ncomponents, this->state, y); + field->sample(this->nparticles, state_dimension(particle_type), this->state, y); if (this->myrank == 0) for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++) this->write_point3D_chunk(dset_name, cindex, y+cindex*this->chunk_size*3); diff --git a/bfps/cpp/particles.hpp b/bfps/cpp/particles.hpp index cf67b1fc32509b16d7a0774c0fc40a74759e39e0..03daf3e3fc866ac485b3649a28dfb13cf1b50ff1 100644 --- a/bfps/cpp/particles.hpp +++ b/bfps/cpp/particles.hpp @@ -37,7 +37,7 @@ #define PARTICLES -template <int particle_type, class rnumber, int interp_neighbours> +template <particle_types particle_type, class rnumber, int interp_neighbours> class particles: public particles_io_base<particle_type> { private: @@ -75,7 +75,7 @@ class particles: public particles_io_base<particle_type> interpolator_base<rnumber, interp_neighbours> *field, double *y) { - field->sample(this->nparticles, this->ncomponents, this->state, y); + field->sample(this->nparticles, state_dimension(particle_type), this->state, y); } void get_rhs( diff --git a/bfps/cpp/particles_base.cpp b/bfps/cpp/particles_base.cpp index c0fc631799cf55e1fbd9b8c93db945f47aa9e980..59d6d078dd63a319f34d3ae03acccc1f259714be 100644 --- a/bfps/cpp/particles_base.cpp +++ b/bfps/cpp/particles_base.cpp @@ -30,78 +30,56 @@ #include <cassert> #include "particles_base.hpp" -template <int particle_type> +template <particle_types particle_type> single_particle_state<particle_type>::single_particle_state() { - switch(particle_type) - { - default: - this->data = new double[3]; - std::fill_n(this->data, 3, 0); - break; - } + std::fill_n(this->data, state_dimension(particle_type), 0); } -template <int particle_type> +template <particle_types particle_type> single_particle_state<particle_type>::single_particle_state( const single_particle_state<particle_type> &src) { - switch(particle_type) - { - default: - this->data = new double[3]; - std::copy(src.data, src.data + 3, this->data); - break; - } + std::copy( + src.data, + src.data + state_dimension(particle_type), + this->data); } -template <int particle_type> +template <particle_types particle_type> single_particle_state<particle_type>::single_particle_state( const double *src) { - switch(particle_type) - { - default: - this->data = new double[3]; - std::copy(src, src + 3, this->data); - break; - } + std::copy( + src, + src + state_dimension(particle_type), + this->data); } -template <int particle_type> +template <particle_types particle_type> single_particle_state<particle_type>::~single_particle_state() { - switch(particle_type) - { - default: - delete[] this->data; - break; - } } -template <int particle_type> +template <particle_types particle_type> single_particle_state<particle_type> &single_particle_state<particle_type>::operator=( const single_particle_state &src) { - switch(particle_type) - { - default: - std::copy(src.data, src.data + 3, this->data); - break; - } + std::copy( + src.data, + src.data + state_dimension(particle_type), + this->data); return *this; } -template <int particle_type> +template <particle_types particle_type> single_particle_state<particle_type> &single_particle_state<particle_type>::operator=( const double *src) { - switch(particle_type) - { - default: - std::copy(src, src + 3, this->data); - break; - } + std::copy( + src, + src + state_dimension(particle_type), + this->data); return *this; } @@ -134,19 +112,13 @@ int get_chunk_offsets( return EXIT_SUCCESS; } -template <int particle_type> +template <particle_types particle_type> particles_io_base<particle_type>::particles_io_base( const char *NAME, const int TRAJ_SKIP, const hid_t data_file_id, MPI_Comm COMM) { - switch(particle_type) - { - default: - this->ncomponents = 3; - break; - } this->name = std::string(NAME); this->traj_skip = TRAJ_SKIP; this->comm = COMM; @@ -161,7 +133,7 @@ particles_io_base<particle_type>::particles_io_base( dspace = H5Dget_space(dset); this->hdf5_state_dims.resize(H5Sget_simple_extent_ndims(dspace)); H5Sget_simple_extent_dims(dspace, &this->hdf5_state_dims.front(), NULL); - assert(this->hdf5_state_dims[this->hdf5_state_dims.size()-1] == this->ncomponents); + assert(this->hdf5_state_dims[this->hdf5_state_dims.size()-1] == state_dimension(particle_type)); this->nparticles = 1; for (int i=1; i<this->hdf5_state_dims.size()-1; i++) this->nparticles *= this->hdf5_state_dims[i]; @@ -235,14 +207,14 @@ particles_io_base<particle_type>::particles_io_base( DEBUG_MSG("exiting particles_io_base constructor\n"); } -template <int particle_type> +template <particle_types particle_type> particles_io_base<particle_type>::~particles_io_base() { if(this->myrank == 0) H5Gclose(this->hdf5_group_id); } -template <int particle_type> +template <particle_types particle_type> void particles_io_base<particle_type>::read_state_chunk( const int cindex, double *data) @@ -276,7 +248,7 @@ void particles_io_base<particle_type>::read_state_chunk( DEBUG_MSG("exiting read_state_chunk\n"); } -template <int particle_type> +template <particle_types particle_type> void particles_io_base<particle_type>::write_state_chunk( const int cindex, const double *data) @@ -308,7 +280,7 @@ void particles_io_base<particle_type>::write_state_chunk( delete[] offset; } -template <int particle_type> +template <particle_types particle_type> void particles_io_base<particle_type>::read_rhs_chunk( const int cindex, const int rhsindex, @@ -350,7 +322,7 @@ void particles_io_base<particle_type>::read_rhs_chunk( //DEBUG_MSG("exiting read_rhs_chunk\n"); } -template <int particle_type> +template <particle_types particle_type> void particles_io_base<particle_type>::write_rhs_chunk( const int cindex, const int rhsindex, @@ -387,7 +359,7 @@ void particles_io_base<particle_type>::write_rhs_chunk( delete[] offset; } -template <int particle_type> +template <particle_types particle_type> void particles_io_base<particle_type>::write_point3D_chunk( const std::string dset_name, const int cindex, diff --git a/bfps/cpp/particles_base.hpp b/bfps/cpp/particles_base.hpp index 923c29253ba1ddabff1791ca40d03f79109eaf20..114661fcc4c8f8e7d1a38bbb6d646e5745b9415f 100644 --- a/bfps/cpp/particles_base.hpp +++ b/bfps/cpp/particles_base.hpp @@ -26,6 +26,7 @@ #include <vector> #include <hdf5.h> +#include <unordered_map> #include "interpolator_base.hpp" #ifndef PARTICLES_BASE @@ -35,13 +36,21 @@ /* particle types */ enum particle_types {POINT3D, VELOCITY_TRACER}; +/* space dimension */ +constexpr unsigned int state_dimension(particle_types particle_type) +{ + return ((particle_type == POINT3D) ? 3 : ( + (particle_type == VELOCITY_TRACER) ? 3 : + 3)); +} + /* 1 particle state type */ -template <int particle_type> +template <particle_types particle_type> class single_particle_state { public: - double *data; + double data[state_dimension(particle_type)]; single_particle_state(); single_particle_state(const single_particle_state &src); @@ -61,7 +70,7 @@ std::vector<std::vector<hsize_t>> get_chunk_offsets( std::vector<hsize_t> data_dims, std::vector<hsize_t> chnk_dims); -template <int particle_type> +template <particle_types particle_type> class particles_io_base { protected: @@ -69,7 +78,6 @@ class particles_io_base MPI_Comm comm; int nparticles; - int ncomponents; std::string name; int chunk_size; diff --git a/bfps/cpp/rFFTW_distributed_particles.cpp b/bfps/cpp/rFFTW_distributed_particles.cpp new file mode 100644 index 0000000000000000000000000000000000000000..412671b89ea000efadd713e9cf67221009b0d2cb --- /dev/null +++ b/bfps/cpp/rFFTW_distributed_particles.cpp @@ -0,0 +1,692 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +//#define NDEBUG + +#include <cmath> +#include <cassert> +#include <cstring> +#include <string> +#include <sstream> +#include <set> + +#include "base.hpp" +#include "rFFTW_distributed_particles.hpp" +#include "fftw_tools.hpp" + + +extern int myrank, nprocs; + +template <particle_types particle_type, class rnumber, int interp_neighbours> +rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::rFFTW_distributed_particles( + const char *NAME, + const hid_t data_file_id, + rFFTW_interpolator<rnumber, interp_neighbours> *FIELD, + const int TRAJ_SKIP, + const int INTEGRATION_STEPS) : particles_io_base<particle_type>( + NAME, + TRAJ_SKIP, + data_file_id, + FIELD->descriptor->comm) +{ + /* check that integration_steps has a valid value. + * If NDEBUG is defined, "assert" doesn't do anything. + * With NDEBUG defined, and an invalid INTEGRATION_STEPS, + * the particles will simply sit still. + * */ + assert((INTEGRATION_STEPS <= 6) && + (INTEGRATION_STEPS >= 1)); + /* check that the field layout is compatible with this class. + * if it's not, the code will fail in bad ways, most likely ending up + * with various CPUs locked in some MPI send/receive. + * therefore I prefer to just kill the code at this point, + * no matter whether or not NDEBUG is present. + * */ + if (interp_neighbours*2+2 > FIELD->descriptor->subsizes[0]) + { + DEBUG_MSG("parameters incompatible with rFFTW_distributed_particles.\n" + "interp kernel size is %d, local_z_size is %d\n", + interp_neighbours*2+2, FIELD->descriptor->subsizes[0]); + if (FIELD->descriptor->myrank == 0) + std::cerr << "parameters incompatible with rFFTW_distributed_particles." << std::endl; + exit(0); + } + this->vel = FIELD; + this->rhs.resize(INTEGRATION_STEPS); + this->integration_steps = INTEGRATION_STEPS; + this->state.reserve(2*this->nparticles / this->nprocs); + for (unsigned int i=0; i<this->rhs.size(); i++) + this->rhs[i].reserve(2*this->nparticles / this->nprocs); + + /* build communicators and stuff for interpolation */ + + /* number of processors per domain */ + this->domain_nprocs[-1] = 2; // domain in common with lower z CPU + this->domain_nprocs[ 0] = 1; // local domain + this->domain_nprocs[ 1] = 2; // domain in common with higher z CPU + + /* initialize domain bins */ + this->domain_particles[-1] = std::unordered_set<int>(); + this->domain_particles[ 0] = std::unordered_set<int>(); + this->domain_particles[ 1] = std::unordered_set<int>(); + this->domain_particles[-1].reserve(unsigned( + 1.5*(interp_neighbours*2+2)* + float(this->nparticles) / + this->nprocs)); + this->domain_particles[ 1].reserve(unsigned( + 1.5*(interp_neighbours*2+2)* + float(this->nparticles) / + this->nprocs)); + this->domain_particles[ 0].reserve(unsigned( + 1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)* + float(this->nparticles) / + this->nprocs)); + + int rmaxz, rminz; + int color, key; + MPI_Comm tmpcomm; + for (int rank=0; rank<this->nprocs; rank++) + { + color = MPI_UNDEFINED; + key = MPI_UNDEFINED; + if (this->myrank == rank) + { + color = rank; + key = 0; + } + if (this->myrank == MOD(rank + 1, this->nprocs)) + { + color = rank; + key = 1; + } + MPI_Comm_split(this->comm, color, key, &tmpcomm); + if (this->myrank == rank) + this->domain_comm[ 1] = tmpcomm; + if (this->myrank == MOD(rank+1, this->nprocs)) + this->domain_comm[-1] = tmpcomm; + + } + + /* following code may be useful in the future for the general case */ + //this->interp_comm.resize(this->vel->descriptor->sizes[0]); + //this->interp_nprocs.resize(this->vel->descriptor->sizes[0]); + //for (int zg=0; zg<this->vel->descriptor->sizes[0]; zg++) + //{ + // color = (this->vel->get_rank_info( + // (zg+.5)*this->vel->dz, rminz, rmaxz) ? zg : MPI_UNDEFINED); + // key = zg - this->vel->descriptor->starts[0] + interp_neighbours; + // MPI_Comm_split(this->comm, color, key, &this->interp_comm[zg]); + // if (this->interp_comm[zg] != MPI_COMM_NULL) + // MPI_Comm_size(this->interp_comm[zg], &this->interp_nprocs[zg]); + // else + // this->interp_nprocs[zg] = 0; + //} +} + +template <particle_types particle_type, class rnumber, int interp_neighbours> +rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::~rFFTW_distributed_particles() +{ +} + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample( + rFFTW_interpolator<rnumber, interp_neighbours> *field, + const std::unordered_map<int, single_particle_state<particle_type>> &x, + const std::unordered_map<int, std::unordered_set<int>> &dp, + std::unordered_map<int, single_particle_state<POINT3D>> &y) +{ + double *yyy; + double *yy; + y.clear(); + /* local z domain */ + yy = new double[3]; + for (auto p: dp.at(0)) + { + (*field)(x.find(p)->second.data, yy); + y[p] = yy; + } + delete[] yy; + /* boundary z domains */ + int domain_index; + for (int rankpair = 0; rankpair < this->nprocs; rankpair++) + { + if (this->myrank == rankpair) + domain_index = 1; + if (this->myrank == MOD(rankpair+1, this->nprocs)) + domain_index = -1; + if (this->myrank == rankpair || + this->myrank == MOD(rankpair+1, this->nprocs)) + { + yy = new double[3*dp.at(domain_index).size()]; + yyy = new double[3*dp.at(domain_index).size()]; + int tindex; + tindex = 0; + // can this sorting be done more efficiently? + std::set<int> ordered_dp; + for (auto p: dp.at(domain_index)) + ordered_dp.insert(p); + + for (auto p: ordered_dp) + { + (*field)(x.at(p).data, yy + tindex*3); + tindex++; + } + MPI_Allreduce( + yy, + yyy, + 3*dp.at(domain_index).size(), + MPI_DOUBLE, + MPI_SUM, + this->domain_comm[domain_index]); + tindex = 0; + for (auto p: ordered_dp) + { + y[p] = yyy + tindex*3; + tindex++; + } + delete[] yy; + delete[] yyy; + } + } +} + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::get_rhs( + const std::unordered_map<int, single_particle_state<particle_type>> &x, + const std::unordered_map<int, std::unordered_set<int>> &dp, + std::unordered_map<int, single_particle_state<particle_type>> &y) +{ + std::unordered_map<int, single_particle_state<POINT3D>> yy; + switch(particle_type) + { + case VELOCITY_TRACER: + this->sample(this->vel, x, dp, yy); + y.clear(); + for (auto &pp: x) + y[pp.first] = yy[pp.first].data; + break; + } +} + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sample( + rFFTW_interpolator<rnumber, interp_neighbours> *field, + const char *dset_name) +{ + std::unordered_map<int, single_particle_state<POINT3D>> y; + this->sample(field, this->state, this->domain_particles, y); + this->write(dset_name, y); +} + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::roll_rhs() +{ + for (int i=this->integration_steps-2; i>=0; i--) + rhs[i+1] = rhs[i]; +} + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::redistribute( + std::unordered_map<int, single_particle_state<particle_type>> &x, + std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals, + std::unordered_map<int, std::unordered_set<int>> &dp) +{ + //DEBUG_MSG("entered redistribute\n"); + /* get new distribution of particles */ + std::unordered_map<int, std::unordered_set<int>> newdp; + this->sort_into_domains(x, newdp); + /* take care of particles that are leaving the shared domains */ + int dindex[2] = {-1, 1}; + // for each D of the 2 shared domains + for (int di=0; di<2; di++) + // for all particles previously in D + for (auto p: dp[dindex[di]]) + { + // if the particle is no longer in D + if (newdp[dindex[di]].find(p) == newdp[dindex[di]].end()) + { + // and the particle is not in the local domain + if (newdp[0].find(p) == newdp[0].end()) + { + // remove the particle from the local list + x.erase(p); + for (int i=0; i<vals.size(); i++) + vals[i].erase(p); + } + // if the particle is in the local domain, do nothing + } + } + /* take care of particles that are entering the shared domains */ + /* neighbouring rank offsets */ + int ro[2]; + ro[0] = -1; + ro[1] = 1; + /* neighbouring ranks */ + int nr[2]; + nr[0] = MOD(this->myrank+ro[0], this->nprocs); + nr[1] = MOD(this->myrank+ro[1], this->nprocs); + /* particles to send, particles to receive */ + std::vector<int> ps[2], pr[2]; + /* number of particles to send, number of particles to receive */ + int nps[2], npr[2]; + int rsrc, rdst; + /* get list of id-s to send */ + for (auto &p: dp[0]) + { + for (int di=0; di<2; di++) + { + if (newdp[dindex[di]].find(p) != newdp[dindex[di]].end()) + ps[di].push_back(p); + } + } + /* prepare data for send recv */ + for (int i=0; i<2; i++) + nps[i] = ps[i].size(); + for (rsrc = 0; rsrc<this->nprocs; rsrc++) + for (int i=0; i<2; i++) + { + rdst = MOD(rsrc+ro[i], this->nprocs); + if (this->myrank == rsrc) + MPI_Send( + nps+i, + 1, + MPI_INTEGER, + rdst, + 2*(rsrc*this->nprocs + rdst)+i, + this->comm); + if (this->myrank == rdst) + MPI_Recv( + npr+1-i, + 1, + MPI_INTEGER, + rsrc, + 2*(rsrc*this->nprocs + rdst)+i, + this->comm, + MPI_STATUS_IGNORE); + } + //DEBUG_MSG("I have to send %d %d particles\n", nps[0], nps[1]); + //DEBUG_MSG("I have to recv %d %d particles\n", npr[0], npr[1]); + for (int i=0; i<2; i++) + pr[i].resize(npr[i]); + + int buffer_size = (nps[0] > nps[1]) ? nps[0] : nps[1]; + buffer_size = (buffer_size > npr[0])? buffer_size : npr[0]; + buffer_size = (buffer_size > npr[1])? buffer_size : npr[1]; + //DEBUG_MSG("buffer size is %d\n", buffer_size); + double *buffer = new double[buffer_size*state_dimension(particle_type)*(1+vals.size())]; + for (rsrc = 0; rsrc<this->nprocs; rsrc++) + for (int i=0; i<2; i++) + { + rdst = MOD(rsrc+ro[i], this->nprocs); + if (this->myrank == rsrc && nps[i] > 0) + { + MPI_Send( + &ps[i].front(), + nps[i], + MPI_INTEGER, + rdst, + 2*(rsrc*this->nprocs + rdst), + this->comm); + int pcounter = 0; + for (int p: ps[i]) + { + std::copy(x[p].data, + x[p].data + state_dimension(particle_type), + buffer + pcounter*(1+vals.size())*state_dimension(particle_type)); + for (int tindex=0; tindex<vals.size(); tindex++) + { + std::copy(vals[tindex][p].data, + vals[tindex][p].data + state_dimension(particle_type), + buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type)); + } + pcounter++; + } + MPI_Send( + buffer, + nps[i]*(1+vals.size())*state_dimension(particle_type), + MPI_DOUBLE, + rdst, + 2*(rsrc*this->nprocs + rdst)+1, + this->comm); + } + if (this->myrank == rdst && npr[1-i] > 0) + { + MPI_Recv( + &pr[1-i].front(), + npr[1-i], + MPI_INTEGER, + rsrc, + 2*(rsrc*this->nprocs + rdst), + this->comm, + MPI_STATUS_IGNORE); + MPI_Recv( + buffer, + npr[1-i]*(1+vals.size())*state_dimension(particle_type), + MPI_DOUBLE, + rsrc, + 2*(rsrc*this->nprocs + rdst)+1, + this->comm, + MPI_STATUS_IGNORE); + int pcounter = 0; + for (int p: pr[1-i]) + { + x[p] = buffer + (pcounter*(1+vals.size()))*state_dimension(particle_type); + newdp[1-i].insert(p); + for (int tindex=0; tindex<vals.size(); tindex++) + { + vals[tindex][p] = buffer + (pcounter*(1+vals.size()) + tindex+1)*state_dimension(particle_type); + } + pcounter++; + } + } + } + delete[] buffer; + // x has been changed, so newdp is obsolete + // we need to sort into domains again + this->sort_into_domains(x, dp); + + +#ifndef NDEBUG + /* check that all particles at x are local */ + //for (auto &pp: x) + // if (this->vel->get_rank(pp.second.data[2]) != this->myrank) + // { + // DEBUG_MSG("found particle %d with rank %d\n", + // pp.first, + // this->vel->get_rank(pp.second.data[2])); + // assert(false); + // } +#endif + //DEBUG_MSG("exiting redistribute\n"); +} + + + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::AdamsBashforth( + const int nsteps) +{ + this->get_rhs(this->state, this->domain_particles, this->rhs[0]); + for (auto &pp: this->state) + for (int i=0; i<state_dimension(particle_type); i++) + switch(nsteps) + { + case 1: + pp.second[i] += this->dt*this->rhs[0][pp.first][i]; + break; + case 2: + pp.second[i] += this->dt*(3*this->rhs[0][pp.first][i] + - this->rhs[1][pp.first][i])/2; + break; + case 3: + pp.second[i] += this->dt*(23*this->rhs[0][pp.first][i] + - 16*this->rhs[1][pp.first][i] + + 5*this->rhs[2][pp.first][i])/12; + break; + case 4: + pp.second[i] += this->dt*(55*this->rhs[0][pp.first][i] + - 59*this->rhs[1][pp.first][i] + + 37*this->rhs[2][pp.first][i] + - 9*this->rhs[3][pp.first][i])/24; + break; + case 5: + pp.second[i] += this->dt*(1901*this->rhs[0][pp.first][i] + - 2774*this->rhs[1][pp.first][i] + + 2616*this->rhs[2][pp.first][i] + - 1274*this->rhs[3][pp.first][i] + + 251*this->rhs[4][pp.first][i])/720; + break; + case 6: + pp.second[i] += this->dt*(4277*this->rhs[0][pp.first][i] + - 7923*this->rhs[1][pp.first][i] + + 9982*this->rhs[2][pp.first][i] + - 7298*this->rhs[3][pp.first][i] + + 2877*this->rhs[4][pp.first][i] + - 475*this->rhs[5][pp.first][i])/1440; + break; + } + this->redistribute(this->state, this->rhs, this->domain_particles); + this->roll_rhs(); +} + + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::step() +{ + this->AdamsBashforth((this->iteration < this->integration_steps) ? + this->iteration+1 : + this->integration_steps); + this->iteration++; +} + + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::sort_into_domains( + const std::unordered_map<int, single_particle_state<particle_type>> &x, + std::unordered_map<int, std::unordered_set<int>> &dp) +{ + int tmpint1, tmpint2; + dp.clear(); + dp[-1] = std::unordered_set<int>(); + dp[ 0] = std::unordered_set<int>(); + dp[ 1] = std::unordered_set<int>(); + dp[-1].reserve(unsigned( + 1.5*(interp_neighbours*2+2)* + float(this->nparticles) / + this->nprocs)); + dp[ 1].reserve(unsigned( + 1.5*(interp_neighbours*2+2)* + float(this->nparticles) / + this->nprocs)); + dp[ 0].reserve(unsigned( + 1.5*(this->vel->descriptor->subsizes[0] - interp_neighbours*2-2)* + float(this->nparticles) / + this->nprocs)); + for (auto &xx: x) + { + if (this->vel->get_rank_info(xx.second.data[2], tmpint1, tmpint2)) + { + if (tmpint1 == tmpint2) + dp[0].insert(xx.first); + else + { + if (this->myrank == tmpint1) + dp[-1].insert(xx.first); + else + dp[ 1].insert(xx.first); + } + } + } +} + + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::read() +{ + double *temp = new double[this->chunk_size*state_dimension(particle_type)]; + int tmpint1, tmpint2; + for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++) + { + //read state + if (this->myrank == 0) + this->read_state_chunk(cindex, temp); + MPI_Bcast( + temp, + this->chunk_size*state_dimension(particle_type), + MPI_DOUBLE, + 0, + this->comm); + for (int p=0; p<this->chunk_size; p++) + { + if (this->vel->get_rank_info(temp[state_dimension(particle_type)*p+2], tmpint1, tmpint2)) + { + this->state[p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; + } + } + //read rhs + if (this->iteration > 0) + for (int i=0; i<this->integration_steps; i++) + { + if (this->myrank == 0) + this->read_rhs_chunk(cindex, i, temp); + MPI_Bcast( + temp, + this->chunk_size*state_dimension(particle_type), + MPI_DOUBLE, + 0, + this->comm); + for (int p=0; p<this->chunk_size; p++) + { + auto pp = this->state.find(p+cindex*this->chunk_size); + if (pp != this->state.end()) + this->rhs[i][p+cindex*this->chunk_size] = temp + state_dimension(particle_type)*p; + } + } + } + this->sort_into_domains(this->state, this->domain_particles); + DEBUG_MSG("%s->state.size = %ld\n", this->name.c_str(), this->state.size()); + for (int domain=-1; domain<=1; domain++) + { + DEBUG_MSG("domain %d nparticles = %ld\n", domain, this->domain_particles[domain].size()); + } + delete[] temp; +} + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write( + const char *dset_name, + std::unordered_map<int, single_particle_state<POINT3D>> &y) +{ + double *data = new double[this->nparticles*3]; + double *yy = new double[this->nparticles*3]; + int zmin_rank, zmax_rank; + int pindex = 0; + for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++) + { + std::fill_n(yy, this->chunk_size*3, 0); + for (int p=0; p<this->chunk_size; p++, pindex++) + { + if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || + this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) + { + std::copy(y[pindex].data, + y[pindex].data + 3, + yy + p*3); + } + } + MPI_Allreduce( + yy, + data, + 3*this->chunk_size, + MPI_DOUBLE, + MPI_SUM, + this->comm); + if (this->myrank == 0) + this->write_point3D_chunk(dset_name, cindex, data); + } + delete[] yy; + delete[] data; +} + +template <particle_types particle_type, class rnumber, int interp_neighbours> +void rFFTW_distributed_particles<particle_type, rnumber, interp_neighbours>::write( + const bool write_rhs) +{ + double *temp0 = new double[this->chunk_size*state_dimension(particle_type)]; + double *temp1 = new double[this->chunk_size*state_dimension(particle_type)]; + int zmin_rank, zmax_rank; + int pindex = 0; + for (int cindex=0; cindex<this->get_number_of_chunks(); cindex++) + { + //write state + std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); + pindex = cindex*this->chunk_size; + for (int p=0; p<this->chunk_size; p++, pindex++) + { + if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || + this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) + { + std::copy(this->state[pindex].data, + this->state[pindex].data + state_dimension(particle_type), + temp0 + p*state_dimension(particle_type)); + } + } + MPI_Allreduce( + temp0, + temp1, + state_dimension(particle_type)*this->chunk_size, + MPI_DOUBLE, + MPI_SUM, + this->comm); + if (this->myrank == 0) + this->write_state_chunk(cindex, temp1); + //write rhs + if (write_rhs) + for (int i=0; i<this->integration_steps; i++) + { + std::fill_n(temp0, state_dimension(particle_type)*this->chunk_size, 0); + pindex = cindex*this->chunk_size; + for (int p=0; p<this->chunk_size; p++, pindex++) + { + if (this->domain_particles[-1].find(pindex) != this->domain_particles[-1].end() || + this->domain_particles[ 0].find(pindex) != this->domain_particles[ 0].end()) + { + std::copy(this->rhs[i][pindex].data, + this->rhs[i][pindex].data + state_dimension(particle_type), + temp0 + p*state_dimension(particle_type)); + } + } + MPI_Allreduce( + temp0, + temp1, + state_dimension(particle_type)*this->chunk_size, + MPI_DOUBLE, + MPI_SUM, + this->comm); + if (this->myrank == 0) + this->write_rhs_chunk(cindex, i, temp1); + } + } + delete[] temp0; + delete[] temp1; +} + + +/*****************************************************************************/ +template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 1>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 2>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 3>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 4>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 5>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, float, 6>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 1>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 2>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 3>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 4>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 5>; +template class rFFTW_distributed_particles<VELOCITY_TRACER, double, 6>; +/*****************************************************************************/ + diff --git a/bfps/cpp/rFFTW_distributed_particles.hpp b/bfps/cpp/rFFTW_distributed_particles.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e271bbfae56c0d49bf66cebcb5e8e8158f81940b --- /dev/null +++ b/bfps/cpp/rFFTW_distributed_particles.hpp @@ -0,0 +1,116 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include <stdio.h> +#include <stdlib.h> +#include <iostream> +#include <unordered_map> +#include <unordered_set> +#include <vector> +#include <hdf5.h> +#include "base.hpp" +#include "particles_base.hpp" +#include "fluid_solver_base.hpp" +#include "rFFTW_interpolator.hpp" + +#ifndef RFFTW_DISTRIBUTED_PARTICLES + +#define RFFTW_DISTRIBUTED_PARTICLES + +template <particle_types particle_type, class rnumber, int interp_neighbours> +class rFFTW_distributed_particles: public particles_io_base<particle_type> +{ + private: + std::unordered_map<int, single_particle_state<particle_type>> state; + std::vector<std::unordered_map<int, single_particle_state<particle_type>>> rhs; + std::unordered_map<int, int> domain_nprocs; + std::unordered_map<int, MPI_Comm> domain_comm; + std::unordered_map<int, std::unordered_set<int>> domain_particles; + + public: + int integration_steps; + // this class only works with rFFTW interpolator + rFFTW_interpolator<rnumber, interp_neighbours> *vel; + + /* simulation parameters */ + double dt; + + /* methods */ + + /* constructor and destructor. + * allocate and deallocate: + * this->state + * this->rhs + * */ + rFFTW_distributed_particles( + const char *NAME, + const hid_t data_file_id, + rFFTW_interpolator<rnumber, interp_neighbours> *FIELD, + const int TRAJ_SKIP, + const int INTEGRATION_STEPS = 2); + ~rFFTW_distributed_particles(); + + void sample( + rFFTW_interpolator<rnumber, interp_neighbours> *field, + const char *dset_name); + void sample( + rFFTW_interpolator<rnumber, interp_neighbours> *field, + const std::unordered_map<int, single_particle_state<particle_type>> &x, + const std::unordered_map<int, std::unordered_set<int>> &dp, + std::unordered_map<int, single_particle_state<POINT3D>> &y); + void get_rhs( + const std::unordered_map<int, single_particle_state<particle_type>> &x, + const std::unordered_map<int, std::unordered_set<int>> &dp, + std::unordered_map<int, single_particle_state<particle_type>> &y); + + + void sort_into_domains( + const std::unordered_map<int, single_particle_state<particle_type>> &x, + std::unordered_map<int, std::unordered_set<int>> &dp); + void redistribute( + std::unordered_map<int, single_particle_state<particle_type>> &x, + std::vector<std::unordered_map<int, single_particle_state<particle_type>>> &vals, + std::unordered_map<int, std::unordered_set<int>> &dp); + + + /* input/output */ + void read(); + void write( + const char *dset_name, + std::unordered_map<int, single_particle_state<POINT3D>> &y); + void write( + const char *dset_name, + std::unordered_map<int, single_particle_state<particle_type>> &y); + void write(const bool write_rhs = true); + + /* solvers */ + void step(); + void roll_rhs(); + void AdamsBashforth(const int nsteps); +}; + +#endif//RFFTW_DISTRIBUTED_PARTICLES + diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp index facc09c42164d44478ffa01b47354a05b0decce0..bffae44f5986f9873a231442e92cba6cf005d3a4 100644 --- a/bfps/cpp/rFFTW_interpolator.cpp +++ b/bfps/cpp/rFFTW_interpolator.cpp @@ -54,6 +54,24 @@ rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator() delete[] this->compute; } +template <class rnumber, int interp_neighbours> +bool rFFTW_interpolator<rnumber, interp_neighbours>::get_rank_info(double z, int &maxz_rank, int &minz_rank) +{ + int zg = int(floor(z/this->dz)); + minz_rank = this->descriptor->rank[MOD( + zg - interp_neighbours, + this->descriptor->sizes[0])]; + maxz_rank = this->descriptor->rank[MOD( + zg + 1 + interp_neighbours, + this->descriptor->sizes[0])]; + bool is_here = false; + for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++) + is_here = (is_here || + (this->descriptor->myrank == + this->descriptor->rank[MOD(zg+iz, this->descriptor->sizes[0])])); + return is_here; +} + template <class rnumber, int interp_neighbours> void rFFTW_interpolator<rnumber, interp_neighbours>::sample( const int nparticles, diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp index fb96963eee8dd1361a59edf00b38b6460a27d9e9..795257d2744e432d9c346b93848cadfbd8cc85dc 100644 --- a/bfps/cpp/rFFTW_interpolator.hpp +++ b/bfps/cpp/rFFTW_interpolator.hpp @@ -37,6 +37,7 @@ template <class rnumber, int interp_neighbours> class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours> { public: + using interpolator_base<rnumber, interp_neighbours>::operator(); /* size of field to interpolate */ ptrdiff_t field_size; @@ -62,6 +63,8 @@ class rFFTW_interpolator:public interpolator_base<rnumber, interp_neighbours> return EXIT_SUCCESS; } + bool get_rank_info(double z, int &maxz_rank, int &minz_rank); + /* interpolate field at an array of locations */ void sample( const int nparticles, diff --git a/bfps/tools.py b/bfps/tools.py index 0f94a98ace7b849ae5f112ca7bd517829cdf4eca..fefb7819cf96b4f9b178252dd444ff5eabe01238 100644 --- a/bfps/tools.py +++ b/bfps/tools.py @@ -232,13 +232,16 @@ def particle_finite_diff_test( if interp_name not in pars.keys(): # old format interp_name = 'tracers{0}_field'.format(species) + interp_name = pars[interp_name].value + if type(interp_name) == bytes: + interp_name = str(interp_name, 'ASCII') to_print = ( 'steps={0}, interp={1}, neighbours={2}, '.format( pars['tracers{0}_integration_steps'.format(species)].value, - pars[str(pars[interp_name].value, 'ASCII') + '_type'].value, - pars[str(pars[interp_name].value, 'ASCII') + '_neighbours'].value)) - if 'spline' in str(pars[interp_name].value, 'ASCII'): - to_print += 'smoothness = {0}, '.format(pars[str(pars[interp_name].value, 'ASCII') + '_smoothness'].value) + pars[interp_name + '_type'].value, + pars[interp_name + '_neighbours'].value)) + if 'spline' in interp_name: + to_print += 'smoothness = {0}, '.format(pars[interp_name + '_smoothness'].value) to_print += ( 'SNR d1p-vel={0:.3f}'.format(np.mean(snr_vel1))) if acc_on: diff --git a/setup.py b/setup.py index 8f982144e9188dbc74eea1bb4d6ef1f32508b45c..8cb0e225fb8518645fb6bddef753ae8973f0d43d 100644 --- a/setup.py +++ b/setup.py @@ -92,6 +92,7 @@ print('This is bfps version ' + VERSION) ### lists of files and MANIFEST.in src_file_list = ['field_descriptor', 'interpolator_base', + 'rFFTW_distributed_particles', 'distributed_particles', 'particles_base', 'interpolator', diff --git a/tests/base.py b/tests/base.py index 2c616aeb1bbfbdc1b346ae365836429daafa93b0..b438874d79d837b71a5f286bf1005f3fd10ecf39 100644 --- a/tests/base.py +++ b/tests/base.py @@ -195,12 +195,14 @@ def compare_stats( key, np.max(np.abs(c0.statistics[key + '(t)'] - c0.statistics[key + '(t)'])))) for i in range(c0.particle_species): - print('maximum traj difference species {0} is {1} {2}'.format( + print('species={0} differences: max pos(t) {1:.1e} min vel(0) {2:.1e}, max vel(0) {3:.1e}'.format( i, np.max(np.abs(c0.get_particle_file()['tracers{0}/state'.format(i)][:] - c1.get_particle_file()['tracers{0}/state'.format(i)][:])), - np.nanmax(np.abs(c0.get_particle_file()['tracers{0}/state'.format(i)][:] - - c1.get_particle_file()['tracers{0}/state'.format(i)][:])))) + np.min(np.abs(c0.get_particle_file()['tracers{0}/velocity'.format(i)][0] - + c1.get_particle_file()['tracers{0}/velocity'.format(i)][0])), + np.max(np.abs(c0.get_particle_file()['tracers{0}/velocity'.format(i)][0] - + c1.get_particle_file()['tracers{0}/velocity'.format(i)][0])))) if plots_on: # plot energy and enstrophy fig = plt.figure(figsize = (12, 12)) diff --git a/tests/test_particle_classes.py b/tests/test_particle_classes.py index 6f38859a22c4f2e5a0ae71627899cd580af740d4..c14145c72fc57a886368e1a0a8cf9744486bf0ef 100644 --- a/tests/test_particle_classes.py +++ b/tests/test_particle_classes.py @@ -47,7 +47,7 @@ def scaling(opt): c1.compute_statistics() compare_stats(opt, c0, c1) opt.work_dir = wd + '/N{0:0>3x}_3'.format(opt.n) - c2 = launch(opt, dt = c0.parameters['dt'], particle_class = 'particles', interpolator_class = 'interpolator') + c2 = launch(opt, dt = c0.parameters['dt'], particle_class = 'rFFTW_distributed_particles') c2.compute_statistics() compare_stats(opt, c0, c2) compare_stats(opt, c1, c2)