diff --git a/bfps/cpp/particles.cpp b/bfps/cpp/particles.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2f50916a87fadf5b93ef95c3face8778b116cc0f --- /dev/null +++ b/bfps/cpp/particles.cpp @@ -0,0 +1,855 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + +// code is generally compiled via setuptools, therefore NDEBUG is present +#ifdef NDEBUG +#undef NDEBUG +#endif//NDEBUG + + +#include <cmath> +#include <cassert> +#include <cstring> +#include <string> +#include <sstream> + +#include "base.hpp" +#include "particles.hpp" +#include "fftw_tools.hpp" + + +extern int myrank, nprocs; + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::particles( + const char *NAME, + fluid_solver_base<rnumber> *FSOLVER, + const int NPARTICLES, + const int TRAJ_SKIP, + const int INTEGRATION_STEPS) +{ + assert((INTEGRATION_STEPS <= 6) && + (INTEGRATION_STEPS >= 1)); + strncpy(this->name, NAME, 256); + this->fs = FSOLVER; + this->nparticles = NPARTICLES; + this->integration_steps = INTEGRATION_STEPS; + this->traj_skip = TRAJ_SKIP; + // in principle only the buffer width at the top needs the +1, + // but things are simpler if buffer_width is the same + this->buffer_width = interp_neighbours+1; + this->buffer_size = this->buffer_width*this->fs->rd->slice_size; + this->array_size = this->nparticles * ncomponents; + this->state = fftw_alloc_real(this->array_size); + std::fill_n(this->state, this->array_size, 0.0); + for (int i=0; i < this->integration_steps; i++) + { + this->rhs[i] = fftw_alloc_real(this->array_size); + std::fill_n(this->rhs[i], this->array_size, 0.0); + } + this->watching = new bool[this->fs->rd->nprocs*nparticles]; + std::fill_n(this->watching, this->fs->rd->nprocs*this->nparticles, false); + this->computing = new int[nparticles]; + + int tdims[4]; + tdims[0] = this->buffer_width*2*this->fs->rd->nprocs + this->fs->rd->sizes[0]; + tdims[1] = this->fs->rd->sizes[1]; + tdims[2] = this->fs->rd->sizes[2]; + tdims[3] = this->fs->rd->sizes[3]; + this->buffered_field_descriptor = new field_descriptor<rnumber>( + 4, tdims, + this->fs->rd->mpi_dtype, + this->fs->rd->comm); + + // compute dx, dy, dz; + this->dx = 4*acos(0) / (this->fs->dkx*this->fs->rd->sizes[2]); + this->dy = 4*acos(0) / (this->fs->dky*this->fs->rd->sizes[1]); + this->dz = 4*acos(0) / (this->fs->dkz*this->fs->rd->sizes[0]); + + // compute lower and upper bounds + this->lbound = new double[nprocs]; + this->ubound = new double[nprocs]; + double *tbound = new double[nprocs]; + std::fill_n(tbound, nprocs, 0.0); + tbound[this->fs->rd->myrank] = this->fs->rd->starts[0]*this->dz; + MPI_Allreduce( + tbound, + this->lbound, + nprocs, + MPI_DOUBLE, + MPI_SUM, + this->fs->rd->comm); + std::fill_n(tbound, nprocs, 0.0); + tbound[this->fs->rd->myrank] = (this->fs->rd->starts[0] + this->fs->rd->subsizes[0])*this->dz; + MPI_Allreduce( + tbound, + this->ubound, + nprocs, + MPI_DOUBLE, + MPI_SUM, + this->fs->rd->comm); + delete[] tbound; + //for (int r = 0; r<nprocs; r++) + // DEBUG_MSG( + // "lbound[%d] = %lg, ubound[%d] = %lg\n", + // r, this->lbound[r], + // r, this->ubound[r] + // ); +} + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::~particles() +{ + delete[] this->computing; + delete[] this->watching; + fftw_free(this->state); + for (int i=0; i < this->integration_steps; i++) + { + fftw_free(this->rhs[i]); + } + delete[] this->lbound; + delete[] this->ubound; + delete this->buffered_field_descriptor; +} + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::get_rhs(double *x, double *y) +{ + std::fill_n(y, this->array_size, 0.0); + switch(particle_type) + { + case VELOCITY_TRACER: + int deriv[] = {0, 0, 0}; + /* get grid coordinates */ + int *xg = new int[this->array_size]; + double *xx = new double[this->array_size]; + rnumber *vel = this->data + this->buffer_size; + this->get_grid_coordinates(x, xg, xx); + for (int p=0; p<this->nparticles; p++) + { + if (this->watching[this->fs->rd->myrank*this->nparticles+p]) + { + int crank = this->get_rank(x[p*3 + 2]); + if (this->fs->rd->myrank == crank) + { + this->interpolation_formula(vel, xg + p*3, xx + p*3, y + p*3, deriv); + //DEBUG_MSG( + // "position is %g %g %g %d %d %d %g %g %g, result is %g %g %g\n", + // x[p*3], x[p*3+1], x[p*3+2], + // xg[p*3], xg[p*3+1], xg[p*3+2], + // xx[p*3], xx[p*3+1], xx[p*3+2], + // y[p*3], y[p*3+1], y[p*3+2]); + } + if (crank != this->computing[p]) + { + this->synchronize_single_particle_state(p, y, crank); + } + //DEBUG_MSG( + // "after synch crank is %d, computing rank is %d, position is %g %g %g, result is %g %g %g\n", + // this->iteration, p, + // crank, this->computing[p], + // x[p*3], x[p*3+1], x[p*3+2], + // y[p*3], y[p*3+1], y[p*3+2]); + } + } + delete[] xg; + delete[] xx; + break; + } +} + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::jump_estimate(double *jump) +{ + std::fill_n(jump, this->nparticles, 0.0); + switch(particle_type) + { + case VELOCITY_TRACER: + int deriv[] = {0, 0, 0}; + int *xg = new int[this->array_size]; + double *xx = new double[this->array_size]; + rnumber *vel = this->data + this->buffer_size; + double tmp[3]; + /* get grid coordinates */ + this->get_grid_coordinates(this->state, xg, xx); + + /* perform interpolation */ + for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) + { + this->interpolation_formula(vel, xg + p*3, xx + p*3, tmp, deriv); + jump[p] = fabs(3*this->dt * tmp[2]); + if (jump[p] < this->dz*1.01) + jump[p] = this->dz*1.01; + } + delete[] xg; + delete[] xx; + break; + } +} + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +int particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::get_rank(double z) +{ + int tmp = this->fs->rd->rank[MOD(int(floor(z/this->dz)), this->fs->rd->sizes[0])]; + assert(tmp >= 0 && tmp < this->fs->rd->nprocs); + return tmp; +} + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::synchronize_single_particle_state(int p, double *x, int source) +{ + if (source == -1) source = this->computing[p]; + if (this->watching[this->fs->rd->myrank*this->nparticles+p]) for (int r=0; r<this->fs->rd->nprocs; r++) + if (r != source && + this->watching[r*this->nparticles+p]) + { + //DEBUG_MSG("synchronizing state %d from %d to %d\n", p, this->computing[p], r); + if (this->fs->rd->myrank == source) + MPI_Send( + x+p*ncomponents, + ncomponents, + MPI_DOUBLE, + r, + p+this->computing[p]*this->nparticles, + this->fs->rd->comm); + if (this->fs->rd->myrank == r) + MPI_Recv( + x+p*ncomponents, + ncomponents, + MPI_DOUBLE, + source, + p+this->computing[p]*this->nparticles, + this->fs->rd->comm, + MPI_STATUS_IGNORE); + } +} + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::synchronize() +{ + double *tstate = fftw_alloc_real(this->array_size); + // first, synchronize state and jump across CPUs + std::fill_n(tstate, this->array_size, 0.0); + for (int p=0; p<this->nparticles; p++) + { + //if (this->watching[this->fs->rd->myrank*this->nparticles + p]) + //DEBUG_MSG( + // "in synchronize, position for particle %d is %g %g %g\n", + // p, + // this->state[p*ncomponents], + // this->state[p*ncomponents+1], + // this->state[p*ncomponents+2]); + if (this->fs->rd->myrank == this->computing[p]) + std::copy(this->state + p*ncomponents, + this->state + (p+1)*ncomponents, + tstate + p*ncomponents); + } + MPI_Allreduce( + tstate, + this->state, + this->array_size, + MPI_DOUBLE, + MPI_SUM, + this->fs->rd->comm); + if (this->integration_steps >= 1) + { + for (int i=0; i<this->integration_steps; i++) + { + std::fill_n(tstate, this->array_size, 0.0); + for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) + std::copy(this->rhs[i] + p*ncomponents, + this->rhs[i] + (p+1)*ncomponents, + tstate + p*ncomponents); + std::fill_n(this->rhs[i], this->array_size, 0.0); + MPI_Allreduce( + tstate, + this->rhs[i], + this->array_size, + MPI_DOUBLE, + MPI_SUM, + this->fs->rd->comm); + } + } + fftw_free(tstate); + // assignment of particles + for (int p=0; p<this->nparticles; p++) + { + this->computing[p] = this->get_rank(this->state[p*ncomponents + 2]); + //DEBUG_MSG("synchronizing particles, particle %d computing is %d\n", p, this->computing[p]); + } + double *jump = fftw_alloc_real(this->nparticles); + this->jump_estimate(jump); + // now, see who needs to watch + bool *local_watching = new bool[this->fs->rd->nprocs*this->nparticles]; + std::fill_n(local_watching, this->fs->rd->nprocs*this->nparticles, false); + for (int p=0; p<this->nparticles; p++) + if (this->fs->rd->myrank == this->computing[p]) + { + local_watching[this->get_rank(this->state[ncomponents*p+2] )*this->nparticles+p] = true; + local_watching[this->get_rank(this->state[ncomponents*p+2]-jump[p])*this->nparticles+p] = true; + local_watching[this->get_rank(this->state[ncomponents*p+2]+jump[p])*this->nparticles+p] = true; + } + fftw_free(jump); + MPI_Allreduce( + local_watching, + this->watching, + this->nparticles*this->fs->rd->nprocs, + MPI_C_BOOL, + MPI_LOR, + this->fs->rd->comm); + delete[] local_watching; + for (int p=0; p<this->nparticles; p++) + DEBUG_MSG("watching = %d for particle %d\n", this->watching[this->fs->rd->myrank*nparticles+p], p); +} + + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::roll_rhs() +{ + for (int i=this->integration_steps-2; i>=0; i--) + std::copy(this->rhs[i], + this->rhs[i] + this->array_size, + this->rhs[i+1]); +} + + + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::AdamsBashforth(int nsteps) +{ + ptrdiff_t ii; + this->get_rhs(this->state, this->rhs[0]); + switch(nsteps) + { + case 1: + for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) + for (int i=0; i<ncomponents; i++) + { + ii = p*ncomponents+i; + this->state[ii] += this->dt*this->rhs[0][ii]; + } + break; + case 2: + for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) + for (int i=0; i<ncomponents; i++) + { + ii = p*ncomponents+i; + this->state[ii] += this->dt*(3*this->rhs[0][ii] + - this->rhs[1][ii])/2; + } + break; + case 3: + for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) + for (int i=0; i<ncomponents; i++) + { + ii = p*ncomponents+i; + this->state[ii] += this->dt*(23*this->rhs[0][ii] + - 16*this->rhs[1][ii] + + 5*this->rhs[2][ii])/12; + } + break; + case 4: + for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) + for (int i=0; i<ncomponents; i++) + { + ii = p*ncomponents+i; + this->state[ii] += this->dt*(55*this->rhs[0][ii] + - 59*this->rhs[1][ii] + + 37*this->rhs[2][ii] + - 9*this->rhs[3][ii])/24; + } + break; + case 5: + for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) + for (int i=0; i<ncomponents; i++) + { + ii = p*ncomponents+i; + this->state[ii] += this->dt*(1901*this->rhs[0][ii] + - 2774*this->rhs[1][ii] + + 2616*this->rhs[2][ii] + - 1274*this->rhs[3][ii] + + 251*this->rhs[4][ii])/720; + } + break; + case 6: + for (int p=0; p<this->nparticles; p++) if (this->fs->rd->myrank == this->computing[p]) + for (int i=0; i<ncomponents; i++) + { + ii = p*ncomponents+i; + this->state[ii] += this->dt*(4277*this->rhs[0][ii] + - 7923*this->rhs[1][ii] + + 9982*this->rhs[2][ii] + - 7298*this->rhs[3][ii] + + 2877*this->rhs[4][ii] + - 475*this->rhs[5][ii])/1440; + } + break; + } + this->roll_rhs(); +} + + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::step() +{ + if (multistep) + this->AdamsBashforth((this->iteration < this->integration_steps) ? + this->iteration+1 : + this->integration_steps); + else + this->Heun(); + this->iteration++; + this->synchronize(); +} + + + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::Heun() +{ + if (!multistep) + { + double *y = new double[this->array_size]; + double dtfactor[] = {0.0, this->dt}; + this->get_rhs(this->state, this->rhs[0]); + for (int p=0; p<this->nparticles; p++) + this->synchronize_single_particle_state(p, this->rhs[0]); + for (int kindex = 1; kindex < 2; kindex++) + { + for (int p=0; p<this->nparticles; p++) + { + if (this->watching[this->fs->rd->myrank*this->nparticles+p]) + for (int i=0; i<ncomponents; i++) + { + ptrdiff_t tindex = ptrdiff_t(p)*ncomponents + i; + y[tindex] = this->state[tindex] + dtfactor[kindex]*this->rhs[kindex-1][tindex]; + } + } + for (int p=0; p<this->nparticles; p++) + this->synchronize_single_particle_state(p, y); + this->get_rhs(y, this->rhs[kindex]); + for (int p=0; p<this->nparticles; p++) + this->synchronize_single_particle_state(p, this->rhs[kindex]); + } + for (int p=0; p<this->nparticles; p++) + { + if (this->watching[this->fs->rd->myrank*this->nparticles+p]) + { + for (int i=0; i<ncomponents; i++) + { + ptrdiff_t tindex = ptrdiff_t(p)*ncomponents + i; + this->state[tindex] += this->dt*(this->rhs[0][tindex] + this->rhs[1][tindex])/2; + } + } + } + delete[] y; + } +} + + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::cRK4() +{ + if (!multistep) + { + double *y = new double[this->array_size]; + double dtfactor[] = {0.0, this->dt/2, this->dt/2, this->dt}; + this->get_rhs(this->state, this->rhs[0]); + for (int p=0; p<this->nparticles; p++) + this->synchronize_single_particle_state(p, this->rhs[0]); + for (int kindex = 1; kindex < 4; kindex++) + { + for (int p=0; p<this->nparticles; p++) + { + if (this->watching[this->fs->rd->myrank*this->nparticles+p]) + for (int i=0; i<ncomponents; i++) + { + ptrdiff_t tindex = ptrdiff_t(p)*ncomponents + i; + y[tindex] = this->state[tindex] + dtfactor[kindex]*this->rhs[kindex-1][tindex]; + } + } + for (int p=0; p<this->nparticles; p++) + this->synchronize_single_particle_state(p, y); + this->get_rhs(y, this->rhs[kindex]); + for (int p=0; p<this->nparticles; p++) + this->synchronize_single_particle_state(p, this->rhs[kindex]); + } + for (int p=0; p<this->nparticles; p++) + { + if (this->watching[this->fs->rd->myrank*this->nparticles+p]) + for (int i=0; i<ncomponents; i++) + { + ptrdiff_t tindex = ptrdiff_t(p)*ncomponents + i; + this->state[tindex] += this->dt*(this->rhs[0][tindex] + + 2*(this->rhs[1][tindex] + this->rhs[2][tindex]) + + this->rhs[3][tindex])/6; + } + } + delete[] y; + } +} + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::get_grid_coordinates(double *x, int *xg, double *xx) +{ + static double grid_size[] = {this->dx, this->dy, this->dz}; + double tval; + std::fill_n(xg, this->nparticles*3, 0); + std::fill_n(xx, this->nparticles*3, 0.0); + for (int p=0; p<this->nparticles; p++) if (this->watching[this->fs->rd->myrank*this->nparticles+p]) + { + for (int c=0; c<3; c++) + { + tval = floor(x[p*ncomponents+c]/grid_size[c]); + xg[p*3+c] = MOD(int(tval), this->fs->rd->sizes[2-c]); + xx[p*3+c] = (x[p*ncomponents+c] - tval*grid_size[c]) / grid_size[c]; + } + xg[p*3+2] -= this->fs->rd->starts[0]; + if (this->fs->rd->myrank == this->fs->rd->rank[0] && + xg[p*3+2] > this->fs->rd->subsizes[0]) + xg[p*3+2] -= this->fs->rd->sizes[0]; + //DEBUG_MSG( + // "particle %d x is %lg %lg %lg xx is %lg %lg %lg xg is %d %d %d\n", + // p, + // x[p*3], x[p*3+1], x[p*3+2], + // xx[p*3], xx[p*3+1], xx[p*3+2], + // xg[p*3], xg[p*3+1], xg[p*3+2]); + } +} + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::interpolation_formula(rnumber *field, int *xg, double *xx, double *dest, int *deriv) +{ + double bx[interp_neighbours*2+2], by[interp_neighbours*2+2], bz[interp_neighbours*2+2]; + compute_beta(deriv[0], xx[0], bx); + compute_beta(deriv[1], xx[1], by); + compute_beta(deriv[2], xx[2], bz); + std::fill_n(dest, 3, 0); + for (int iz = -interp_neighbours; iz <= interp_neighbours+1; iz++) + for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++) + for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++) + for (int c=0; c<3; c++) + dest[c] += field[((ptrdiff_t( xg[2]+iz ) *this->fs->rd->subsizes[1] + + ptrdiff_t(MOD(xg[1]+iy, this->fs->rd->sizes[1])))*this->fs->rd->subsizes[2] + + ptrdiff_t(MOD(xg[0]+ix, this->fs->rd->sizes[2])))*3+c]*(bz[iz+interp_neighbours]* + by[iy+interp_neighbours]* + bx[ix+interp_neighbours]); +} + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::read(hid_t data_file_id) +{ + //DEBUG_MSG("aloha\n"); + if (this->fs->rd->myrank == 0) + { + std::string temp_string = (std::string("/particles/") + + std::string(this->name) + + std::string("/state")); + hid_t Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); + hid_t mspace, rspace; + hsize_t count[4], offset[4]; + rspace = H5Dget_space(Cdset); + H5Sget_simple_extent_dims(rspace, count, NULL); + count[0] = 1; + offset[0] = this->iteration / this->traj_skip; + offset[1] = 0; + offset[2] = 0; + mspace = H5Screate_simple(3, count, NULL); + H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dread(Cdset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->state); + H5Sclose(mspace); + H5Sclose(rspace); + H5Dclose(Cdset); + if (this->iteration > 0) + { + temp_string = (std::string("/particles/") + + std::string(this->name) + + std::string("/rhs")); + Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); + rspace = H5Dget_space(Cdset); + H5Sget_simple_extent_dims(rspace, count, NULL); + //reading from last available position + offset[0] = count[0] - 1; + offset[3] = 0; + count[0] = 1; + count[1] = 1; + mspace = H5Screate_simple(4, count, NULL); + for (int i=0; i<this->integration_steps; i++) + { + offset[1] = i; + H5Sselect_hyperslab(rspace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dread(Cdset, H5T_NATIVE_DOUBLE, mspace, rspace, H5P_DEFAULT, this->rhs[i]); + } + H5Sclose(mspace); + H5Sclose(rspace); + H5Dclose(Cdset); + } + } + MPI_Bcast( + this->state, + this->array_size, + MPI_DOUBLE, + 0, + this->fs->rd->comm); + for (int i = 0; i<this->integration_steps; i++) + { + MPI_Bcast( + this->rhs[i], + this->array_size, + MPI_DOUBLE, + 0, + this->fs->rd->comm); + } + // initial assignment of particles + for (int p=0; p<this->nparticles; p++) + { + this->computing[p] = this->get_rank(this->state[p*ncomponents + 2]); + //DEBUG_MSG("reading particles, particle %d computing is %d\n", p, this->computing[p]); + } + // now actual synchronization + this->synchronize(); +} + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::write(hid_t data_file_id, bool write_rhs) +{ + if (this->fs->rd->myrank == 0) + { + std::string temp_string = (std::string("/particles/") + + std::string(this->name) + + std::string("/state")); + hid_t Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); + hid_t mspace, wspace; + hsize_t count[4], offset[4]; + wspace = H5Dget_space(Cdset); + H5Sget_simple_extent_dims(wspace, count, NULL); + count[0] = 1; + offset[0] = this->iteration / this->traj_skip; + offset[1] = 0; + offset[2] = 0; + mspace = H5Screate_simple(3, count, NULL); + H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->state); + H5Sclose(mspace); + H5Sclose(wspace); + H5Dclose(Cdset); + if (write_rhs) + { + temp_string = (std::string("/particles/") + + std::string(this->name) + + std::string("/rhs")); + Cdset = H5Dopen(data_file_id, temp_string.c_str(), H5P_DEFAULT); + wspace = H5Dget_space(Cdset); + H5Sget_simple_extent_dims(wspace, count, NULL); + //writing to last available position + offset[0] = count[0] - 1; + count[0] = 1; + count[1] = 1; + offset[3] = 0; + mspace = H5Screate_simple(4, count, NULL); + for (int i=0; i<this->integration_steps; i++) + { + offset[1] = i; + H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(Cdset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, this->rhs[i]); + } + H5Sclose(mspace); + H5Sclose(wspace); + H5Dclose(Cdset); + } + } +} + + + +/*****************************************************************************/ +/* macro for specializations to numeric types compatible with FFTW */ +#define PARTICLES_DEFINITIONS(R, MPI_RNUM) \ + \ +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> \ +void particles<particle_type, rnumber, multistep, ncomponents, interp_neighbours, compute_beta>::rFFTW_to_buffered(R *src, R *dst) \ +{ \ + /* do big copy of middle stuff */ \ + std::copy(src, \ + src + this->fs->rd->local_size, \ + dst + this->buffer_size); \ + int rsrc; \ + /* get upper slices */ \ + for (int rdst = 0; rdst < this->fs->rd->nprocs; rdst++) \ + { \ + rsrc = this->fs->rd->rank[(this->fs->rd->all_start0[rdst] + \ + this->fs->rd->all_size0[rdst]) % \ + this->fs->rd->sizes[0]]; \ + if (this->fs->rd->myrank == rsrc) \ + MPI_Send( \ + (void*)(src), \ + this->buffer_size, \ + MPI_RNUM, \ + rdst, \ + 2*(rsrc*this->fs->rd->nprocs + rdst), \ + this->fs->rd->comm); \ + if (this->fs->rd->myrank == rdst) \ + MPI_Recv( \ + (void*)(dst + this->buffer_size + this->fs->rd->local_size), \ + this->buffer_size, \ + MPI_RNUM, \ + rsrc, \ + 2*(rsrc*this->fs->rd->nprocs + rdst), \ + this->fs->rd->comm, \ + MPI_STATUS_IGNORE); \ + } \ + /* get lower slices */ \ + for (int rdst = 0; rdst < this->fs->rd->nprocs; rdst++) \ + { \ + rsrc = this->fs->rd->rank[MOD(this->fs->rd->all_start0[rdst] - 1, \ + this->fs->rd->sizes[0])]; \ + if (this->fs->rd->myrank == rsrc) \ + MPI_Send( \ + (void*)(src + this->fs->rd->local_size - this->buffer_size), \ + this->buffer_size, \ + MPI_RNUM, \ + rdst, \ + 2*(rsrc*this->fs->rd->nprocs + rdst)+1, \ + this->fs->rd->comm); \ + if (this->fs->rd->myrank == rdst) \ + MPI_Recv( \ + (void*)(dst), \ + this->buffer_size, \ + MPI_RNUM, \ + rsrc, \ + 2*(rsrc*this->fs->rd->nprocs + rdst)+1, \ + this->fs->rd->comm, \ + MPI_STATUS_IGNORE); \ + } \ +} \ +/*****************************************************************************/ + + + +/*****************************************************************************/ +/* now actually use the macro defined above */ +PARTICLES_DEFINITIONS( + float, + MPI_FLOAT) +PARTICLES_DEFINITIONS( + double, + MPI_DOUBLE) +/*****************************************************************************/ + + + +/*****************************************************************************/ +template class particles<VELOCITY_TRACER, float, true, 3, 1, beta_n1_m0>; +template class particles<VELOCITY_TRACER, float, true, 3, 1, beta_n1_m1>; +template class particles<VELOCITY_TRACER, float, true, 3, 1, beta_n1_m2>; +template class particles<VELOCITY_TRACER, float, true, 3, 2, beta_n2_m0>; +template class particles<VELOCITY_TRACER, float, true, 3, 2, beta_n2_m1>; +template class particles<VELOCITY_TRACER, float, true, 3, 2, beta_n2_m2>; +template class particles<VELOCITY_TRACER, float, true, 3, 3, beta_n3_m0>; +template class particles<VELOCITY_TRACER, float, true, 3, 3, beta_n3_m1>; +template class particles<VELOCITY_TRACER, float, true, 3, 3, beta_n3_m2>; +template class particles<VELOCITY_TRACER, float, true, 3, 4, beta_n4_m0>; +template class particles<VELOCITY_TRACER, float, true, 3, 4, beta_n4_m1>; +template class particles<VELOCITY_TRACER, float, true, 3, 4, beta_n4_m2>; +template class particles<VELOCITY_TRACER, float, true, 3, 4, beta_n4_m3>; +template class particles<VELOCITY_TRACER, float, true, 3, 5, beta_n5_m0>; +template class particles<VELOCITY_TRACER, float, true, 3, 5, beta_n5_m1>; +template class particles<VELOCITY_TRACER, float, true, 3, 5, beta_n5_m2>; +template class particles<VELOCITY_TRACER, float, true, 3, 5, beta_n5_m3>; +template class particles<VELOCITY_TRACER, float, true, 3, 5, beta_n5_m4>; +template class particles<VELOCITY_TRACER, float, true, 3, 6, beta_n6_m0>; +template class particles<VELOCITY_TRACER, float, true, 3, 6, beta_n6_m1>; +template class particles<VELOCITY_TRACER, float, true, 3, 6, beta_n6_m2>; +template class particles<VELOCITY_TRACER, float, true, 3, 6, beta_n6_m3>; +template class particles<VELOCITY_TRACER, float, true, 3, 6, beta_n6_m4>; +template class particles<VELOCITY_TRACER, float, false, 3, 1, beta_n1_m0>; +template class particles<VELOCITY_TRACER, float, false, 3, 1, beta_n1_m1>; +template class particles<VELOCITY_TRACER, float, false, 3, 1, beta_n1_m2>; +template class particles<VELOCITY_TRACER, float, false, 3, 2, beta_n2_m0>; +template class particles<VELOCITY_TRACER, float, false, 3, 2, beta_n2_m1>; +template class particles<VELOCITY_TRACER, float, false, 3, 2, beta_n2_m2>; +template class particles<VELOCITY_TRACER, float, false, 3, 3, beta_n3_m0>; +template class particles<VELOCITY_TRACER, float, false, 3, 3, beta_n3_m1>; +template class particles<VELOCITY_TRACER, float, false, 3, 3, beta_n3_m2>; +template class particles<VELOCITY_TRACER, float, false, 3, 4, beta_n4_m0>; +template class particles<VELOCITY_TRACER, float, false, 3, 4, beta_n4_m1>; +template class particles<VELOCITY_TRACER, float, false, 3, 4, beta_n4_m2>; +template class particles<VELOCITY_TRACER, float, false, 3, 4, beta_n4_m3>; +template class particles<VELOCITY_TRACER, float, false, 3, 5, beta_n5_m0>; +template class particles<VELOCITY_TRACER, float, false, 3, 5, beta_n5_m1>; +template class particles<VELOCITY_TRACER, float, false, 3, 5, beta_n5_m2>; +template class particles<VELOCITY_TRACER, float, false, 3, 5, beta_n5_m3>; +template class particles<VELOCITY_TRACER, float, false, 3, 5, beta_n5_m4>; +template class particles<VELOCITY_TRACER, float, false, 3, 6, beta_n6_m0>; +template class particles<VELOCITY_TRACER, float, false, 3, 6, beta_n6_m1>; +template class particles<VELOCITY_TRACER, float, false, 3, 6, beta_n6_m2>; +template class particles<VELOCITY_TRACER, float, false, 3, 6, beta_n6_m3>; +template class particles<VELOCITY_TRACER, float, false, 3, 6, beta_n6_m4>; +template class particles<VELOCITY_TRACER, double, true, 3, 1, beta_n1_m0>; +template class particles<VELOCITY_TRACER, double, true, 3, 1, beta_n1_m1>; +template class particles<VELOCITY_TRACER, double, true, 3, 1, beta_n1_m2>; +template class particles<VELOCITY_TRACER, double, true, 3, 2, beta_n2_m0>; +template class particles<VELOCITY_TRACER, double, true, 3, 2, beta_n2_m1>; +template class particles<VELOCITY_TRACER, double, true, 3, 2, beta_n2_m2>; +template class particles<VELOCITY_TRACER, double, true, 3, 3, beta_n3_m0>; +template class particles<VELOCITY_TRACER, double, true, 3, 3, beta_n3_m1>; +template class particles<VELOCITY_TRACER, double, true, 3, 3, beta_n3_m2>; +template class particles<VELOCITY_TRACER, double, true, 3, 4, beta_n4_m0>; +template class particles<VELOCITY_TRACER, double, true, 3, 4, beta_n4_m1>; +template class particles<VELOCITY_TRACER, double, true, 3, 4, beta_n4_m2>; +template class particles<VELOCITY_TRACER, double, true, 3, 4, beta_n4_m3>; +template class particles<VELOCITY_TRACER, double, true, 3, 5, beta_n5_m0>; +template class particles<VELOCITY_TRACER, double, true, 3, 5, beta_n5_m1>; +template class particles<VELOCITY_TRACER, double, true, 3, 5, beta_n5_m2>; +template class particles<VELOCITY_TRACER, double, true, 3, 5, beta_n5_m3>; +template class particles<VELOCITY_TRACER, double, true, 3, 5, beta_n5_m4>; +template class particles<VELOCITY_TRACER, double, true, 3, 6, beta_n6_m0>; +template class particles<VELOCITY_TRACER, double, true, 3, 6, beta_n6_m1>; +template class particles<VELOCITY_TRACER, double, true, 3, 6, beta_n6_m2>; +template class particles<VELOCITY_TRACER, double, true, 3, 6, beta_n6_m3>; +template class particles<VELOCITY_TRACER, double, true, 3, 6, beta_n6_m4>; +template class particles<VELOCITY_TRACER, double, false, 3, 1, beta_n1_m0>; +template class particles<VELOCITY_TRACER, double, false, 3, 1, beta_n1_m1>; +template class particles<VELOCITY_TRACER, double, false, 3, 1, beta_n1_m2>; +template class particles<VELOCITY_TRACER, double, false, 3, 2, beta_n2_m0>; +template class particles<VELOCITY_TRACER, double, false, 3, 2, beta_n2_m1>; +template class particles<VELOCITY_TRACER, double, false, 3, 2, beta_n2_m2>; +template class particles<VELOCITY_TRACER, double, false, 3, 3, beta_n3_m0>; +template class particles<VELOCITY_TRACER, double, false, 3, 3, beta_n3_m1>; +template class particles<VELOCITY_TRACER, double, false, 3, 3, beta_n3_m2>; +template class particles<VELOCITY_TRACER, double, false, 3, 4, beta_n4_m0>; +template class particles<VELOCITY_TRACER, double, false, 3, 4, beta_n4_m1>; +template class particles<VELOCITY_TRACER, double, false, 3, 4, beta_n4_m2>; +template class particles<VELOCITY_TRACER, double, false, 3, 4, beta_n4_m3>; +template class particles<VELOCITY_TRACER, double, false, 3, 5, beta_n5_m0>; +template class particles<VELOCITY_TRACER, double, false, 3, 5, beta_n5_m1>; +template class particles<VELOCITY_TRACER, double, false, 3, 5, beta_n5_m2>; +template class particles<VELOCITY_TRACER, double, false, 3, 5, beta_n5_m3>; +template class particles<VELOCITY_TRACER, double, false, 3, 5, beta_n5_m4>; +template class particles<VELOCITY_TRACER, double, false, 3, 6, beta_n6_m0>; +template class particles<VELOCITY_TRACER, double, false, 3, 6, beta_n6_m1>; +template class particles<VELOCITY_TRACER, double, false, 3, 6, beta_n6_m2>; +template class particles<VELOCITY_TRACER, double, false, 3, 6, beta_n6_m3>; +template class particles<VELOCITY_TRACER, double, false, 3, 6, beta_n6_m4>; +/*****************************************************************************/ diff --git a/bfps/cpp/particles.hpp b/bfps/cpp/particles.hpp new file mode 100644 index 0000000000000000000000000000000000000000..24177694614605dc29a6add7132adeb8f5987efc --- /dev/null +++ b/bfps/cpp/particles.hpp @@ -0,0 +1,144 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + + + +#include <stdio.h> +#include <stdlib.h> +#include <iostream> +#include <hdf5.h> +#include "base.hpp" +#include "fluid_solver_base.hpp" +#include "spline_n1.hpp" +#include "spline_n2.hpp" +#include "spline_n3.hpp" +#include "spline_n4.hpp" +#include "spline_n5.hpp" +#include "spline_n6.hpp" +#include "Lagrange_polys.hpp" + +#ifndef PARTICLES + +#define PARTICLES + +extern int myrank, nprocs; + +typedef void (*base_polynomial_values)( + int derivative, + double fraction, + double *destination); + +/* particle types */ +enum particle_types {VELOCITY_TRACER}; + +template <int particle_type, class rnumber, bool multistep, int ncomponents, int interp_neighbours, base_polynomial_values compute_beta> +class particles +{ + public: + fluid_solver_base<rnumber> *fs; + field_descriptor<rnumber> *buffered_field_descriptor; + rnumber *data; + + /* watching is an array of shape [nparticles], with + * watching[p] being true if particle p is in the domain of myrank + * or in the buffer regions. + * only used if multistep is false. + * */ + bool *watching; + /* computing is an array of shape [nparticles], with + * computing[p] being the rank that is currently working on particle p + * */ + int *computing; + + /* state will generally hold all the information about the particles. + * in the beginning, we will only need to solve 3D ODEs, but I figured + * a general ncomponents is better, since we may change our minds. + * */ + double *state; + double *rhs[6]; + int nparticles; + int array_size; + int integration_steps; + int traj_skip; + int buffer_width; + ptrdiff_t buffer_size; + double *lbound; + double *ubound; + + /* simulation parameters */ + char name[256]; + int iteration; + double dt; + + /* physical parameters of field */ + double dx, dy, dz; + + /* methods */ + + /* constructor and destructor. + * allocate and deallocate: + * this->state + * this->rhs + * this->lbound + * this->ubound + * this->computing + * this->watching + * */ + particles( + const char *NAME, + fluid_solver_base<rnumber> *FSOLVER, + const int NPARTICLES, + const int TRAJ_SKIP, + const int INTEGRATION_STEPS = 2); + ~particles(); + void rFFTW_to_buffered(float *src, float *dst); + void rFFTW_to_buffered(double *src, double *dst); + + /* an Euler step is needed to compute an estimate of future positions, + * which is needed for synchronization. + * */ + void jump_estimate(double *jump_length); + void get_rhs(double *x, double *rhs); + + int get_rank(double z); // get rank for given value of z + void synchronize(); + void synchronize_single_particle_state(int p, double *x, int source_id = -1); + void get_grid_coordinates(double *x, int *xg, double *xx); + void interpolation_formula(rnumber *field, int *xg, double *xx, double *dest, int *deriv); + + + /* input/output */ + void read(hid_t data_file_id); + void write(hid_t data_file_id, bool write_rhs = true); + + /* solvers */ + void step(); + void roll_rhs(); + void AdamsBashforth(int nsteps); + void Heun(); + void cRK4(); +}; + +#endif//PARTICLES + diff --git a/setup.py b/setup.py index 6cb873558327c7e84d8223b98a16c08e4fda6eb6..fb9759418205c1ec9a912f8a78cb9d348dcac6cc 100644 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ src_file_list = ['field_descriptor', 'fluid_solver_base', 'fluid_solver', 'slab_field_particles', + 'particles', 'tracers', 'spline_n1', 'spline_n2',