From f40ecb767b240cda3c9958b3f0d67b19121ece1b Mon Sep 17 00:00:00 2001 From: Cristian C Lalescu <Cristian.Lalescu@ds.mpg.de> Date: Sun, 7 Feb 2016 23:31:51 +0100 Subject: [PATCH] update buffered interpolator --- bfps/cpp/interpolator.cpp | 120 +++++++++++++++++++++----------------- bfps/cpp/interpolator.hpp | 23 +------- 2 files changed, 70 insertions(+), 73 deletions(-) diff --git a/bfps/cpp/interpolator.cpp b/bfps/cpp/interpolator.cpp index cf04af66..43e91339 100644 --- a/bfps/cpp/interpolator.cpp +++ b/bfps/cpp/interpolator.cpp @@ -24,36 +24,34 @@ +#define NDEBUG + #include "interpolator.hpp" template <class rnumber, int interp_neighbours> interpolator<rnumber, interp_neighbours>::interpolator( fluid_solver_base<rnumber> *fs, - base_polynomial_values BETA_POLYS) + base_polynomial_values BETA_POLYS) : interpolator_base<rnumber, interp_neighbours>(fs, BETA_POLYS) { int tdims[4]; - this->unbuffered_descriptor = fs->rd; - this->buffer_size = (interp_neighbours+1)*this->unbuffered_descriptor->slice_size; + this->buffer_size = (interp_neighbours+1)*this->descriptor->slice_size; this->compute_beta = BETA_POLYS; - tdims[0] = (interp_neighbours+1)*2*this->unbuffered_descriptor->nprocs + this->unbuffered_descriptor->sizes[0]; - tdims[1] = this->unbuffered_descriptor->sizes[1]; - tdims[2] = this->unbuffered_descriptor->sizes[2]; - tdims[3] = this->unbuffered_descriptor->sizes[3]; - this->descriptor = new field_descriptor<rnumber>( + tdims[0] = (interp_neighbours+1)*2*this->descriptor->nprocs + this->descriptor->sizes[0]; + tdims[1] = this->descriptor->sizes[1]; + tdims[2] = this->descriptor->sizes[2]; + tdims[3] = this->descriptor->sizes[3]; + this->buffered_descriptor = new field_descriptor<rnumber>( 4, tdims, - this->unbuffered_descriptor->mpi_dtype, - this->unbuffered_descriptor->comm); + this->descriptor->mpi_dtype, + this->descriptor->comm); if (sizeof(rnumber) == 4) { - this->f0 = (rnumber*)((void*)fftwf_alloc_real(this->descriptor->local_size)); - this->f1 = (rnumber*)((void*)fftwf_alloc_real(this->descriptor->local_size)); + this->field = (rnumber*)((void*)fftwf_alloc_real(this->descriptor->local_size)); } else if (sizeof(rnumber) == 8) { - this->f0 = (rnumber*)((void*)fftw_alloc_real(this->descriptor->local_size)); - this->f1 = (rnumber*)((void*)fftw_alloc_real(this->descriptor->local_size)); + this->field = (rnumber*)((void*)fftw_alloc_real(this->descriptor->local_size)); } - this->temp = this->f1 + this->buffer_size; } template <class rnumber, int interp_neighbours> @@ -61,80 +59,73 @@ interpolator<rnumber, interp_neighbours>::~interpolator() { if (sizeof(rnumber) == 4) { - fftwf_free((float*)((void*)this->f0)); - fftwf_free((float*)((void*)this->f1)); + fftwf_free((float*)((void*)this->field)); } else if (sizeof(rnumber) == 8) { - fftw_free((double*)((void*)this->f0)); - fftw_free((double*)((void*)this->f1)); + fftw_free((double*)((void*)this->field)); } - delete this->descriptor; + delete this->buffered_descriptor; } template <class rnumber, int interp_neighbours> int interpolator<rnumber, interp_neighbours>::read_rFFTW(void *void_src) { - /* first, roll fields */ - rnumber *tmp = this->f0; - this->f0 = this->f1; - this->f1 = tmp; - this->temp = this->f0 + this->buffer_size; /* now do regular things */ rnumber *src = (rnumber*)void_src; - rnumber *dst = this->f1; + rnumber *dst = this->field; /* do big copy of middle stuff */ - clip_zero_padding<rnumber>(this->unbuffered_descriptor, src, 3); + clip_zero_padding<rnumber>(this->descriptor, src, 3); std::copy(src, - src + this->unbuffered_descriptor->local_size, + src + this->descriptor->local_size, dst + this->buffer_size); MPI_Datatype MPI_RNUM = (sizeof(rnumber) == 4) ? MPI_FLOAT : MPI_DOUBLE; int rsrc; /* get upper slices */ - for (int rdst = 0; rdst < this->unbuffered_descriptor->nprocs; rdst++) + for (int rdst = 0; rdst < this->descriptor->nprocs; rdst++) { - rsrc = this->unbuffered_descriptor->rank[(this->unbuffered_descriptor->all_start0[rdst] + - this->unbuffered_descriptor->all_size0[rdst]) % - this->unbuffered_descriptor->sizes[0]]; - if (this->unbuffered_descriptor->myrank == rsrc) + rsrc = this->descriptor->rank[(this->descriptor->all_start0[rdst] + + this->descriptor->all_size0[rdst]) % + this->descriptor->sizes[0]]; + if (this->descriptor->myrank == rsrc) MPI_Send( src, this->buffer_size, MPI_RNUM, rdst, - 2*(rsrc*this->unbuffered_descriptor->nprocs + rdst), - this->descriptor->comm); - if (this->unbuffered_descriptor->myrank == rdst) + 2*(rsrc*this->descriptor->nprocs + rdst), + this->buffered_descriptor->comm); + if (this->descriptor->myrank == rdst) MPI_Recv( - dst + this->buffer_size + this->unbuffered_descriptor->local_size, + dst + this->buffer_size + this->descriptor->local_size, this->buffer_size, MPI_RNUM, rsrc, - 2*(rsrc*this->unbuffered_descriptor->nprocs + rdst), - this->descriptor->comm, + 2*(rsrc*this->descriptor->nprocs + rdst), + this->buffered_descriptor->comm, MPI_STATUS_IGNORE); } /* get lower slices */ - for (int rdst = 0; rdst < this->unbuffered_descriptor->nprocs; rdst++) + for (int rdst = 0; rdst < this->descriptor->nprocs; rdst++) { - rsrc = this->unbuffered_descriptor->rank[MOD(this->unbuffered_descriptor->all_start0[rdst] - 1, - this->unbuffered_descriptor->sizes[0])]; - if (this->unbuffered_descriptor->myrank == rsrc) + rsrc = this->descriptor->rank[MOD(this->descriptor->all_start0[rdst] - 1, + this->descriptor->sizes[0])]; + if (this->descriptor->myrank == rsrc) MPI_Send( - src + this->unbuffered_descriptor->local_size - this->buffer_size, + src + this->descriptor->local_size - this->buffer_size, this->buffer_size, MPI_RNUM, rdst, - 2*(rsrc*this->unbuffered_descriptor->nprocs + rdst)+1, - this->unbuffered_descriptor->comm); - if (this->unbuffered_descriptor->myrank == rdst) + 2*(rsrc*this->descriptor->nprocs + rdst)+1, + this->descriptor->comm); + if (this->descriptor->myrank == rdst) MPI_Recv( dst, this->buffer_size, MPI_RNUM, rsrc, - 2*(rsrc*this->unbuffered_descriptor->nprocs + rdst)+1, - this->unbuffered_descriptor->comm, + 2*(rsrc*this->descriptor->nprocs + rdst)+1, + this->descriptor->comm, MPI_STATUS_IGNORE); } return EXIT_SUCCESS; @@ -170,14 +161,14 @@ void interpolator<rnumber, interp_neighbours>::operator()( std::fill_n(tval, 3, 0); for (int iy = -interp_neighbours; iy <= interp_neighbours+1; iy++) { - bigiy = ptrdiff_t(MOD(xg[1]+iy, this->descriptor->sizes[1])); + bigiy = ptrdiff_t(MOD(xg[1]+iy, this->buffered_descriptor->sizes[1])); for (int ix = -interp_neighbours; ix <= interp_neighbours+1; ix++) { - bigix = ptrdiff_t(MOD(xg[0]+ix, this->descriptor->sizes[2])); + bigix = ptrdiff_t(MOD(xg[0]+ix, this->buffered_descriptor->sizes[2])); for (int c=0; c<3; c++) { - ptrdiff_t tindex = ((bigiz *this->descriptor->sizes[1] + - bigiy)*this->descriptor->sizes[2] + + ptrdiff_t tindex = ((bigiz *this->buffered_descriptor->sizes[1] + + bigiy)*this->buffered_descriptor->sizes[2] + bigix)*3+c + this->buffer_size; dest[c] += (this->f0[tindex]*(1-t) + t*this->f1[tindex])*(bz[iz+interp_neighbours]* by[iy+interp_neighbours]* @@ -192,6 +183,29 @@ void interpolator<rnumber, interp_neighbours>::operator()( } } +template <class rnumber, int interp_neighbours> +void interpolator<rnumber, interp_neighbours>::sample( + const int nparticles, + const int pdimension, + const double *__restrict__ x, + double *__restrict__ y, + const int *deriv) +{ + /* get grid coordinates */ + int *xg = new int[3*nparticles]; + double *xx = new double[3*nparticles]; + double *yy = new double[3*nparticles]; + std::fill_n(yy, 3*nparticles, 0.0); + this->get_grid_coordinates(nparticles, pdimension, x, xg, xx); + /* perform interpolation */ + for (int p=0; p<nparticles; p++) + if (this->descriptor->zrank[MOD(xg[p*3+2], this->descriptor->sizes[0])] == this->descriptor->myrank) + this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv); + delete[] yy; + delete[] xg; + delete[] xx; +} + template class interpolator<float, 1>; template class interpolator<float, 2>; template class interpolator<float, 3>; diff --git a/bfps/cpp/interpolator.hpp b/bfps/cpp/interpolator.hpp index 739defcf..775c4ce9 100644 --- a/bfps/cpp/interpolator.hpp +++ b/bfps/cpp/interpolator.hpp @@ -34,42 +34,25 @@ #define INTERPOLATOR template <class rnumber, int interp_neighbours> -class interpolator +class interpolator:public interpolator_base<rnumber, interp_neighbours> { public: ptrdiff_t buffer_size; - /* pointer to polynomial function */ - base_polynomial_values compute_beta; - - /* descriptor of field to interpolate */ - field_descriptor<rnumber> *descriptor; - /* descriptor for buffered field */ - field_descriptor<rnumber> *unbuffered_descriptor; + field_descriptor<rnumber> *buffered_descriptor; /* pointer to buffered field */ rnumber *field; - /* physical parameters of field */ - double dx, dy, dz; - interpolator( fluid_solver_base<rnumber> *FSOLVER, - base_polynomial_values BETA_POLYS, - rnumber *FIELD_DATA); + base_polynomial_values BETA_POLYS); ~interpolator(); /* destroys input */ int read_rFFTW(void *src); - /* map real locations to grid coordinates */ - void get_grid_coordinates( - const int nparticles, - const int pdimension, - const double *__restrict__ x, - int *__restrict__ xg, - double *__restrict__ xx); /* interpolate field at an array of locations */ void sample( const int nparticles, -- GitLab