From e8a0dbacd60556adea7287f2b97ddd117503c576 Mon Sep 17 00:00:00 2001 From: Chichi Lalescu <Cristian.Lalescu@ds.mpg.de> Date: Sat, 26 Dec 2015 17:55:58 +0100 Subject: [PATCH] preliminary optimization of interpolation --- bfps/cpp/rFFTW_interpolator.cpp | 48 +++++++++------------------------ bfps/cpp/rFFTW_interpolator.hpp | 7 ++--- done.txt | 2 ++ todo.txt | 1 - 4 files changed, 19 insertions(+), 39 deletions(-) diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp index be8546f6..8310be04 100644 --- a/bfps/cpp/rFFTW_interpolator.cpp +++ b/bfps/cpp/rFFTW_interpolator.cpp @@ -43,39 +43,17 @@ rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( this->field = (rnumber*)((void*)fftw_alloc_real(this->field_size)); // compute dx, dy, dz; - this->dx = 4*acos(0) / (fs->dkx*fs->rd->sizes[2]); - this->dy = 4*acos(0) / (fs->dky*fs->rd->sizes[1]); - this->dz = 4*acos(0) / (fs->dkz*fs->rd->sizes[0]); + this->dx = 4*acos(0) / (fs->dkx*this->descriptor->sizes[2]); + this->dy = 4*acos(0) / (fs->dky*this->descriptor->sizes[1]); + this->dz = 4*acos(0) / (fs->dkz*this->descriptor->sizes[0]); - // compute lower and upper bounds - this->lbound = new double[nprocs]; - this->ubound = new double[nprocs]; - double *tbound = new double[nprocs]; - std::fill_n(tbound, nprocs, 0.0); - tbound[this->descriptor->myrank] = fs->rd->starts[0]*this->dz; - MPI_Allreduce( - tbound, - this->lbound, - nprocs, - MPI_DOUBLE, - MPI_SUM, - this->descriptor->comm); - std::fill_n(tbound, this->descriptor->nprocs, 0.0); - tbound[this->descriptor->myrank] = (fs->rd->starts[0] + fs->rd->subsizes[0])*this->dz; - MPI_Allreduce( - tbound, - this->ubound, - nprocs, - MPI_DOUBLE, - MPI_SUM, - this->descriptor->comm); - delete[] tbound; - //for (int r = 0; r<nprocs; r++) - // DEBUG_MSG( - // "lbound[%d] = %lg, ubound[%d] = %lg\n", - // r, this->lbound[r], - // r, this->ubound[r] - // ); + // generate compute array + this->compute = new bool[this->descriptor->sizes[0]]; + std::fill_n(this->compute, this->descriptor->sizes[0], false); + for (int iz = this->descriptor->starts[0]-interp_neighbours-1; + iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours+1; + iz++) + this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true; } template <class rnumber, int interp_neighbours> @@ -85,8 +63,7 @@ rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator() fftwf_free((float*)((void*)this->field)); else if (sizeof(rnumber) == 8) fftw_free((double*)((void*)this->field)); - delete[] this->lbound; - delete[] this->ubound; + delete[] this->compute; } template <class rnumber, int interp_neighbours> @@ -139,7 +116,8 @@ void rFFTW_interpolator<rnumber, interp_neighbours>::sample( this->get_grid_coordinates(nparticles, pdimension, x, xg, xx); /* perform interpolation */ for (int p=0; p<nparticles; p++) - this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv); + if (this->compute[xg[p*3+2]]) + this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv); MPI_Allreduce( yy, y, diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp index 1b07bcb2..0864bd96 100644 --- a/bfps/cpp/rFFTW_interpolator.hpp +++ b/bfps/cpp/rFFTW_interpolator.hpp @@ -60,9 +60,10 @@ class rFFTW_interpolator /* physical parameters of field */ double dx, dy, dz; - /* precomputed boundaries of process's domain */ - double *lbound; - double *ubound; + /* compute[iz] is true if . + * local_zstart - neighbours <= iz <= local_zend + 1 + neighbours + * */ + bool *compute; rFFTW_interpolator( fluid_solver_base<rnumber> *FSOLVER, diff --git a/done.txt b/done.txt index 8dd974c1..344832ae 100644 --- a/done.txt +++ b/done.txt @@ -1,3 +1,5 @@ x 2015-12-04 make code py3 compatible @python3 x 2015-12-23 decide on versioning system +merge0 x 2015-12-24 move get_grid coords to interpolator @optimization +v1.0 +x 2015-12-25 get rid of temporal interpolation @optimization +v1.0 +x 2015-12-26 call interpolation only when needed @optimization +v1.0 diff --git a/todo.txt b/todo.txt index dcc58366..0ec54254 100644 --- a/todo.txt +++ b/todo.txt @@ -1,6 +1,5 @@ (B) FFTW interpolator doesn't need its own field @optimization +v1.0 (B) compute z polynomials only when needed @optimization +v1.0 -(B) get rid of temporal interpolation @optimization +v1.0 (B) use less memory @optimization (C) clean up machine_settings mess @design @documentation +v2.0 (C) code overview @documentation -- GitLab