diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp index be8546f67e0f3968e814b3f5941e325573fc4460..8310be04ca36714ed01d5facc5a4d973713de550 100644 --- a/bfps/cpp/rFFTW_interpolator.cpp +++ b/bfps/cpp/rFFTW_interpolator.cpp @@ -43,39 +43,17 @@ rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator( this->field = (rnumber*)((void*)fftw_alloc_real(this->field_size)); // compute dx, dy, dz; - this->dx = 4*acos(0) / (fs->dkx*fs->rd->sizes[2]); - this->dy = 4*acos(0) / (fs->dky*fs->rd->sizes[1]); - this->dz = 4*acos(0) / (fs->dkz*fs->rd->sizes[0]); + this->dx = 4*acos(0) / (fs->dkx*this->descriptor->sizes[2]); + this->dy = 4*acos(0) / (fs->dky*this->descriptor->sizes[1]); + this->dz = 4*acos(0) / (fs->dkz*this->descriptor->sizes[0]); - // compute lower and upper bounds - this->lbound = new double[nprocs]; - this->ubound = new double[nprocs]; - double *tbound = new double[nprocs]; - std::fill_n(tbound, nprocs, 0.0); - tbound[this->descriptor->myrank] = fs->rd->starts[0]*this->dz; - MPI_Allreduce( - tbound, - this->lbound, - nprocs, - MPI_DOUBLE, - MPI_SUM, - this->descriptor->comm); - std::fill_n(tbound, this->descriptor->nprocs, 0.0); - tbound[this->descriptor->myrank] = (fs->rd->starts[0] + fs->rd->subsizes[0])*this->dz; - MPI_Allreduce( - tbound, - this->ubound, - nprocs, - MPI_DOUBLE, - MPI_SUM, - this->descriptor->comm); - delete[] tbound; - //for (int r = 0; r<nprocs; r++) - // DEBUG_MSG( - // "lbound[%d] = %lg, ubound[%d] = %lg\n", - // r, this->lbound[r], - // r, this->ubound[r] - // ); + // generate compute array + this->compute = new bool[this->descriptor->sizes[0]]; + std::fill_n(this->compute, this->descriptor->sizes[0], false); + for (int iz = this->descriptor->starts[0]-interp_neighbours-1; + iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours+1; + iz++) + this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true; } template <class rnumber, int interp_neighbours> @@ -85,8 +63,7 @@ rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator() fftwf_free((float*)((void*)this->field)); else if (sizeof(rnumber) == 8) fftw_free((double*)((void*)this->field)); - delete[] this->lbound; - delete[] this->ubound; + delete[] this->compute; } template <class rnumber, int interp_neighbours> @@ -139,7 +116,8 @@ void rFFTW_interpolator<rnumber, interp_neighbours>::sample( this->get_grid_coordinates(nparticles, pdimension, x, xg, xx); /* perform interpolation */ for (int p=0; p<nparticles; p++) - this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv); + if (this->compute[xg[p*3+2]]) + this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv); MPI_Allreduce( yy, y, diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp index 1b07bcb25d51fc7d884bc342e0842e555e8018f0..0864bd964b77b5b3527a58898b8c22da63c33c2e 100644 --- a/bfps/cpp/rFFTW_interpolator.hpp +++ b/bfps/cpp/rFFTW_interpolator.hpp @@ -60,9 +60,10 @@ class rFFTW_interpolator /* physical parameters of field */ double dx, dy, dz; - /* precomputed boundaries of process's domain */ - double *lbound; - double *ubound; + /* compute[iz] is true if . + * local_zstart - neighbours <= iz <= local_zend + 1 + neighbours + * */ + bool *compute; rFFTW_interpolator( fluid_solver_base<rnumber> *FSOLVER, diff --git a/done.txt b/done.txt index 8dd974c1f1353734f7a4f17dc79c6ca548c88cba..344832ae21046d40f47702000ab8b31d0128114b 100644 --- a/done.txt +++ b/done.txt @@ -1,3 +1,5 @@ x 2015-12-04 make code py3 compatible @python3 x 2015-12-23 decide on versioning system +merge0 x 2015-12-24 move get_grid coords to interpolator @optimization +v1.0 +x 2015-12-25 get rid of temporal interpolation @optimization +v1.0 +x 2015-12-26 call interpolation only when needed @optimization +v1.0 diff --git a/todo.txt b/todo.txt index dcc583669300cd001fe198ec6ac15659bcdcdfa3..0ec54254e740d14d72b7c97cf7592fd5bfcfdf81 100644 --- a/todo.txt +++ b/todo.txt @@ -1,6 +1,5 @@ (B) FFTW interpolator doesn't need its own field @optimization +v1.0 (B) compute z polynomials only when needed @optimization +v1.0 -(B) get rid of temporal interpolation @optimization +v1.0 (B) use less memory @optimization (C) clean up machine_settings mess @design @documentation +v2.0 (C) code overview @documentation