From e8a0dbacd60556adea7287f2b97ddd117503c576 Mon Sep 17 00:00:00 2001
From: Chichi Lalescu <Cristian.Lalescu@ds.mpg.de>
Date: Sat, 26 Dec 2015 17:55:58 +0100
Subject: [PATCH] preliminary optimization of interpolation

---
 bfps/cpp/rFFTW_interpolator.cpp | 48 +++++++++------------------------
 bfps/cpp/rFFTW_interpolator.hpp |  7 ++---
 done.txt                        |  2 ++
 todo.txt                        |  1 -
 4 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/bfps/cpp/rFFTW_interpolator.cpp b/bfps/cpp/rFFTW_interpolator.cpp
index be8546f6..8310be04 100644
--- a/bfps/cpp/rFFTW_interpolator.cpp
+++ b/bfps/cpp/rFFTW_interpolator.cpp
@@ -43,39 +43,17 @@ rFFTW_interpolator<rnumber, interp_neighbours>::rFFTW_interpolator(
         this->field = (rnumber*)((void*)fftw_alloc_real(this->field_size));
 
     // compute dx, dy, dz;
-    this->dx = 4*acos(0) / (fs->dkx*fs->rd->sizes[2]);
-    this->dy = 4*acos(0) / (fs->dky*fs->rd->sizes[1]);
-    this->dz = 4*acos(0) / (fs->dkz*fs->rd->sizes[0]);
+    this->dx = 4*acos(0) / (fs->dkx*this->descriptor->sizes[2]);
+    this->dy = 4*acos(0) / (fs->dky*this->descriptor->sizes[1]);
+    this->dz = 4*acos(0) / (fs->dkz*this->descriptor->sizes[0]);
 
-    // compute lower and upper bounds
-    this->lbound = new double[nprocs];
-    this->ubound = new double[nprocs];
-    double *tbound = new double[nprocs];
-    std::fill_n(tbound, nprocs, 0.0);
-    tbound[this->descriptor->myrank] = fs->rd->starts[0]*this->dz;
-    MPI_Allreduce(
-            tbound,
-            this->lbound,
-            nprocs,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->descriptor->comm);
-    std::fill_n(tbound, this->descriptor->nprocs, 0.0);
-    tbound[this->descriptor->myrank] = (fs->rd->starts[0] + fs->rd->subsizes[0])*this->dz;
-    MPI_Allreduce(
-            tbound,
-            this->ubound,
-            nprocs,
-            MPI_DOUBLE,
-            MPI_SUM,
-            this->descriptor->comm);
-    delete[] tbound;
-    //for (int r = 0; r<nprocs; r++)
-    //    DEBUG_MSG(
-    //            "lbound[%d] = %lg, ubound[%d] = %lg\n",
-    //            r, this->lbound[r],
-    //            r, this->ubound[r]
-    //            );
+    // generate compute array
+    this->compute = new bool[this->descriptor->sizes[0]];
+    std::fill_n(this->compute, this->descriptor->sizes[0], false);
+    for (int iz = this->descriptor->starts[0]-interp_neighbours-1;
+            iz <= this->descriptor->starts[0]+this->descriptor->subsizes[0]+interp_neighbours+1;
+            iz++)
+        this->compute[((iz + this->descriptor->sizes[0]) % this->descriptor->sizes[0])] = true;
 }
 
 template <class rnumber, int interp_neighbours>
@@ -85,8 +63,7 @@ rFFTW_interpolator<rnumber, interp_neighbours>::~rFFTW_interpolator()
         fftwf_free((float*)((void*)this->field));
     else if (sizeof(rnumber) == 8)
         fftw_free((double*)((void*)this->field));
-    delete[] this->lbound;
-    delete[] this->ubound;
+    delete[] this->compute;
 }
 
 template <class rnumber, int interp_neighbours>
@@ -139,7 +116,8 @@ void rFFTW_interpolator<rnumber, interp_neighbours>::sample(
     this->get_grid_coordinates(nparticles, pdimension, x, xg, xx);
     /* perform interpolation */
     for (int p=0; p<nparticles; p++)
-        this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv);
+        if (this->compute[xg[p*3+2]])
+            this->operator()(xg + p*3, xx + p*3, yy + p*3, deriv);
     MPI_Allreduce(
             yy,
             y,
diff --git a/bfps/cpp/rFFTW_interpolator.hpp b/bfps/cpp/rFFTW_interpolator.hpp
index 1b07bcb2..0864bd96 100644
--- a/bfps/cpp/rFFTW_interpolator.hpp
+++ b/bfps/cpp/rFFTW_interpolator.hpp
@@ -60,9 +60,10 @@ class rFFTW_interpolator
         /* physical parameters of field */
         double dx, dy, dz;
 
-        /* precomputed boundaries of process's domain */
-        double *lbound;
-        double *ubound;
+        /* compute[iz] is true if .
+         * local_zstart - neighbours <= iz <= local_zend + 1 + neighbours
+         * */
+        bool *compute;
 
         rFFTW_interpolator(
                 fluid_solver_base<rnumber> *FSOLVER,
diff --git a/done.txt b/done.txt
index 8dd974c1..344832ae 100644
--- a/done.txt
+++ b/done.txt
@@ -1,3 +1,5 @@
 x 2015-12-04 make code py3 compatible                                @python3
 x 2015-12-23 decide on versioning system                                           +merge0
 x 2015-12-24 move get_grid coords to interpolator                    @optimization +v1.0
+x 2015-12-25 get rid of temporal interpolation                       @optimization +v1.0
+x 2015-12-26 call interpolation only when needed                     @optimization +v1.0
diff --git a/todo.txt b/todo.txt
index dcc58366..0ec54254 100644
--- a/todo.txt
+++ b/todo.txt
@@ -1,6 +1,5 @@
 (B) FFTW interpolator doesn't need its own field            @optimization +v1.0
 (B) compute z polynomials only when needed                  @optimization +v1.0
-(B) get rid of temporal interpolation                       @optimization +v1.0
 (B) use less memory                                         @optimization
 (C) clean up machine_settings mess                          @design @documentation +v2.0
 (C) code overview                                           @documentation
-- 
GitLab