diff --git a/py_template/test_FFT.cpp b/py_template/test_FFT.cpp
index 23d7e104d5b2456c5187c811f923aefd1eceafcb..4948ff9954a7f5d261957947d10c0545a5153b52 100644
--- a/py_template/test_FFT.cpp
+++ b/py_template/test_FFT.cpp
@@ -8,13 +8,11 @@ vector_field<float> rv(fs->cd, fs->rvorticity);
 fs->cd->read(
         "Kdata0",
         (void*)fs->cvorticity);
+DEBUG_MSG("######### %g\n", fs->correl_vec(fs->cvorticity, fs->cvorticity));
 fftwf_execute(*(fftwf_plan*)fs->c2r_vorticity);
-//rv*(1. / (fs->rd->sizes[0]*fs->rd->sizes[1]*fs->rd->sizes[2]));
 fftwf_execute(*(fftwf_plan*)fs->r2c_vorticity);
-cv = cv*(1. / (fs->rd->sizes[0]*fs->rd->sizes[1]*fs->rd->sizes[2]));
-fs->cd->write(
-        "Kdata1",
-        (void*)fs->cvorticity);
+cv = cv*(1. / (fs->normalization_factor));
+DEBUG_MSG("######### %g\n", fs->correl_vec(fs->cvorticity, fs->cvorticity));
 
 DEBUG_MSG("full size is %ld\n", fs->rd->full_size);
 
diff --git a/py_template/test_step.cpp b/py_template/test_step.cpp
index deca5c8ccf21fedf0c8a7bbaabbb9e01490205e2..532dcbaabc47a059cdda5ba660fe68c50d0fc69e 100644
--- a/py_template/test_step.cpp
+++ b/py_template/test_step.cpp
@@ -2,13 +2,20 @@ fluid_solver<float> *fs;
 fs = new fluid_solver<float>(32, 32, 32);
 DEBUG_MSG("fluid_solver object created\n");
 
+DEBUG_MSG("nu = %g\n", fs->nu);
 fs->cd->read(
         "Kdata0",
         (void*)fs->cvorticity);
+fs->low_pass_Fourier(fs->cvorticity, 3, fs->kM);
+fs->force_divfree(fs->cvorticity);
+fs->symmetrize(fs->cvorticity, 3);
 DEBUG_MSG("field read\n");
 DEBUG_MSG("######### %d %g\n", fs->iteration, fs->correl_vec(fs->cvorticity, fs->cvorticity));
-fs->step(0.0001);
-DEBUG_MSG("######### %d %g\n", fs->iteration, fs->correl_vec(fs->cvorticity, fs->cvorticity));
+for (int t = 0; t < 8; t++)
+{
+    fs->step(0.0001);
+    DEBUG_MSG("######### %d %g\n", fs->iteration, fs->correl_vec(fs->cvorticity, fs->cvorticity));
+}
 
 delete fs;
 DEBUG_MSG("fluid_solver object deleted\n");
diff --git a/src/fluid_solver.cpp b/src/fluid_solver.cpp
index a8991ec173afb3e086fc778ea6a1873bf3e68c4b..083239ce82f1755a01509abe2c512ba703f00138 100644
--- a/src/fluid_solver.cpp
+++ b/src/fluid_solver.cpp
@@ -177,35 +177,44 @@ void fluid_solver<R>::omega_nonlin( \
     assert(src >= 0 && src < 3); \
     double k2; \
     /* compute velocity field */ \
+    this->low_pass_Fourier(this->cv[src], 3, this->kM); \
+    std::fill_n((R*)this->cu, this->rd->full_size, 0); \
+    DEBUG_MSG(">>>>>>>>>>>>>>nonlin start v %g\n", this->correl_vec(this->cv[src], this->cv[src])); \
     CLOOP( \
             k2 = (this->kx[xindex]*this->kx[xindex] + \
                   this->ky[yindex]*this->ky[yindex] + \
                   this->kz[zindex]*this->kz[zindex]); \
-            this->cu[cindex*3+0][0] = (this->ky[yindex]*this->cv[src][cindex*3+2][1] - this->kz[zindex]*this->cv[src][cindex*3+1][1]) / k2; \
-            this->cu[cindex*3+1][0] = (this->kz[zindex]*this->cv[src][cindex*3+0][1] - this->kx[xindex]*this->cv[src][cindex*3+2][1]) / k2; \
-            this->cu[cindex*3+2][0] = (this->kx[xindex]*this->cv[src][cindex*3+1][1] - this->ky[yindex]*this->cv[src][cindex*3+0][1]) / k2; \
-            this->cu[cindex*3+0][1] = (this->ky[yindex]*this->cv[src][cindex*3+2][0] - this->kz[zindex]*this->cv[src][cindex*3+1][0]) / k2; \
-            this->cu[cindex*3+1][1] = (this->kz[zindex]*this->cv[src][cindex*3+0][0] - this->kx[xindex]*this->cv[src][cindex*3+2][0]) / k2; \
-            this->cu[cindex*3+2][1] = (this->kx[xindex]*this->cv[src][cindex*3+1][0] - this->ky[yindex]*this->cv[src][cindex*3+0][0]) / k2; \
+            this->cu[cindex*3+0][0] = ((this->ky[yindex]*this->cv[src][cindex*3+2][1] - this->kz[zindex]*this->cv[src][cindex*3+1][1]) / k2); \
+            this->cu[cindex*3+1][0] = ((this->kz[zindex]*this->cv[src][cindex*3+0][1] - this->kx[xindex]*this->cv[src][cindex*3+2][1]) / k2); \
+            this->cu[cindex*3+2][0] = ((this->kx[xindex]*this->cv[src][cindex*3+1][1] - this->ky[yindex]*this->cv[src][cindex*3+0][1]) / k2); \
+            this->cu[cindex*3+0][1] = ((this->ky[yindex]*this->cv[src][cindex*3+2][0] - this->kz[zindex]*this->cv[src][cindex*3+1][0]) / k2); \
+            this->cu[cindex*3+1][1] = ((this->kz[zindex]*this->cv[src][cindex*3+0][0] - this->kx[xindex]*this->cv[src][cindex*3+2][0]) / k2); \
+            this->cu[cindex*3+2][1] = ((this->kx[xindex]*this->cv[src][cindex*3+1][0] - this->ky[yindex]*this->cv[src][cindex*3+0][0]) / k2); \
             ); \
     this->impose_zero_modes(); \
+    this->symmetrize(this->cu, 3); \
+    std::fill_n(this->ru, this->rd->full_size, 0.); \
+    std::fill_n(this->rv[src], this->rd->full_size, 0.); \
     /* get fields from Fourier space to real space */ \
     FFTW(execute)(*((FFTW(plan)*)this->c2r_velocity )); \
     FFTW(execute)(*((FFTW(plan)*)this->vc2r[src])); \
     /* compute cross product $u \times \omega$, and normalize */ \
     R tmpx0, tmpy0, tmpz0; \
     RLOOP ( \
-             tmpx0 = (this->ru[rindex*3+1]*this->rv[src][rindex*3+2] - this->ru[rindex*3+2]*this->rv[src][rindex*3+1]); \
-             tmpy0 = (this->ru[rindex*3+2]*this->rv[src][rindex*3+0] - this->ru[rindex*3+0]*this->rv[src][rindex*3+2]); \
-             tmpz0 = (this->ru[rindex*3+0]*this->rv[src][rindex*3+1] - this->ru[rindex*3+1]*this->rv[src][rindex*3+0]); \
-             this->ru[rindex*3+0] = tmpx0 / (this->rd->full_size / 3); \
-             this->ru[rindex*3+1] = tmpy0 / (this->rd->full_size / 3); \
-             this->ru[rindex*3+2] = tmpz0 / (this->rd->full_size / 3); \
+             tmpx0 = (this->ru[rindex*3+1]*this->rv[src][rindex*3+2] - this->ru[rindex*3+2]*this->rv[src][rindex*3+1]) / this->normalization_factor; \
+             tmpy0 = (this->ru[rindex*3+2]*this->rv[src][rindex*3+0] - this->ru[rindex*3+0]*this->rv[src][rindex*3+2]) / this->normalization_factor; \
+             tmpz0 = (this->ru[rindex*3+0]*this->rv[src][rindex*3+1] - this->ru[rindex*3+1]*this->rv[src][rindex*3+0]) / this->normalization_factor; \
+             this->ru[rindex*3+0] = tmpx0 / this->normalization_factor; \
+             this->ru[rindex*3+1] = tmpy0 / this->normalization_factor; \
+             this->ru[rindex*3+2] = tmpz0 / this->normalization_factor; \
             ); \
     /* go back to Fourier space */ \
     FFTW(execute)(*((FFTW(plan)*)this->r2c_velocity )); \
+    this->low_pass_Fourier(this->cu, 3, this->kM); \
+    this->symmetrize(this->cu, 3); \
     /* $\imath k \times DFT(u \times \omega)$ */ \
     R tmpx1, tmpy1, tmpz1; \
+    FFTW(execute)(*((FFTW(plan)*)this->vc2r[src])); \
     CLOOP( \
             tmpx0 = -(this->ky[yindex]*this->cu[cindex*3+2][1] - this->kz[zindex]*this->cu[cindex*3+1][1]); \
             tmpy0 = -(this->kz[zindex]*this->cu[cindex*3+0][1] - this->kx[xindex]*this->cu[cindex*3+2][1]); \
@@ -213,13 +222,20 @@ void fluid_solver<R>::omega_nonlin( \
             tmpx1 =  (this->ky[yindex]*this->cu[cindex*3+2][0] - this->kz[zindex]*this->cu[cindex*3+1][0]); \
             tmpy1 =  (this->kz[zindex]*this->cu[cindex*3+0][0] - this->kx[xindex]*this->cu[cindex*3+2][0]); \
             tmpz1 =  (this->kx[xindex]*this->cu[cindex*3+1][0] - this->ky[yindex]*this->cu[cindex*3+0][0]); \
-            this->cu[cindex*3+0][0] = tmpx0;\
-            this->cu[cindex*3+1][0] = tmpy0;\
-            this->cu[cindex*3+2][0] = tmpz0;\
-            this->cu[cindex*3+0][1] = tmpx1;\
-            this->cu[cindex*3+1][1] = tmpy1;\
-            this->cu[cindex*3+2][1] = tmpz1;\
+            this->cu[cindex*3+0][0] = 0.0 /*tmpx0*/;\
+            this->cu[cindex*3+1][0] = 0.0 /*tmpy0*/;\
+            this->cu[cindex*3+2][0] = 0.0 /*tmpz0*/;\
+            this->cu[cindex*3+0][1] = 0.0 /*tmpx1*/;\
+            this->cu[cindex*3+1][1] = 0.0 /*tmpy1*/;\
+            this->cu[cindex*3+2][1] = 0.0 /*tmpz1*/;\
+            this->cv[src][cindex*3+0][0] /= this->normalization_factor; \
+            this->cv[src][cindex*3+0][1] /= this->normalization_factor; \
+            this->cv[src][cindex*3+1][0] /= this->normalization_factor; \
+            this->cv[src][cindex*3+1][1] /= this->normalization_factor; \
+            this->cv[src][cindex*3+2][0] /= this->normalization_factor; \
+            this->cv[src][cindex*3+2][1] /= this->normalization_factor; \
             ); \
+    DEBUG_MSG(">>>>>>>>>>>>>>nonlin end   v %g\n", this->correl_vec(this->cv[src], this->cv[src])); \
 } \
  \
 template<> \
@@ -227,6 +243,7 @@ void fluid_solver<R>::step(double dt) \
 { \
     double k2, factor0, factor1; \
     this->omega_nonlin(0); \
+    std::fill_n(this->rv[2], this->rd->full_size, 0); \
     CLOOP( \
             k2 = (this->kx[xindex]*this->kx[xindex] + \
                   this->ky[yindex]*this->ky[yindex] + \
@@ -240,13 +257,18 @@ void fluid_solver<R>::step(double dt) \
             this->cv[1][cindex*3+2][1] = (this->cv[0][cindex*3+2][1] + dt*this->cu[cindex*3+2][1])*factor0; \
             ); \
  \
+    this->force_divfree(this->cv[1]); \
+    DEBUG_MSG(">>>>>>>>>>>>>>before nonlin1 v %g\n", this->correl_vec(this->cv[1], this->cv[1])); \
     this->omega_nonlin(1); \
+    DEBUG_MSG(">>>>>>>>>>>>>>after  nonlin1 v %g\n", this->correl_vec(this->cv[1], this->cv[1])); \
+    std::fill_n(this->rv[1], this->rd->full_size, 0); \
     CLOOP( \
             k2 = (this->kx[xindex]*this->kx[xindex] + \
                   this->ky[yindex]*this->ky[yindex] + \
                   this->kz[zindex]*this->kz[zindex]); \
             factor0 = exp(-this->nu * k2 * dt/2); \
-            factor1 = exp( this->nu * k2 * dt/2); \
+            factor1 = exp(-this->nu * k2 * dt/2); \
+            if (factor0 > 1 || factor1 > 1) \
             this->cv[2][cindex*3+0][0] = (3*this->cv[0][cindex*3+0][0]*factor0 + (this->cv[1][cindex*3+0][0] + dt*this->cu[cindex*3+0][0])*factor1)*0.25; \
             this->cv[2][cindex*3+1][0] = (3*this->cv[0][cindex*3+1][0]*factor0 + (this->cv[1][cindex*3+1][0] + dt*this->cu[cindex*3+1][0])*factor1)*0.25; \
             this->cv[2][cindex*3+2][0] = (3*this->cv[0][cindex*3+2][0]*factor0 + (this->cv[1][cindex*3+2][0] + dt*this->cu[cindex*3+2][0])*factor1)*0.25; \
@@ -255,20 +277,26 @@ void fluid_solver<R>::step(double dt) \
             this->cv[2][cindex*3+2][1] = (3*this->cv[0][cindex*3+2][1]*factor0 + (this->cv[1][cindex*3+2][1] + dt*this->cu[cindex*3+2][1])*factor1)*0.25; \
             ); \
  \
+    this->force_divfree(this->cv[2]); \
+    DEBUG_MSG(">>>>>>>>>>>>>>before nonlin2 v %g\n", this->correl_vec(this->cv[2], this->cv[2])); \
     this->omega_nonlin(2); \
+    DEBUG_MSG(">>>>>>>>>>>>>>after  nonlin2 v %g\n", this->correl_vec(this->cv[2], this->cv[2])); \
     CLOOP( \
             k2 = (this->kx[xindex]*this->kx[xindex] + \
                   this->ky[yindex]*this->ky[yindex] + \
                   this->kz[zindex]*this->kz[zindex]); \
             factor0 = exp(-this->nu * k2 * dt); \
             factor1 = exp(-this->nu * k2 * dt/2); \
-            this->cv[3][cindex*3+0][0] = (this->cv[0][cindex*3+0][0]*factor0 + 2*(this->cv[1][cindex*3+0][0] + dt*this->cu[cindex*3+0][0])*factor1)/3; \
-            this->cv[3][cindex*3+1][0] = (this->cv[0][cindex*3+1][0]*factor0 + 2*(this->cv[1][cindex*3+1][0] + dt*this->cu[cindex*3+1][0])*factor1)/3; \
-            this->cv[3][cindex*3+2][0] = (this->cv[0][cindex*3+2][0]*factor0 + 2*(this->cv[1][cindex*3+2][0] + dt*this->cu[cindex*3+2][0])*factor1)/3; \
-            this->cv[3][cindex*3+0][1] = (this->cv[0][cindex*3+0][1]*factor0 + 2*(this->cv[1][cindex*3+0][1] + dt*this->cu[cindex*3+0][1])*factor1)/3; \
-            this->cv[3][cindex*3+1][1] = (this->cv[0][cindex*3+1][1]*factor0 + 2*(this->cv[1][cindex*3+1][1] + dt*this->cu[cindex*3+1][1])*factor1)/3; \
-            this->cv[3][cindex*3+2][1] = (this->cv[0][cindex*3+2][1]*factor0 + 2*(this->cv[1][cindex*3+2][1] + dt*this->cu[cindex*3+2][1])*factor1)/3; \
-            ); \
+            this->cv[3][cindex*3+0][0] = (this->cv[0][cindex*3+0][0]*factor0 + 2*(this->cv[2][cindex*3+0][0] + dt*this->cu[cindex*3+0][0])*factor1)/3; \
+            this->cv[3][cindex*3+1][0] = (this->cv[0][cindex*3+1][0]*factor0 + 2*(this->cv[2][cindex*3+1][0] + dt*this->cu[cindex*3+1][0])*factor1)/3; \
+            this->cv[3][cindex*3+2][0] = (this->cv[0][cindex*3+2][0]*factor0 + 2*(this->cv[2][cindex*3+2][0] + dt*this->cu[cindex*3+2][0])*factor1)/3; \
+            this->cv[3][cindex*3+0][1] = (this->cv[0][cindex*3+0][1]*factor0 + 2*(this->cv[2][cindex*3+0][1] + dt*this->cu[cindex*3+0][1])*factor1)/3; \
+            this->cv[3][cindex*3+1][1] = (this->cv[0][cindex*3+1][1]*factor0 + 2*(this->cv[2][cindex*3+1][1] + dt*this->cu[cindex*3+1][1])*factor1)/3; \
+            this->cv[3][cindex*3+2][1] = (this->cv[0][cindex*3+2][1]*factor0 + 2*(this->cv[2][cindex*3+2][1] + dt*this->cu[cindex*3+2][1])*factor1)/3; \
+            );  \
+    this->force_divfree(this->cv[0]); \
+    DEBUG_MSG(">>>>>>>>>>>>>>3 u %g\n", this->correl_vec(this->cu, this->cu)); \
+    DEBUG_MSG(">>>>>>>>>>>>>>3 v %g\n", this->correl_vec(this->cv[3], this->cv[3])); \
  \
     this->iteration++; \
 }
diff --git a/src/fluid_solver_base.cpp b/src/fluid_solver_base.cpp
index 7ab4a730993e7e1fe294e92aab3788d36e17ac7a..57e85324e5daa127edde7ee288cde03d1d30b386 100644
--- a/src/fluid_solver_base.cpp
+++ b/src/fluid_solver_base.cpp
@@ -49,6 +49,7 @@ fluid_solver_base<R>::fluid_solver_base( \
     ntmp[3] = 3; \
     this->rd = new field_descriptor<R>( \
             4, ntmp, MPI_RNUM, MPI_COMM_WORLD);\
+    this->normalization_factor = (this->rd->full_size/3); \
     ntmp[0] = ny; \
     ntmp[1] = nz; \
     ntmp[2] = nx/2 + 1; \
@@ -143,17 +144,121 @@ R fluid_solver_base<R>::correl_vec(C *a, C *b) \
             if (k2 < this->kM2) \
             { \
                 factor = (xindex == 0) ? 1 : 2; \
-                val_process += factor * ((*(a + cindex))[0] * (*(b + cindex))[0] + \
-                                         (*(a + cindex))[1] * (*(b + cindex))[1]); \
+                val_process += factor * ((*(a + 3*cindex))[0] * (*(b + 3*cindex))[0] + \
+                                         (*(a + 3*cindex))[1] * (*(b + 3*cindex))[1] + \
+                                         (*(a + 3*cindex+1))[0] * (*(b + 3*cindex+1))[0] + \
+                                         (*(a + 3*cindex+1))[1] * (*(b + 3*cindex+1))[1] + \
+                                         (*(a + 3*cindex+2))[0] * (*(b + 3*cindex+2))[0] + \
+                                         (*(a + 3*cindex+2))[1] * (*(b + 3*cindex+2))[1] \
+                                        ); \
             } \
             );\
     MPI_Allreduce( \
             (void*)(&val_process), \
             (void*)(&val), \
             1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD); \
-    /*return R(val / (this->rd->full_size / 3)) / (this->rd->full_size / 3);*/ \
-    return R(val); \
-}
+    return R(val / this->normalization_factor); \
+} \
+ \
+template<> \
+void fluid_solver_base<R>::low_pass_Fourier(C *a, const int howmany, const double kmax) \
+{ \
+    double k2; \
+    const double km2 = kmax*kmax; \
+    const int howmany2 = 2*howmany; \
+    CLOOP( \
+            k2 = (this->kx[xindex]*this->kx[xindex] + \
+                  this->ky[yindex]*this->ky[yindex] + \
+                  this->kz[zindex]*this->kz[zindex]); \
+            if (k2 >= km2) \
+                std::fill_n((R*)(a + howmany*cindex), howmany2, 0.0); \
+            );\
+} \
+ \
+template<> \
+void fluid_solver_base<R>::force_divfree(C *a) \
+{ \
+    double k2; \
+    C tval; \
+    CLOOP( \
+            k2 = (this->kx[xindex]*this->kx[xindex] + \
+                  this->ky[yindex]*this->ky[yindex] + \
+                  this->kz[zindex]*this->kz[zindex]); \
+            if (k2 >= this->kM2) \
+            { \
+                tval[0] = (this->kx[xindex]*((*(a + cindex*3  ))[0]) + \
+                           this->ky[yindex]*((*(a + cindex*3+1))[0]) + \
+                           this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2; \
+                tval[1] = (this->kx[xindex]*((*(a + cindex*3  ))[1]) + \
+                           this->ky[yindex]*((*(a + cindex*3+1))[1]) + \
+                           this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2; \
+                a[cindex*3  ][0] -= tval[0]*this->kx[xindex]; \
+                a[cindex*3+1][1] -= tval[1]*this->kx[xindex]; \
+                a[cindex*3+2][0] -= tval[0]*this->ky[yindex]; \
+                a[cindex*3  ][1] -= tval[1]*this->ky[yindex]; \
+                a[cindex*3+1][0] -= tval[0]*this->kz[zindex]; \
+                a[cindex*3+2][1] -= tval[1]*this->kz[zindex]; \
+            } \
+            );\
+} \
+ \
+template<> \
+void fluid_solver_base<R>::symmetrize(C *data, const int howmany) \
+{ \
+    ptrdiff_t ii, cc; \
+    MPI_Status *mpistatus = new MPI_Status[1]; \
+    C *buffer; \
+    buffer = FFTW(alloc_complex)(howmany*this->cd->sizes[1]); \
+    if (this->cd->myrank == this->cd->rank[0]) \
+    { \
+        for (cc = 0; cc < howmany; cc++) \
+            (*(data+cc))[1] = 0.0; \
+        for (ii = 1; ii < this->cd->sizes[1]/2; ii++) \
+            for (cc = 0; cc < howmany; cc++) { \
+                ( *(data + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]+cc))[0] = \
+                 (*(data + howmany*ii*this->cd->sizes[2]+cc))[0]; \
+                ( *(data + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]+cc))[1] = \
+                -(*(data + howmany*ii*this->cd->sizes[2]+cc))[1]; \
+                } \
+    } \
+    int yy; \
+    int ranksrc, rankdst; \
+    for (yy = 1; yy < this->cd->sizes[0]/2; yy++) { \
+        ranksrc = this->cd->rank[yy]; \
+        rankdst = this->cd->rank[this->cd->sizes[0] - yy]; \
+        if (this->cd->myrank == ranksrc) \
+            for (ii = 0; ii < this->cd->sizes[1]; ii++) \
+                for (cc = 0; cc < howmany; cc++) { \
+                    (*(buffer + howmany*ii+cc))[0] = (*((data + howmany*(yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]) + howmany*ii*this->cd->sizes[2] + cc))[0]; \
+                    (*(buffer + howmany*ii+cc))[1] = (*((data + howmany*(yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]) + howmany*ii*this->cd->sizes[2] + cc))[1]; \
+                } \
+        if (ranksrc != rankdst) \
+        { \
+            if (this->cd->myrank == ranksrc) \
+                MPI_Send((void*)buffer, \
+                         howmany*this->cd->sizes[1], MPI_CNUM, rankdst, yy, \
+                         MPI_COMM_WORLD); \
+            if (this->cd->myrank == rankdst) \
+                MPI_Recv((void*)buffer, \
+                         howmany*this->cd->sizes[1], MPI_CNUM, ranksrc, yy, \
+                         MPI_COMM_WORLD, mpistatus); \
+        } \
+        if (this->cd->myrank == rankdst) \
+        { \
+            for (ii = 1; ii < this->cd->sizes[1]; ii++) \
+                for (cc = 0; cc < howmany; cc++) { \
+                    (*((data + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]) + howmany*ii*this->cd->sizes[2] + cc))[0] =  (*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[0]; \
+                    (*((data + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]) + howmany*ii*this->cd->sizes[2] + cc))[1] = -(*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[1]; \
+                } \
+            for (cc = 0; cc < howmany; cc++) { \
+                (*((data + (this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2] + cc)))[0] =  (*(buffer + cc))[0]; \
+                (*((data + (this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2] + cc)))[1] = -(*(buffer + cc))[1]; \
+            } \
+        } \
+    } \
+    FFTW(free)(buffer); \
+    delete mpistatus; \
+} \
 
 /*****************************************************************************/
 
diff --git a/src/fluid_solver_base.hpp b/src/fluid_solver_base.hpp
index f0001e5323fd06731e262f4074c222346f5de6a4..679a3342783ec294904b7eb2182c3090a4672378 100644
--- a/src/fluid_solver_base.hpp
+++ b/src/fluid_solver_base.hpp
@@ -21,6 +21,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <iostream>
+#include "base.hpp"
 #include "field_descriptor.hpp"
 
 #ifndef FLUID_SOLVER_BASE
@@ -42,6 +43,7 @@ class fluid_solver_base
         typedef rnumber cnumber[2];
     public:
         field_descriptor<rnumber> *cd, *rd;
+        ptrdiff_t normalization_factor;
 
         /* simulation parameters */
         int iteration;
@@ -69,6 +71,9 @@ class fluid_solver_base
                 double DKZ = 1.0);
         ~fluid_solver_base();
 
+        void low_pass_Fourier(cnumber *a, int howmany, double kmax);
+        void force_divfree(cnumber *a);
+        void symmetrize(cnumber *a, int howmany);
         rnumber correl_vec(cnumber *a, cnumber *b);
 };