diff --git a/src/fluid_solver.cpp b/src/fluid_solver.cpp index d10e6960bb72b301ee9b71bc056b6d87c6894e1c..6b64d08b8cf2d2d2985c936bbb56786b6bf8e100 100644 --- a/src/fluid_solver.cpp +++ b/src/fluid_solver.cpp @@ -19,12 +19,35 @@ ************************************************************************/ - +#include <cassert> #include "fluid_solver.hpp" #include "fftw_tools.hpp" +/*****************************************************************************/ +/* macros for loops */ + +/* complex loop */ +#define CLOOP(expression) \ + \ +{ \ + int cindex; \ + for (int yindex = 0; yindex < this->cd->subsizes[0]; yindex++) \ + for (int zindex = 0; zindex < this->cd->subsizes[1]; zindex++) \ + { \ + cindex = (yindex * this->cd->sizes[1] + zindex)*this->cd->sizes[2]; \ + for (int xindex = 0; xindex < this->cd->subsizes[2]; xindex++) \ + { \ + expression; \ + cindex++; \ + } \ + } \ +} + +/*****************************************************************************/ + + /*****************************************************************************/ /* macro for specializations to numeric types compatible with FFTW */ @@ -34,7 +57,10 @@ template<> \ fluid_solver<R>::fluid_solver( \ int nx, \ int ny, \ - int nz) \ + int nz, \ + double DKX, \ + double DKY, \ + double DKZ) \ { \ int ntmp[4]; \ ntmp[0] = nz; \ @@ -43,6 +69,8 @@ fluid_solver<R>::fluid_solver( \ ntmp[3] = 3; \ this->rd = new field_descriptor<R>( \ 4, ntmp, MPI_RNUM, MPI_COMM_WORLD);\ + ntmp[0] = ny; \ + ntmp[1] = nz; \ ntmp[2] = nx/2 + 1; \ this->cd = new field_descriptor<R>( \ 4, ntmp, MPI_CNUM, MPI_COMM_WORLD);\ @@ -50,6 +78,19 @@ fluid_solver<R>::fluid_solver( \ this->cvelocity = FFTW(alloc_complex)(this->cd->local_size);\ this->rvorticity = FFTW(alloc_real)(this->cd->local_size*2);\ this->rvelocity = FFTW(alloc_real)(this->cd->local_size*2);\ + \ + this->ru = this->rvelocity;\ + this->cu = this->cvelocity;\ + \ + this->rv[0] = this->rvorticity;\ + this->rv[3] = this->rvorticity;\ + this->cv[0] = this->cvorticity;\ + this->cv[3] = this->cvorticity;\ + \ + this->cv[1] = FFTW(alloc_complex)(this->cd->local_size);\ + this->cv[2] = FFTW(alloc_complex)(this->cd->local_size);\ + this->rv[1] = (R*)(this->cv);\ + this->rv[2] = (R*)(this->cv);\ \ this->c2r_vorticity = new FFTW(plan);\ this->r2c_vorticity = new FFTW(plan);\ @@ -61,51 +102,141 @@ fluid_solver<R>::fluid_solver( \ nx};\ \ *(FFTW(plan)*)this->c2r_vorticity = FFTW(mpi_plan_many_dft_c2r)( \ - 3, sizes, 3, \ - FFTW_MPI_DEFAULT_BLOCK, \ - FFTW_MPI_DEFAULT_BLOCK, \ + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ this->cvorticity, this->rvorticity, \ - MPI_COMM_WORLD, \ - FFTW_ESTIMATE); \ + MPI_COMM_WORLD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_IN); \ \ *(FFTW(plan)*)this->r2c_vorticity = FFTW(mpi_plan_many_dft_r2c)( \ - 3, sizes, 3, \ - FFTW_MPI_DEFAULT_BLOCK, \ - FFTW_MPI_DEFAULT_BLOCK, \ + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ this->rvorticity, this->cvorticity, \ - MPI_COMM_WORLD, \ - FFTW_ESTIMATE); \ + MPI_COMM_WORLD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_OUT); \ \ *(FFTW(plan)*)this->c2r_velocity = FFTW(mpi_plan_many_dft_c2r)( \ - 3, sizes, 3, \ - FFTW_MPI_DEFAULT_BLOCK, \ - FFTW_MPI_DEFAULT_BLOCK, \ + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ this->cvelocity, this->rvelocity, \ - MPI_COMM_WORLD, \ - FFTW_ESTIMATE); \ + MPI_COMM_WORLD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_IN); \ \ *(FFTW(plan)*)this->r2c_velocity = FFTW(mpi_plan_many_dft_r2c)( \ - 3, sizes, 3, \ - FFTW_MPI_DEFAULT_BLOCK, \ - FFTW_MPI_DEFAULT_BLOCK, \ + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ this->rvelocity, this->cvelocity, \ - MPI_COMM_WORLD, \ - FFTW_ESTIMATE); \ + MPI_COMM_WORLD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_OUT); \ + \ + this->uc2r = this->c2r_velocity;\ + this->ur2c = this->r2c_velocity;\ + this->vc2r[0] = this->c2r_vorticity;\ + this->vr2c[0] = this->r2c_vorticity;\ + \ + this->vc2r[1] = new FFTW(plan);\ + this->vr2c[1] = new FFTW(plan);\ + this->vc2r[2] = new FFTW(plan);\ + this->vr2c[2] = new FFTW(plan);\ + \ + *(FFTW(plan)*)this->vc2r[1] = FFTW(mpi_plan_many_dft_c2r)( \ + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ + this->cv[1], this->rv[1], \ + MPI_COMM_WORLD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_IN); \ + \ + *(FFTW(plan)*)this->vc2r[2] = FFTW(mpi_plan_many_dft_c2r)( \ + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ + this->cv[2], this->rv[2], \ + MPI_COMM_WORLD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_IN); \ + \ + *(FFTW(plan)*)this->vr2c[1] = FFTW(mpi_plan_many_dft_r2c)( \ + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ + this->rv[1], this->cv[1], \ + MPI_COMM_WORLD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_OUT); \ + \ + *(FFTW(plan)*)this->vr2c[2] = FFTW(mpi_plan_many_dft_r2c)( \ + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ + this->rv[2], this->cv[2], \ + MPI_COMM_WORLD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_OUT); \ + \ + /* wavenumbers etc */ \ + \ + this->dkx = DKX; \ + this->dky = DKY; \ + this->dkz = DKZ; \ + this->kx = new double[this->cd->sizes[2]]; \ + this->ky = new double[this->cd->subsizes[0]]; \ + this->kz = new double[this->cd->sizes[1]]; \ + this->knullx = new bool[this->cd->sizes[2]]; \ + this->knully = new bool[this->cd->subsizes[0]]; \ + this->knullz = new bool[this->cd->sizes[1]]; \ + this->nonzerokx = int(this->rd->sizes[2] / 3); \ + this->kMx = this->dkx*(this->nonzerokx-1); \ + this->nonzeroky = int(this->rd->sizes[1] / 3); \ + this->kMy = this->dky*(this->nonzeroky-1); \ + this->nonzeroky = 2*this->nonzeroky - 1; \ + this->nonzerokz = int(this->rd->sizes[0] / 3); \ + this->kMz = this->dkz*(this->nonzerokz-1); \ + this->nonzerokz = 2*this->nonzerokz - 1; \ + int i, ii; \ + for (i = 0; i<this->cd->sizes[2]; i++) \ + { \ + this->kx[i] = i*this->dkx; \ + if (i < this->nonzerokx) \ + this->knullx[i] = false; \ + else \ + this->knullx[i] = true; \ + } \ + for (i = 0; i<this->cd->subsizes[0]; i++) \ + { \ + int tval = (this->nonzeroky+1)/2; \ + ii = i + this->cd->starts[0]; \ + if (ii <= this->rd->sizes[1]/2) \ + this->ky[i] = this->dky*ii; \ + else \ + this->ky[i] = this->dky*(ii - this->rd->sizes[1]); \ + if (ii < tval || (this->rd->sizes[1] - ii) < tval) \ + this->knully[i] = false; \ + else \ + this->knully[i] = true; \ + } \ + for (i = 0; i<this->cd->sizes[1]; i++) \ + { \ + int tval = (this->nonzerokz+1)/2; \ + if (i <= this->rd->sizes[0]/2) \ + this->kz[i] = this->dkz*i; \ + else \ + this->kz[i] = this->dkz*(i - this->rd->sizes[0]); \ + if (i < tval || (this->rd->sizes[0] - i) < tval) \ + this->knullz[i] = false; \ + else \ + this->knullz[i] = true; \ + } \ } \ \ template<> \ fluid_solver<R>::~fluid_solver() \ { \ + \ + delete this->kx;\ + delete this->ky;\ + delete this->kz;\ + delete this->knullx;\ + delete this->knully;\ + delete this->knullz;\ + \ FFTW(destroy_plan)(*(FFTW(plan)*)this->c2r_vorticity);\ FFTW(destroy_plan)(*(FFTW(plan)*)this->r2c_vorticity);\ FFTW(destroy_plan)(*(FFTW(plan)*)this->c2r_velocity );\ FFTW(destroy_plan)(*(FFTW(plan)*)this->r2c_velocity );\ + FFTW(destroy_plan)(*(FFTW(plan)*)this->vc2r[1]);\ + FFTW(destroy_plan)(*(FFTW(plan)*)this->vr2c[1]);\ + FFTW(destroy_plan)(*(FFTW(plan)*)this->vc2r[2]);\ + FFTW(destroy_plan)(*(FFTW(plan)*)this->vr2c[2]);\ \ delete (FFTW(plan)*)this->c2r_vorticity;\ delete (FFTW(plan)*)this->r2c_vorticity;\ delete (FFTW(plan)*)this->c2r_velocity ;\ delete (FFTW(plan)*)this->r2c_velocity ;\ + delete (FFTW(plan)*)this->vc2r[1];\ + delete (FFTW(plan)*)this->vr2c[1];\ + delete (FFTW(plan)*)this->vc2r[2];\ + delete (FFTW(plan)*)this->vr2c[2];\ \ + FFTW(free)(this->cv[1]);\ + FFTW(free)(this->cv[2]);\ FFTW(free)(this->cvorticity);\ FFTW(free)(this->rvorticity);\ FFTW(free)(this->cvelocity);\ @@ -116,8 +247,37 @@ fluid_solver<R>::~fluid_solver() \ } \ \ template<> \ -void fluid_solver<R>::step() \ -{} +void fluid_solver<R>::omega_nonlin( \ + int src) \ +{ \ + assert(src >= 0 && src < 3); \ + double k2; \ + /* compute velocity field */ \ + CLOOP( \ + k2 = (this->kx[xindex]*this->kx[xindex] + \ + this->ky[yindex]*this->ky[yindex] + \ + this->kz[zindex]*this->kz[zindex]); \ + this->cu[cindex*3 + 0][0] = (this->ky[yindex]*this->cv[src][cindex*3+2][1] - this->kz[zindex]*this->cv[src][cindex*3+1][1]) / k2; \ + this->cu[cindex*3 + 1][0] = (this->kz[zindex]*this->cv[src][cindex*3+0][1] - this->kx[xindex]*this->cv[src][cindex*3+2][1]) / k2; \ + this->cu[cindex*3 + 2][0] = (this->kx[xindex]*this->cv[src][cindex*3+1][1] - this->ky[yindex]*this->cv[src][cindex*3+0][1]) / k2; \ + this->cu[cindex*3 + 0][1] = (this->ky[yindex]*this->cv[src][cindex*3+2][0] - this->kz[zindex]*this->cv[src][cindex*3+1][0]) / k2; \ + this->cu[cindex*3 + 1][1] = (this->kz[zindex]*this->cv[src][cindex*3+0][0] - this->kx[xindex]*this->cv[src][cindex*3+2][0]) / k2; \ + this->cu[cindex*3 + 2][1] = (this->kx[xindex]*this->cv[src][cindex*3+1][0] - this->ky[yindex]*this->cv[src][cindex*3+0][0]) / k2; \ + ); \ + /* $\imath k \times DFT(u \times \omega)$ */ \ + /* get fields from Fourier space to real space */ \ + FFTW(execute)(*((FFTW(plan)*)this->r2c_vorticity)); \ + FFTW(execute)(*((FFTW(plan)*)this->r2c_velocity )); \ + /* compute cross product into vr */ \ + /* get cross product back into Fourier space */ \ + FFTW(execute)(*((FFTW(plan)*)this->c2r_velocity)); \ + /* dealiasing */ \ +} \ + \ +template<> \ +void fluid_solver<R>::step(double dt) \ +{ \ +} /*****************************************************************************/