diff --git a/cpp/field.cpp b/cpp/field.cpp index 65ac42990535a7a9d7964d87dcce680ec7843ec5..a10b9e964209e40e9ddc301a28145730b4de643a 100644 --- a/cpp/field.cpp +++ b/cpp/field.cpp @@ -1502,6 +1502,24 @@ void field<rnumber, be, fc>::Hermitian_reflect() finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); } + +/** \brief Enforce Hermitian symmetry (slow, reference) + * + * TurTLE uses real-to-complex and complex-to-real FFTW transforms, because the + * equations are PDEs of real-valued fields. + * Hermitian symmetry means that Fourier-transformed real valued fields must + * respect the equation \f$\hat f(-\mathbf{k}) = {\hat f}^*(\mathbf{k})\f$. + * + * FFTW enforces this property mainly by only storing the positive half of the + * Fourier grid for the fastest array component. In TurTLE's case, this means + * \f$ k_x \f$. + * For the \f$ k_x = 0 \f$ plane, the symmetry must be enforced. + * + * This method uses a pair of backwards-forwards FFTs, which leads to FFTW + * effectively imposing Hermitian symmetry. It should be used as a reference + * implementation to calibrate against in the general case. + * + * */ template <typename rnumber, field_backend be, field_components fc> @@ -1557,10 +1575,33 @@ void field<rnumber, be, fc>::symmetrize_FFT() return; } +/** \brief Enforce Hermitian symmetry (unoptimized, amplitude-aware) + * + * TurTLE uses real-to-complex and complex-to-real FFTW transforms, because the + * equations are PDEs of real-valued fields. + * Hermitian symmetry means that Fourier-transformed real valued fields must + * respect the equation \f$\hat f(-\mathbf{k}) = {\hat f}^*(\mathbf{k})\f$. + * + * FFTW enforces this property mainly by only storing the positive half of the + * Fourier grid for the fastest array component. In TurTLE's case, this means + * \f$ k_x \f$. + * For the \f$ k_x = 0 \f$ plane, the symmetry must be enforced. + * + * This method is an alternative to the default arithmetic mean method, meant + * to be used in special circumstances where it is important to retain the + * exact amplitude of modes. + * Rather than an arithmetic mean, here we compute the amplitudes and phases + * for the \f$(0, k_y, k_z)\f$ and \f$(0, -k_y, -k_z)\f$ modes. We then compute + * a mean amplitude as the square root of the product of the two amplitudes, + * and a mean phase as the arithmetic mean of the two phases. + * When this method is applied to a field with fixed amplitudes, but random + * phases, it should preserve the spectrum of the initial field exactly. + * + * */ template <typename rnumber, field_backend be, field_components fc> -void field<rnumber, be, fc>::symmetrize() +void field<rnumber, be, fc>::symmetrize_alternate() { TIMEZONE("field::symmetrize"); start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); @@ -1701,7 +1742,6 @@ void field<rnumber, be, fc>::symmetrize() if (this->clayout->myrank == rankp) { ptrdiff_t iyy = iy - this->clayout->starts[0]; - #pragma omp simd for (ptrdiff_t iz = zstart; iz < zend; iz++) { // modulo operation makes sense only for slab decomposition @@ -1732,7 +1772,6 @@ void field<rnumber, be, fc>::symmetrize() if (this->clayout->myrank == rankm) { ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0]; - #pragma omp simd for (ptrdiff_t iz = zstart; iz < zend; iz++) { // modulo operation makes sense only for slab decomposition @@ -1768,6 +1807,249 @@ void field<rnumber, be, fc>::symmetrize() delete mpistatus; // symmetrize kx = 0, ky = 0 line if (this->clayout->myrank == this->clayout->rank[0][0]) + { + for (ptrdiff_t iz = 1; iz < ptrdiff_t(this->clayout->sizes[1]/2); iz++) + { + ptrdiff_t cindex0 = this->get_cindex(0, 0, iz); + ptrdiff_t cindex1 = this->get_cindex(0, 0, this->clayout->sizes[1] - iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) + { + double re = ((*(cdata + cc + ncomp(fc)*cindex0))[0] + (*(cdata + cc + ncomp(fc)*cindex1))[0])/2; + double im = ((*(cdata + cc + ncomp(fc)*cindex0))[1] - (*(cdata + cc + ncomp(fc)*cindex1))[1])/2; + const double ampp = sqrt( + (*(cdata + cc + ncomp(fc)*cindex0))[0]*(*(cdata + cc + ncomp(fc)*cindex0))[0] + + (*(cdata + cc + ncomp(fc)*cindex0))[1]*(*(cdata + cc + ncomp(fc)*cindex0))[1]); + const double phip = atan2( + (*(cdata + cc + ncomp(fc)*cindex0))[1], + (*(cdata + cc + ncomp(fc)*cindex0))[0]); + const double ampm = sqrt( + (*(cdata + cc + ncomp(fc)*cindex1))[0]*(*(cdata + cc + ncomp(fc)*cindex1))[0] + + (*(cdata + cc + ncomp(fc)*cindex1))[1]*(*(cdata + cc + ncomp(fc)*cindex1))[1]); + const double phim = atan2( + (*(cdata + cc + ncomp(fc)*cindex1))[1], + (*(cdata + cc + ncomp(fc)*cindex1))[0]); + const double amp = sqrt(ampp*ampm); + const double phi = (phip - phim)/2; + (*(cdata + cc + ncomp(fc)*cindex0))[0] = amp*cos(phi); + (*(cdata + cc + ncomp(fc)*cindex0))[1] = amp*sin(phi); + (*(cdata + cc + ncomp(fc)*cindex1))[0] = amp*cos(phi); + (*(cdata + cc + ncomp(fc)*cindex1))[1] = -amp*sin(phi); + } + } + } + finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); +} + +/** \brief Enforce Hermitian symmetry (fast and reasonable) + * + * TurTLE uses real-to-complex and complex-to-real FFTW transforms, because the + * equations are PDEs of real-valued fields. + * Hermitian symmetry means that Fourier-transformed real valued fields must + * respect the equation \f$\hat f(-\mathbf{k}) = {\hat f}^*(\mathbf{k})\f$. + * + * FFTW enforces this property mainly by only storing the positive half of the + * Fourier grid for the fastest array component. In TurTLE's case, this means + * \f$ k_x \f$. + * For the \f$ k_x = 0 \f$ plane, the symmetry must be enforced. + * + * This method uses an arithmetic mean of the \f$ (0, k_y, k_z)\f$ mode and + * the conjugate of the \f$(0, -k_y, -k_z) \f$ mode to generate the desired + * values. The method is fast (other than the required MPI communications). + * + * Note: the method is adequate either in cases where deviations from + * Hermitian symmetry are small, or in cases where deviations from correct + * physics is irrelevant. + * In practice: initial condition fields may be strongly perturbed by the + * application of this method, but they are unphysical anyway; during + * the quasistationary regime of some simulation, the method is applied + * regularly to all relevant fields, and deviations are expected to be small, + * i.e. effect on PDE approximations is negligible. + * + * */ + +template <typename rnumber, + field_backend be, + field_components fc> +void field<rnumber, be, fc>::symmetrize() +{ + TIMEZONE("field::symmetrize"); + start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD); + assert(!this->real_space_representation); + typename fftw_interface<rnumber>::complex *cdata = this->get_cdata(); + + // make 0 mode real + if (this->myrank == this->clayout->rank[0][0]) + { + for (ptrdiff_t cc = 0; cc < ncomp(fc); cc++) + cdata[cc][1] = 0.0; + } + // put kx = nx/2 modes to 0 + for (ptrdiff_t iy = 0; iy < ptrdiff_t(this->clayout->subsizes[0]); iy++) + for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->subsizes[1]); iz++) + { + ptrdiff_t cindex = this->get_cindex(this->clayout->sizes[2]-1, iy, iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) { + (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0; + (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0; + } + } + // put ky = ny/2 modes to 0 + if (this->clayout->myrank == this->clayout->rank[0][this->clayout->sizes[0]/2]) + { + for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->subsizes[1]); iz++) + for (ptrdiff_t ix = 0; ix < ptrdiff_t(this->clayout->subsizes[2]); ix++) + { + ptrdiff_t cindex = this->get_cindex(ix, this->clayout->sizes[0]/2-this->clayout->starts[0], iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) { + (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0; + (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0; + } + } + } + // put kz = nz/2 modes to 0 + for (ptrdiff_t iy = 0; iy < ptrdiff_t(this->clayout->subsizes[0]); iy++) + for (ptrdiff_t ix = 0; ix < ptrdiff_t(this->clayout->subsizes[2]); ix++) + { + ptrdiff_t cindex = this->get_cindex(ix, iy, this->clayout->sizes[1]/2); + for (int cc = 0; cc < int(ncomp(fc)); cc++) { + (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0; + (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0; + } + } + + // symmetrize kx = 0 plane, line by line, for ky != 0 + MPI_Status *mpistatus = new MPI_Status; + // bufferp will hold data from "plus", i.e. iy + // bufferm will hold data from "minus", i.e. ny - iy + typename fftw_interface<rnumber>::complex *bufferp = new typename fftw_interface<rnumber>::complex[ncomp(fc)*this->clayout->sizes[1]]; + typename fftw_interface<rnumber>::complex *bufferm = new typename fftw_interface<rnumber>::complex[ncomp(fc)*this->clayout->sizes[1]]; + int rankp, rankm; + // for each ky slice except ky=0 + for (ptrdiff_t iy = 1; iy < ptrdiff_t(this->clayout->sizes[0]/2); iy++) + { + // read rank plus and rank minus + rankp = this->clayout->rank[0][iy]; + rankm = this->clayout->rank[0][this->clayout->sizes[0] - iy]; + // if my rank is plus or minus, then I should do actual work + if (this->clayout->myrank == rankp || + this->clayout->myrank == rankm) + { + #pragma omp parallel + { + const ptrdiff_t zstart = ptrdiff_t(OmpUtils::ForIntervalStart(this->clayout->sizes[1])); + const ptrdiff_t zend = ptrdiff_t(OmpUtils::ForIntervalEnd(this->clayout->sizes[1])); + // if my rank is rank plus, I should fill out the plus buffer + if (this->clayout->myrank == rankp) + { + ptrdiff_t iyy = iy - this->clayout->starts[0]; + #pragma omp simd + for (ptrdiff_t iz = zstart; iz < zend; iz++) + { + ptrdiff_t cindex = this->get_cindex(0, iyy, iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) + for (int imag_comp=0; imag_comp<2; imag_comp++) + (*(bufferp + ncomp(fc)*iz+cc))[imag_comp] = + (*(cdata + ncomp(fc)*cindex + cc))[imag_comp]; + } + } + // if my rank is rank minus, I should fill out the minus buffer + if (this->clayout->myrank == rankm) + { + ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0]; + #pragma omp simd + for (ptrdiff_t iz = zstart; iz < zend; iz++) + { + ptrdiff_t cindex = this->get_cindex(0, iyy, iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) + for (int imag_comp=0; imag_comp<2; imag_comp++) + (*(bufferm + ncomp(fc)*iz+cc))[imag_comp] = + (*(cdata + ncomp(fc)*cindex + cc))[imag_comp]; + } + } + // after filling out buffers, synchronize threads + #pragma omp barrier + + // if ranks are different, send and receive + if (rankp != rankm) + { + // if my rank is rank plus, send buffer plus, receive buffer minus + if (this->clayout->myrank == rankp && omp_get_thread_num() == 0) + { + MPI_Send((void*)bufferp, + ncomp(fc)*this->clayout->sizes[1], + mpi_real_type<rnumber>::complex(), + rankm, 2*iy+0, + this->clayout->comm); + MPI_Recv((void*)bufferm, + ncomp(fc)*this->clayout->sizes[1], + mpi_real_type<rnumber>::complex(), + rankm, 2*iy+1, + this->clayout->comm, + mpistatus); + } + // if my rank is rank minus, receive buffer plus, send buffer minus + if (this->clayout->myrank == rankm && omp_get_thread_num() == 0) + { + MPI_Recv((void*)bufferp, + ncomp(fc)*this->clayout->sizes[1], + mpi_real_type<rnumber>::complex(), + rankp, 2*iy+0, + this->clayout->comm, + mpistatus); + MPI_Send((void*)bufferm, + ncomp(fc)*this->clayout->sizes[1], + mpi_real_type<rnumber>::complex(), + rankp, 2*iy+1, + this->clayout->comm); + } + } + // ensure buffers are updated by MPI transfer before threads + // start working again + #pragma omp barrier + + // if I my rank is either plus or minus, I should update my slice + if (this->clayout->myrank == rankp) + { + ptrdiff_t iyy = iy - this->clayout->starts[0]; + #pragma omp simd + for (ptrdiff_t iz = zstart; iz < zend; iz++) + { + // modulo operation makes sense only for slab decomposition + ptrdiff_t izz = (this->clayout->sizes[1] - iz) % this->clayout->sizes[1]; + ptrdiff_t cindex = this->get_cindex(0, iyy, iz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) + { + (*(cdata + ncomp(fc)*cindex + cc))[0] = ((*(bufferp + ncomp(fc)*iz+cc))[0] + (*(bufferm + ncomp(fc)*izz+cc))[0])/2; + (*(cdata + ncomp(fc)*cindex + cc))[1] = ((*(bufferp + ncomp(fc)*iz+cc))[1] - (*(bufferm + ncomp(fc)*izz+cc))[1])/2; + } + } + } + // if I my rank is either plus or minus, I should update my slice + if (this->clayout->myrank == rankm) + { + ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0]; + #pragma omp simd + for (ptrdiff_t iz = zstart; iz < zend; iz++) + { + // modulo operation makes sense only for slab decomposition + ptrdiff_t izz = (this->clayout->sizes[1] - iz) % this->clayout->sizes[1]; + ptrdiff_t cindex = this->get_cindex(0, iyy, izz); + for (int cc = 0; cc < int(ncomp(fc)); cc++) + { + (*(cdata + ncomp(fc)*cindex + cc))[0] = ((*(bufferp + ncomp(fc)*iz+cc))[0] + (*(bufferm + ncomp(fc)*izz+cc))[0])/2; + (*(cdata + ncomp(fc)*cindex + cc))[1] = -((*(bufferp + ncomp(fc)*iz+cc))[1] - (*(bufferm + ncomp(fc)*izz+cc))[1])/2; + } + } + } + }// end omp parallel region + }// end if + } + //fftw_interface<rnumber>::free(buffer); + delete[] bufferp; + delete[] bufferm; + delete mpistatus; + // symmetrize kx = 0, ky = 0 line + if (this->clayout->myrank == this->clayout->rank[0][0]) { for (ptrdiff_t iz = 1; iz < ptrdiff_t(this->clayout->sizes[1]/2); iz++) { diff --git a/cpp/field.hpp b/cpp/field.hpp index 547fabf466bc2e4c4997758d71ad5901436db827..a42d480111bd62339bf7d547d3c049685291de23 100644 --- a/cpp/field.hpp +++ b/cpp/field.hpp @@ -123,6 +123,7 @@ class field void normalize(); void symmetrize(); void symmetrize_FFT(); + void symmetrize_alternate(); void Hermitian_reflect(); /* stats */