Commit 6bdd1e1d authored by Cristian Lalescu's avatar Cristian Lalescu
Browse files

Merge branch 'feature/alternative-symmetrize' into develop

parents eae27b03 2a6a915f
Pipeline #108919 passed with stages
in 27 minutes and 49 seconds
......@@ -1502,6 +1502,24 @@ void field<rnumber, be, fc>::Hermitian_reflect()
finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
}
/** \brief Enforce Hermitian symmetry (slow, reference)
*
* TurTLE uses real-to-complex and complex-to-real FFTW transforms, because the
* equations are PDEs of real-valued fields.
* Hermitian symmetry means that Fourier-transformed real valued fields must
* respect the equation \f$\hat f(-\mathbf{k}) = {\hat f}^*(\mathbf{k})\f$.
*
* FFTW enforces this property mainly by only storing the positive half of the
* Fourier grid for the fastest array component. In TurTLE's case, this means
* \f$ k_x \f$.
* For the \f$ k_x = 0 \f$ plane, the symmetry must be enforced.
*
* This method uses a pair of backwards-forwards FFTs, which leads to FFTW
* effectively imposing Hermitian symmetry. It should be used as a reference
* implementation to calibrate against in the general case.
*
* */
template <typename rnumber,
field_backend be,
field_components fc>
......@@ -1557,10 +1575,33 @@ void field<rnumber, be, fc>::symmetrize_FFT()
return;
}
/** \brief Enforce Hermitian symmetry (unoptimized, amplitude-aware)
*
* TurTLE uses real-to-complex and complex-to-real FFTW transforms, because the
* equations are PDEs of real-valued fields.
* Hermitian symmetry means that Fourier-transformed real valued fields must
* respect the equation \f$\hat f(-\mathbf{k}) = {\hat f}^*(\mathbf{k})\f$.
*
* FFTW enforces this property mainly by only storing the positive half of the
* Fourier grid for the fastest array component. In TurTLE's case, this means
* \f$ k_x \f$.
* For the \f$ k_x = 0 \f$ plane, the symmetry must be enforced.
*
* This method is an alternative to the default arithmetic mean method, meant
* to be used in special circumstances where it is important to retain the
* exact amplitude of modes.
* Rather than an arithmetic mean, here we compute the amplitudes and phases
* for the \f$(0, k_y, k_z)\f$ and \f$(0, -k_y, -k_z)\f$ modes. We then compute
* a mean amplitude as the square root of the product of the two amplitudes,
* and a mean phase as the arithmetic mean of the two phases.
* When this method is applied to a field with fixed amplitudes, but random
* phases, it should preserve the spectrum of the initial field exactly.
*
* */
template <typename rnumber,
field_backend be,
field_components fc>
void field<rnumber, be, fc>::symmetrize()
void field<rnumber, be, fc>::symmetrize_alternate()
{
TIMEZONE("field::symmetrize");
start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
......@@ -1701,7 +1742,6 @@ void field<rnumber, be, fc>::symmetrize()
if (this->clayout->myrank == rankp)
{
ptrdiff_t iyy = iy - this->clayout->starts[0];
#pragma omp simd
for (ptrdiff_t iz = zstart; iz < zend; iz++)
{
// modulo operation makes sense only for slab decomposition
......@@ -1732,7 +1772,6 @@ void field<rnumber, be, fc>::symmetrize()
if (this->clayout->myrank == rankm)
{
ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0];
#pragma omp simd
for (ptrdiff_t iz = zstart; iz < zend; iz++)
{
// modulo operation makes sense only for slab decomposition
......@@ -1768,6 +1807,249 @@ void field<rnumber, be, fc>::symmetrize()
delete mpistatus;
// symmetrize kx = 0, ky = 0 line
if (this->clayout->myrank == this->clayout->rank[0][0])
{
for (ptrdiff_t iz = 1; iz < ptrdiff_t(this->clayout->sizes[1]/2); iz++)
{
ptrdiff_t cindex0 = this->get_cindex(0, 0, iz);
ptrdiff_t cindex1 = this->get_cindex(0, 0, this->clayout->sizes[1] - iz);
for (int cc = 0; cc < int(ncomp(fc)); cc++)
{
double re = ((*(cdata + cc + ncomp(fc)*cindex0))[0] + (*(cdata + cc + ncomp(fc)*cindex1))[0])/2;
double im = ((*(cdata + cc + ncomp(fc)*cindex0))[1] - (*(cdata + cc + ncomp(fc)*cindex1))[1])/2;
const double ampp = sqrt(
(*(cdata + cc + ncomp(fc)*cindex0))[0]*(*(cdata + cc + ncomp(fc)*cindex0))[0] +
(*(cdata + cc + ncomp(fc)*cindex0))[1]*(*(cdata + cc + ncomp(fc)*cindex0))[1]);
const double phip = atan2(
(*(cdata + cc + ncomp(fc)*cindex0))[1],
(*(cdata + cc + ncomp(fc)*cindex0))[0]);
const double ampm = sqrt(
(*(cdata + cc + ncomp(fc)*cindex1))[0]*(*(cdata + cc + ncomp(fc)*cindex1))[0] +
(*(cdata + cc + ncomp(fc)*cindex1))[1]*(*(cdata + cc + ncomp(fc)*cindex1))[1]);
const double phim = atan2(
(*(cdata + cc + ncomp(fc)*cindex1))[1],
(*(cdata + cc + ncomp(fc)*cindex1))[0]);
const double amp = sqrt(ampp*ampm);
const double phi = (phip - phim)/2;
(*(cdata + cc + ncomp(fc)*cindex0))[0] = amp*cos(phi);
(*(cdata + cc + ncomp(fc)*cindex0))[1] = amp*sin(phi);
(*(cdata + cc + ncomp(fc)*cindex1))[0] = amp*cos(phi);
(*(cdata + cc + ncomp(fc)*cindex1))[1] = -amp*sin(phi);
}
}
}
finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
}
/** \brief Enforce Hermitian symmetry (fast and reasonable)
*
* TurTLE uses real-to-complex and complex-to-real FFTW transforms, because the
* equations are PDEs of real-valued fields.
* Hermitian symmetry means that Fourier-transformed real valued fields must
* respect the equation \f$\hat f(-\mathbf{k}) = {\hat f}^*(\mathbf{k})\f$.
*
* FFTW enforces this property mainly by only storing the positive half of the
* Fourier grid for the fastest array component. In TurTLE's case, this means
* \f$ k_x \f$.
* For the \f$ k_x = 0 \f$ plane, the symmetry must be enforced.
*
* This method uses an arithmetic mean of the \f$ (0, k_y, k_z)\f$ mode and
* the conjugate of the \f$(0, -k_y, -k_z) \f$ mode to generate the desired
* values. The method is fast (other than the required MPI communications).
*
* Note: the method is adequate either in cases where deviations from
* Hermitian symmetry are small, or in cases where deviations from correct
* physics is irrelevant.
* In practice: initial condition fields may be strongly perturbed by the
* application of this method, but they are unphysical anyway; during
* the quasistationary regime of some simulation, the method is applied
* regularly to all relevant fields, and deviations are expected to be small,
* i.e. effect on PDE approximations is negligible.
*
* */
template <typename rnumber,
field_backend be,
field_components fc>
void field<rnumber, be, fc>::symmetrize()
{
TIMEZONE("field::symmetrize");
start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
assert(!this->real_space_representation);
typename fftw_interface<rnumber>::complex *cdata = this->get_cdata();
// make 0 mode real
if (this->myrank == this->clayout->rank[0][0])
{
for (ptrdiff_t cc = 0; cc < ncomp(fc); cc++)
cdata[cc][1] = 0.0;
}
// put kx = nx/2 modes to 0
for (ptrdiff_t iy = 0; iy < ptrdiff_t(this->clayout->subsizes[0]); iy++)
for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->subsizes[1]); iz++)
{
ptrdiff_t cindex = this->get_cindex(this->clayout->sizes[2]-1, iy, iz);
for (int cc = 0; cc < int(ncomp(fc)); cc++) {
(*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0;
(*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0;
}
}
// put ky = ny/2 modes to 0
if (this->clayout->myrank == this->clayout->rank[0][this->clayout->sizes[0]/2])
{
for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->subsizes[1]); iz++)
for (ptrdiff_t ix = 0; ix < ptrdiff_t(this->clayout->subsizes[2]); ix++)
{
ptrdiff_t cindex = this->get_cindex(ix, this->clayout->sizes[0]/2-this->clayout->starts[0], iz);
for (int cc = 0; cc < int(ncomp(fc)); cc++) {
(*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0;
(*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0;
}
}
}
// put kz = nz/2 modes to 0
for (ptrdiff_t iy = 0; iy < ptrdiff_t(this->clayout->subsizes[0]); iy++)
for (ptrdiff_t ix = 0; ix < ptrdiff_t(this->clayout->subsizes[2]); ix++)
{
ptrdiff_t cindex = this->get_cindex(ix, iy, this->clayout->sizes[1]/2);
for (int cc = 0; cc < int(ncomp(fc)); cc++) {
(*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0;
(*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0;
}
}
// symmetrize kx = 0 plane, line by line, for ky != 0
MPI_Status *mpistatus = new MPI_Status;
// bufferp will hold data from "plus", i.e. iy
// bufferm will hold data from "minus", i.e. ny - iy
typename fftw_interface<rnumber>::complex *bufferp = new typename fftw_interface<rnumber>::complex[ncomp(fc)*this->clayout->sizes[1]];
typename fftw_interface<rnumber>::complex *bufferm = new typename fftw_interface<rnumber>::complex[ncomp(fc)*this->clayout->sizes[1]];
int rankp, rankm;
// for each ky slice except ky=0
for (ptrdiff_t iy = 1; iy < ptrdiff_t(this->clayout->sizes[0]/2); iy++)
{
// read rank plus and rank minus
rankp = this->clayout->rank[0][iy];
rankm = this->clayout->rank[0][this->clayout->sizes[0] - iy];
// if my rank is plus or minus, then I should do actual work
if (this->clayout->myrank == rankp ||
this->clayout->myrank == rankm)
{
#pragma omp parallel
{
const ptrdiff_t zstart = ptrdiff_t(OmpUtils::ForIntervalStart(this->clayout->sizes[1]));
const ptrdiff_t zend = ptrdiff_t(OmpUtils::ForIntervalEnd(this->clayout->sizes[1]));
// if my rank is rank plus, I should fill out the plus buffer
if (this->clayout->myrank == rankp)
{
ptrdiff_t iyy = iy - this->clayout->starts[0];
#pragma omp simd
for (ptrdiff_t iz = zstart; iz < zend; iz++)
{
ptrdiff_t cindex = this->get_cindex(0, iyy, iz);
for (int cc = 0; cc < int(ncomp(fc)); cc++)
for (int imag_comp=0; imag_comp<2; imag_comp++)
(*(bufferp + ncomp(fc)*iz+cc))[imag_comp] =
(*(cdata + ncomp(fc)*cindex + cc))[imag_comp];
}
}
// if my rank is rank minus, I should fill out the minus buffer
if (this->clayout->myrank == rankm)
{
ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0];
#pragma omp simd
for (ptrdiff_t iz = zstart; iz < zend; iz++)
{
ptrdiff_t cindex = this->get_cindex(0, iyy, iz);
for (int cc = 0; cc < int(ncomp(fc)); cc++)
for (int imag_comp=0; imag_comp<2; imag_comp++)
(*(bufferm + ncomp(fc)*iz+cc))[imag_comp] =
(*(cdata + ncomp(fc)*cindex + cc))[imag_comp];
}
}
// after filling out buffers, synchronize threads
#pragma omp barrier
// if ranks are different, send and receive
if (rankp != rankm)
{
// if my rank is rank plus, send buffer plus, receive buffer minus
if (this->clayout->myrank == rankp && omp_get_thread_num() == 0)
{
MPI_Send((void*)bufferp,
ncomp(fc)*this->clayout->sizes[1],
mpi_real_type<rnumber>::complex(),
rankm, 2*iy+0,
this->clayout->comm);
MPI_Recv((void*)bufferm,
ncomp(fc)*this->clayout->sizes[1],
mpi_real_type<rnumber>::complex(),
rankm, 2*iy+1,
this->clayout->comm,
mpistatus);
}
// if my rank is rank minus, receive buffer plus, send buffer minus
if (this->clayout->myrank == rankm && omp_get_thread_num() == 0)
{
MPI_Recv((void*)bufferp,
ncomp(fc)*this->clayout->sizes[1],
mpi_real_type<rnumber>::complex(),
rankp, 2*iy+0,
this->clayout->comm,
mpistatus);
MPI_Send((void*)bufferm,
ncomp(fc)*this->clayout->sizes[1],
mpi_real_type<rnumber>::complex(),
rankp, 2*iy+1,
this->clayout->comm);
}
}
// ensure buffers are updated by MPI transfer before threads
// start working again
#pragma omp barrier
// if I my rank is either plus or minus, I should update my slice
if (this->clayout->myrank == rankp)
{
ptrdiff_t iyy = iy - this->clayout->starts[0];
#pragma omp simd
for (ptrdiff_t iz = zstart; iz < zend; iz++)
{
// modulo operation makes sense only for slab decomposition
ptrdiff_t izz = (this->clayout->sizes[1] - iz) % this->clayout->sizes[1];
ptrdiff_t cindex = this->get_cindex(0, iyy, iz);
for (int cc = 0; cc < int(ncomp(fc)); cc++)
{
(*(cdata + ncomp(fc)*cindex + cc))[0] = ((*(bufferp + ncomp(fc)*iz+cc))[0] + (*(bufferm + ncomp(fc)*izz+cc))[0])/2;
(*(cdata + ncomp(fc)*cindex + cc))[1] = ((*(bufferp + ncomp(fc)*iz+cc))[1] - (*(bufferm + ncomp(fc)*izz+cc))[1])/2;
}
}
}
// if I my rank is either plus or minus, I should update my slice
if (this->clayout->myrank == rankm)
{
ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0];
#pragma omp simd
for (ptrdiff_t iz = zstart; iz < zend; iz++)
{
// modulo operation makes sense only for slab decomposition
ptrdiff_t izz = (this->clayout->sizes[1] - iz) % this->clayout->sizes[1];
ptrdiff_t cindex = this->get_cindex(0, iyy, izz);
for (int cc = 0; cc < int(ncomp(fc)); cc++)
{
(*(cdata + ncomp(fc)*cindex + cc))[0] = ((*(bufferp + ncomp(fc)*iz+cc))[0] + (*(bufferm + ncomp(fc)*izz+cc))[0])/2;
(*(cdata + ncomp(fc)*cindex + cc))[1] = -((*(bufferp + ncomp(fc)*iz+cc))[1] - (*(bufferm + ncomp(fc)*izz+cc))[1])/2;
}
}
}
}// end omp parallel region
}// end if
}
//fftw_interface<rnumber>::free(buffer);
delete[] bufferp;
delete[] bufferm;
delete mpistatus;
// symmetrize kx = 0, ky = 0 line
if (this->clayout->myrank == this->clayout->rank[0][0])
{
for (ptrdiff_t iz = 1; iz < ptrdiff_t(this->clayout->sizes[1]/2); iz++)
{
......
......@@ -123,6 +123,7 @@ class field
void normalize();
void symmetrize();
void symmetrize_FFT();
void symmetrize_alternate();
void Hermitian_reflect();
/* stats */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment