diff --git a/cpp/field.cpp b/cpp/field.cpp
index 65ac42990535a7a9d7964d87dcce680ec7843ec5..a10b9e964209e40e9ddc301a28145730b4de643a 100644
--- a/cpp/field.cpp
+++ b/cpp/field.cpp
@@ -1502,6 +1502,24 @@ void field<rnumber, be, fc>::Hermitian_reflect()
     finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
 }
 
+
+/** \brief Enforce Hermitian symmetry (slow, reference)
+ *
+ * TurTLE uses real-to-complex and complex-to-real FFTW transforms, because the
+ * equations are PDEs of real-valued fields.
+ * Hermitian symmetry means that Fourier-transformed real valued fields must
+ * respect the equation \f$\hat f(-\mathbf{k}) = {\hat f}^*(\mathbf{k})\f$.
+ *
+ * FFTW enforces this property mainly by only storing the positive half of the
+ * Fourier grid for the fastest array component. In TurTLE's case, this means
+ * \f$ k_x \f$.
+ * For the \f$ k_x = 0 \f$ plane, the symmetry must be enforced.
+ *
+ * This method uses a pair of backwards-forwards FFTs, which leads to FFTW
+ * effectively imposing Hermitian symmetry. It should be used as a reference
+ * implementation to calibrate against in the general case.
+ *
+ * */
 template <typename rnumber,
           field_backend be,
           field_components fc>
@@ -1557,10 +1575,33 @@ void field<rnumber, be, fc>::symmetrize_FFT()
     return;
 }
 
+/** \brief Enforce Hermitian symmetry (unoptimized, amplitude-aware)
+ *
+ * TurTLE uses real-to-complex and complex-to-real FFTW transforms, because the
+ * equations are PDEs of real-valued fields.
+ * Hermitian symmetry means that Fourier-transformed real valued fields must
+ * respect the equation \f$\hat f(-\mathbf{k}) = {\hat f}^*(\mathbf{k})\f$.
+ *
+ * FFTW enforces this property mainly by only storing the positive half of the
+ * Fourier grid for the fastest array component. In TurTLE's case, this means
+ * \f$ k_x \f$.
+ * For the \f$ k_x = 0 \f$ plane, the symmetry must be enforced.
+ *
+ * This method is an alternative to the default arithmetic mean method, meant
+ * to be used in special circumstances where it is important to retain the
+ * exact amplitude of modes.
+ * Rather than an arithmetic mean, here we compute the amplitudes and phases
+ * for the \f$(0, k_y, k_z)\f$ and \f$(0, -k_y, -k_z)\f$ modes. We then compute
+ * a mean amplitude as the square root of the product of the two amplitudes,
+ * and a mean phase as the arithmetic mean of the two phases.
+ * When this method is applied to a field with fixed amplitudes, but random
+ * phases, it should preserve the spectrum of the initial field exactly.
+ *
+ * */
 template <typename rnumber,
           field_backend be,
           field_components fc>
-void field<rnumber, be, fc>::symmetrize()
+void field<rnumber, be, fc>::symmetrize_alternate()
 {
     TIMEZONE("field::symmetrize");
     start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
@@ -1701,7 +1742,6 @@ void field<rnumber, be, fc>::symmetrize()
             if (this->clayout->myrank == rankp)
             {
                 ptrdiff_t iyy = iy - this->clayout->starts[0];
-                #pragma omp simd
                 for (ptrdiff_t iz = zstart; iz < zend; iz++)
                 {
                     // modulo operation makes sense only for slab decomposition
@@ -1732,7 +1772,6 @@ void field<rnumber, be, fc>::symmetrize()
             if (this->clayout->myrank == rankm)
             {
                 ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0];
-                #pragma omp simd
                 for (ptrdiff_t iz = zstart; iz < zend; iz++)
                 {
                     // modulo operation makes sense only for slab decomposition
@@ -1768,6 +1807,249 @@ void field<rnumber, be, fc>::symmetrize()
     delete mpistatus;
     // symmetrize kx = 0, ky = 0 line
     if (this->clayout->myrank == this->clayout->rank[0][0])
+    {
+        for (ptrdiff_t iz = 1; iz < ptrdiff_t(this->clayout->sizes[1]/2); iz++)
+        {
+            ptrdiff_t cindex0 = this->get_cindex(0, 0, iz);
+            ptrdiff_t cindex1 = this->get_cindex(0, 0, this->clayout->sizes[1] - iz);
+            for (int cc = 0; cc < int(ncomp(fc)); cc++)
+            {
+                double re = ((*(cdata + cc + ncomp(fc)*cindex0))[0] + (*(cdata + cc + ncomp(fc)*cindex1))[0])/2;
+                double im = ((*(cdata + cc + ncomp(fc)*cindex0))[1] - (*(cdata + cc + ncomp(fc)*cindex1))[1])/2;
+                const double ampp = sqrt(
+                        (*(cdata + cc + ncomp(fc)*cindex0))[0]*(*(cdata + cc + ncomp(fc)*cindex0))[0] +
+                        (*(cdata + cc + ncomp(fc)*cindex0))[1]*(*(cdata + cc + ncomp(fc)*cindex0))[1]);
+                const double phip = atan2(
+                        (*(cdata + cc + ncomp(fc)*cindex0))[1],
+                        (*(cdata + cc + ncomp(fc)*cindex0))[0]);
+                const double ampm = sqrt(
+                        (*(cdata + cc + ncomp(fc)*cindex1))[0]*(*(cdata + cc + ncomp(fc)*cindex1))[0] +
+                        (*(cdata + cc + ncomp(fc)*cindex1))[1]*(*(cdata + cc + ncomp(fc)*cindex1))[1]);
+                const double phim = atan2(
+                        (*(cdata + cc + ncomp(fc)*cindex1))[1],
+                        (*(cdata + cc + ncomp(fc)*cindex1))[0]);
+                const double amp = sqrt(ampp*ampm);
+                const double phi = (phip - phim)/2;
+                (*(cdata + cc + ncomp(fc)*cindex0))[0] =  amp*cos(phi);
+                (*(cdata + cc + ncomp(fc)*cindex0))[1] =  amp*sin(phi);
+                (*(cdata + cc + ncomp(fc)*cindex1))[0] =  amp*cos(phi);
+                (*(cdata + cc + ncomp(fc)*cindex1))[1] = -amp*sin(phi);
+            }
+        }
+    }
+    finish_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
+}
+
+/** \brief Enforce Hermitian symmetry (fast and reasonable)
+ *
+ * TurTLE uses real-to-complex and complex-to-real FFTW transforms, because the
+ * equations are PDEs of real-valued fields.
+ * Hermitian symmetry means that Fourier-transformed real valued fields must
+ * respect the equation \f$\hat f(-\mathbf{k}) = {\hat f}^*(\mathbf{k})\f$.
+ *
+ * FFTW enforces this property mainly by only storing the positive half of the
+ * Fourier grid for the fastest array component. In TurTLE's case, this means
+ * \f$ k_x \f$.
+ * For the \f$ k_x = 0 \f$ plane, the symmetry must be enforced.
+ *
+ * This method uses an arithmetic mean of the \f$ (0, k_y, k_z)\f$ mode and
+ * the conjugate of the \f$(0, -k_y, -k_z) \f$ mode to generate the desired
+ * values. The method is fast (other than the required MPI communications).
+ *
+ * Note: the method is adequate either in cases where deviations from
+ * Hermitian symmetry are small, or in cases where deviations from correct
+ * physics is irrelevant.
+ * In practice: initial condition fields may be strongly perturbed by the
+ * application of this method, but they are unphysical anyway; during
+ * the quasistationary regime of some simulation, the method is applied
+ * regularly to all relevant fields, and deviations are expected to be small,
+ * i.e. effect on PDE approximations is negligible.
+ *
+ * */
+
+template <typename rnumber,
+          field_backend be,
+          field_components fc>
+void field<rnumber, be, fc>::symmetrize()
+{
+    TIMEZONE("field::symmetrize");
+    start_mpi_profiling_zone(turtle_mpi_pcontrol::FIELD);
+    assert(!this->real_space_representation);
+    typename fftw_interface<rnumber>::complex *cdata = this->get_cdata();
+
+    // make 0 mode real
+    if (this->myrank == this->clayout->rank[0][0])
+    {
+        for (ptrdiff_t cc = 0; cc < ncomp(fc); cc++)
+            cdata[cc][1] = 0.0;
+    }
+    // put kx = nx/2 modes to 0
+    for (ptrdiff_t iy = 0; iy < ptrdiff_t(this->clayout->subsizes[0]); iy++)
+    for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->subsizes[1]); iz++)
+    {
+        ptrdiff_t cindex = this->get_cindex(this->clayout->sizes[2]-1, iy, iz);
+        for (int cc = 0; cc < int(ncomp(fc)); cc++) {
+            (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0;
+            (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0;
+        }
+    }
+    // put ky = ny/2 modes to 0
+    if (this->clayout->myrank == this->clayout->rank[0][this->clayout->sizes[0]/2])
+    {
+        for (ptrdiff_t iz = 0; iz < ptrdiff_t(this->clayout->subsizes[1]); iz++)
+        for (ptrdiff_t ix = 0; ix < ptrdiff_t(this->clayout->subsizes[2]); ix++)
+        {
+            ptrdiff_t cindex = this->get_cindex(ix, this->clayout->sizes[0]/2-this->clayout->starts[0], iz);
+            for (int cc = 0; cc < int(ncomp(fc)); cc++) {
+                (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0;
+                (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0;
+            }
+        }
+    }
+    // put kz = nz/2 modes to 0
+    for (ptrdiff_t iy = 0; iy < ptrdiff_t(this->clayout->subsizes[0]); iy++)
+    for (ptrdiff_t ix = 0; ix < ptrdiff_t(this->clayout->subsizes[2]); ix++)
+    {
+        ptrdiff_t cindex = this->get_cindex(ix, iy, this->clayout->sizes[1]/2);
+        for (int cc = 0; cc < int(ncomp(fc)); cc++) {
+            (*(cdata + cc + ncomp(fc)*cindex))[0] = 0.0;
+            (*(cdata + cc + ncomp(fc)*cindex))[1] = 0.0;
+        }
+    }
+
+    // symmetrize kx = 0 plane, line by line, for ky != 0
+    MPI_Status *mpistatus = new MPI_Status;
+    // bufferp will hold data from "plus", i.e. iy
+    // bufferm will hold data from "minus", i.e. ny - iy
+    typename fftw_interface<rnumber>::complex *bufferp = new typename fftw_interface<rnumber>::complex[ncomp(fc)*this->clayout->sizes[1]];
+    typename fftw_interface<rnumber>::complex *bufferm = new typename fftw_interface<rnumber>::complex[ncomp(fc)*this->clayout->sizes[1]];
+    int rankp, rankm;
+    // for each ky slice except ky=0
+    for (ptrdiff_t iy = 1; iy < ptrdiff_t(this->clayout->sizes[0]/2); iy++)
+    {
+        // read rank plus and rank minus
+        rankp = this->clayout->rank[0][iy];
+        rankm = this->clayout->rank[0][this->clayout->sizes[0] - iy];
+        // if my rank is plus or minus, then I should do actual work
+        if (this->clayout->myrank == rankp ||
+            this->clayout->myrank == rankm)
+        {
+            #pragma omp parallel
+        {
+            const ptrdiff_t zstart = ptrdiff_t(OmpUtils::ForIntervalStart(this->clayout->sizes[1]));
+            const ptrdiff_t zend   = ptrdiff_t(OmpUtils::ForIntervalEnd(this->clayout->sizes[1]));
+            // if my rank is rank plus, I should fill out the plus buffer
+            if (this->clayout->myrank == rankp)
+            {
+                ptrdiff_t iyy = iy - this->clayout->starts[0];
+                #pragma omp simd
+                for (ptrdiff_t iz = zstart; iz < zend; iz++)
+                {
+                    ptrdiff_t cindex = this->get_cindex(0, iyy, iz);
+                    for (int cc = 0; cc < int(ncomp(fc)); cc++)
+                        for (int imag_comp=0; imag_comp<2; imag_comp++)
+                            (*(bufferp + ncomp(fc)*iz+cc))[imag_comp] =
+                                (*(cdata + ncomp(fc)*cindex + cc))[imag_comp];
+                }
+            }
+            // if my rank is rank minus, I should fill out the minus buffer
+            if (this->clayout->myrank == rankm)
+            {
+                ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0];
+                #pragma omp simd
+                for (ptrdiff_t iz = zstart; iz < zend; iz++)
+                {
+                    ptrdiff_t cindex = this->get_cindex(0, iyy, iz);
+                    for (int cc = 0; cc < int(ncomp(fc)); cc++)
+                        for (int imag_comp=0; imag_comp<2; imag_comp++)
+                            (*(bufferm + ncomp(fc)*iz+cc))[imag_comp] =
+                                (*(cdata + ncomp(fc)*cindex + cc))[imag_comp];
+                }
+            }
+            // after filling out buffers, synchronize threads
+            #pragma omp barrier
+
+            // if ranks are different, send and receive
+            if (rankp != rankm)
+            {
+                // if my rank is rank plus, send buffer plus, receive buffer minus
+                if (this->clayout->myrank == rankp && omp_get_thread_num() == 0)
+                {
+                    MPI_Send((void*)bufferp,
+                             ncomp(fc)*this->clayout->sizes[1],
+                             mpi_real_type<rnumber>::complex(),
+                             rankm, 2*iy+0,
+                             this->clayout->comm);
+                    MPI_Recv((void*)bufferm,
+                             ncomp(fc)*this->clayout->sizes[1],
+                             mpi_real_type<rnumber>::complex(),
+                             rankm, 2*iy+1,
+                             this->clayout->comm,
+                             mpistatus);
+                }
+                // if my rank is rank minus, receive buffer plus, send buffer minus
+                if (this->clayout->myrank == rankm && omp_get_thread_num() == 0)
+                {
+                    MPI_Recv((void*)bufferp,
+                             ncomp(fc)*this->clayout->sizes[1],
+                             mpi_real_type<rnumber>::complex(),
+                             rankp, 2*iy+0,
+                             this->clayout->comm,
+                             mpistatus);
+                    MPI_Send((void*)bufferm,
+                             ncomp(fc)*this->clayout->sizes[1],
+                             mpi_real_type<rnumber>::complex(),
+                             rankp, 2*iy+1,
+                             this->clayout->comm);
+                }
+            }
+            // ensure buffers are updated by MPI transfer before threads
+            // start working again
+            #pragma omp barrier
+
+            // if I my rank is either plus or minus, I should update my slice
+            if (this->clayout->myrank == rankp)
+            {
+                ptrdiff_t iyy = iy - this->clayout->starts[0];
+                #pragma omp simd
+                for (ptrdiff_t iz = zstart; iz < zend; iz++)
+                {
+                    // modulo operation makes sense only for slab decomposition
+                    ptrdiff_t izz = (this->clayout->sizes[1] - iz) % this->clayout->sizes[1];
+                    ptrdiff_t cindex = this->get_cindex(0, iyy, iz);
+                    for (int cc = 0; cc < int(ncomp(fc)); cc++)
+                    {
+                        (*(cdata + ncomp(fc)*cindex + cc))[0] =  ((*(bufferp + ncomp(fc)*iz+cc))[0] + (*(bufferm + ncomp(fc)*izz+cc))[0])/2;
+                        (*(cdata + ncomp(fc)*cindex + cc))[1] =  ((*(bufferp + ncomp(fc)*iz+cc))[1] - (*(bufferm + ncomp(fc)*izz+cc))[1])/2;
+                    }
+                }
+            }
+            // if I my rank is either plus or minus, I should update my slice
+            if (this->clayout->myrank == rankm)
+            {
+                ptrdiff_t iyy = (this->clayout->sizes[0] - iy) - this->clayout->starts[0];
+                #pragma omp simd
+                for (ptrdiff_t iz = zstart; iz < zend; iz++)
+                {
+                    // modulo operation makes sense only for slab decomposition
+                    ptrdiff_t izz = (this->clayout->sizes[1] - iz) % this->clayout->sizes[1];
+                    ptrdiff_t cindex = this->get_cindex(0, iyy, izz);
+                    for (int cc = 0; cc < int(ncomp(fc)); cc++)
+                    {
+                        (*(cdata + ncomp(fc)*cindex + cc))[0] =  ((*(bufferp + ncomp(fc)*iz+cc))[0] + (*(bufferm + ncomp(fc)*izz+cc))[0])/2;
+                        (*(cdata + ncomp(fc)*cindex + cc))[1] = -((*(bufferp + ncomp(fc)*iz+cc))[1] - (*(bufferm + ncomp(fc)*izz+cc))[1])/2;
+                    }
+                }
+            }
+        }// end omp parallel region
+        }// end if
+    }
+    //fftw_interface<rnumber>::free(buffer);
+    delete[] bufferp;
+    delete[] bufferm;
+    delete mpistatus;
+    // symmetrize kx = 0, ky = 0 line
+    if (this->clayout->myrank == this->clayout->rank[0][0])
     {
         for (ptrdiff_t iz = 1; iz < ptrdiff_t(this->clayout->sizes[1]/2); iz++)
         {
diff --git a/cpp/field.hpp b/cpp/field.hpp
index 547fabf466bc2e4c4997758d71ad5901436db827..a42d480111bd62339bf7d547d3c049685291de23 100644
--- a/cpp/field.hpp
+++ b/cpp/field.hpp
@@ -123,6 +123,7 @@ class field
         void normalize();
         void symmetrize();
         void symmetrize_FFT();
+        void symmetrize_alternate();
         void Hermitian_reflect();
 
         /* stats */