/********************************************************************** * * * Copyright 2015 Max Planck Institute * * for Dynamics and Self-Organization * * * * This file is part of bfps. * * * * bfps is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published * * by the Free Software Foundation, either version 3 of the License, * * or (at your option) any later version. * * * * bfps is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with bfps. If not, see <http://www.gnu.org/licenses/> * * * * Contact: Cristian.Lalescu@ds.mpg.de * * * **********************************************************************/ //#define NDEBUG #include <cassert> #include <cmath> #include <cstring> #include "base.hpp" #include "fluid_solver_base.hpp" #include "fftw_tools.hpp" template <class rnumber> void fluid_solver_base<rnumber>::fill_up_filename(const char *base_name, char *destination) { sprintf(destination, "%s_%s_i%.5x", this->name, base_name, this->iteration); } template <class rnumber> void fluid_solver_base<rnumber>::clean_up_real_space(rnumber *a, int howmany) { for (ptrdiff_t rindex = 0; rindex < this->cd->local_size*2; rindex += howmany*(this->rd->subsizes[2]+2)) std::fill_n(a+rindex+this->rd->subsizes[2]*howmany, 2*howmany, 0.0); } template <class rnumber> double fluid_solver_base<rnumber>::autocorrel(cnumber *a) { double *spec = fftw_alloc_real(this->nshells*9); double sum_local; this->cospectrum(a, a, spec); sum_local = 0.0; for (int n = 0; n < this->nshells; n++) { sum_local += spec[n*9] + spec[n*9 + 4] + spec[n*9 + 8]; } fftw_free(spec); return sum_local; } template <class rnumber> void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec) { double *cospec_local = fftw_alloc_real(this->nshells*9); std::fill_n(cospec_local, this->nshells*9, 0); int tmp_int; CLOOP_K2_NXMODES( this, if (k2 <= this->kMspec2) { tmp_int = int(sqrt(k2)/this->dk)*9; for (int i=0; i<3; i++) for (int j=0; j<3; j++) { cospec_local[tmp_int+i*3+j] += nxmodes * ( (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); } } ); MPI_Allreduce( (void*)cospec_local, (void*)spec, this->nshells*9, MPI_DOUBLE, MPI_SUM, this->cd->comm); fftw_free(cospec_local); } template <class rnumber> void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec, const double k2exponent) { double *cospec_local = fftw_alloc_real(this->nshells*9); std::fill_n(cospec_local, this->nshells*9, 0); double factor = 1; int tmp_int; CLOOP_K2_NXMODES( this, if (k2 <= this->kMspec2) { factor = nxmodes*pow(k2, k2exponent); tmp_int = int(sqrt(k2)/this->dk)*9; for (int i=0; i<3; i++) for (int j=0; j<3; j++) { cospec_local[tmp_int+i*3+j] += factor * ( (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); } } ); MPI_Allreduce( (void*)cospec_local, (void*)spec, this->nshells*9, MPI_DOUBLE, MPI_SUM, this->cd->comm); //for (int n=0; n<this->nshells; n++) //{ // spec[n] *= 12.5663706144*pow(this->kshell[n], 2) / this->nshell[n]; // /*is normalization needed? // * spec[n] /= this->normalization_factor*/ //} fftw_free(cospec_local); } template <class rnumber> template<int nvals> void fluid_solver_base<rnumber>::compute_rspace_stats( rnumber *a, double *moments, ptrdiff_t *hist, double max_estimate[], const int nbins) { double *local_moments = fftw_alloc_real(10*nvals); double val_tmp[nvals], binsize[nvals], pow_tmp[nvals]; ptrdiff_t *local_hist = new ptrdiff_t[nbins*nvals]; int bin; for (int i=0; i<nvals; i++) binsize[i] = 2*max_estimate[i] / nbins; std::fill_n(local_hist, nbins*nvals, 0); std::fill_n(local_moments, 10*nvals, 0); if (nvals == 4) local_moments[3] = max_estimate[3]; RLOOP( this, std::fill_n(pow_tmp, nvals, 1.0); if (nvals == 4) val_tmp[3] = 0.0; for (int i=0; i<3; i++) { val_tmp[i] = a[rindex*3+i]; if (nvals == 4) val_tmp[3] += val_tmp[i]*val_tmp[i]; } if (nvals == 4) { val_tmp[3] = sqrt(val_tmp[3]); if (val_tmp[3] < local_moments[0*nvals+3]) local_moments[0*nvals+3] = val_tmp[3]; if (val_tmp[3] > local_moments[9*nvals+3]) local_moments[9*nvals+3] = val_tmp[3]; bin = int(floor(val_tmp[3]*2/binsize[3])); if (bin >= 0 && bin < nbins) local_hist[bin*nvals+3]++; } for (int i=0; i<3; i++) { if (val_tmp[i] < local_moments[0*nvals+i]) local_moments[0*nvals+i] = val_tmp[i]; if (val_tmp[i] > local_moments[9*nvals+i]) local_moments[9*nvals+i] = val_tmp[i]; bin = int(floor((val_tmp[i] + max_estimate[i]) / binsize[i])); if (bin >= 0 && bin < nbins) local_hist[bin*nvals+i]++; } for (int n=1; n<9; n++) for (int i=0; i<nvals; i++) local_moments[n*nvals + i] += (pow_tmp[i] = val_tmp[i]*pow_tmp[i]); ); MPI_Allreduce( (void*)local_moments, (void*)moments, nvals, MPI_DOUBLE, MPI_MIN, this->cd->comm); MPI_Allreduce( (void*)(local_moments + nvals), (void*)(moments+nvals), 8*nvals, MPI_DOUBLE, MPI_SUM, this->cd->comm); MPI_Allreduce( (void*)(local_moments + 9*nvals), (void*)(moments+9*nvals), nvals, MPI_DOUBLE, MPI_MAX, this->cd->comm); MPI_Allreduce( (void*)local_hist, (void*)hist, nbins*nvals, MPI_INT64_T, MPI_SUM, this->cd->comm); for (int n=1; n<9; n++) for (int i=0; i<nvals; i++) moments[n*nvals + i] /= this->normalization_factor; fftw_free(local_moments); delete[] local_hist; } template <class rnumber> void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, const double k2exponent) { double *spec = fftw_alloc_real(this->nshells); this->cospectrum(a, a, spec, k2exponent); if (this->cd->myrank == 0) { FILE *spec_file; char full_name[512]; sprintf(full_name, "%s_%s_spec", this->name, fname); spec_file = fopen(full_name, "ab"); fwrite((void*)&this->iteration, sizeof(int), 1, spec_file); fwrite((void*)spec, sizeof(double), this->nshells, spec_file); fclose(spec_file); } fftw_free(spec); } /*****************************************************************************/ /* macro for specializations to numeric types compatible with FFTW */ #define FLUID_SOLVER_BASE_DEFINITIONS(FFTW, R, MPI_RNUM, MPI_CNUM) \ \ template<> \ fluid_solver_base<R>::fluid_solver_base( \ const char *NAME, \ int nx, \ int ny, \ int nz, \ double DKX, \ double DKY, \ double DKZ, \ int DEALIAS_TYPE, \ unsigned FFTW_PLAN_RIGOR) \ { \ strncpy(this->name, NAME, 256); \ this->name[255] = '\0'; \ this->iteration = 0; \ this->fftw_plan_rigor = FFTW_PLAN_RIGOR; \ \ int ntmp[4]; \ ntmp[0] = nz; \ ntmp[1] = ny; \ ntmp[2] = nx; \ ntmp[3] = 3; \ this->rd = new field_descriptor<R>( \ 4, ntmp, MPI_RNUM, MPI_COMM_WORLD);\ this->normalization_factor = (this->rd->full_size/3); \ ntmp[0] = ny; \ ntmp[1] = nz; \ ntmp[2] = nx/2 + 1; \ ntmp[3] = 3; \ this->cd = new field_descriptor<R>( \ 4, ntmp, MPI_CNUM, this->rd->comm);\ \ this->dkx = DKX; \ this->dky = DKY; \ this->dkz = DKZ; \ this->kx = new double[this->cd->sizes[2]]; \ this->ky = new double[this->cd->subsizes[0]]; \ this->kz = new double[this->cd->sizes[1]]; \ this->dealias_type = DEALIAS_TYPE; \ switch(this->dealias_type) \ { \ /* HL07 smooth filter */ \ case 1: \ this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)-1); \ this->kMy = this->dky*(int(this->rd->sizes[1] / 2)-1); \ this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)-1); \ break; \ default: \ this->kMx = this->dkx*(int(this->rd->sizes[2] / 3)-1); \ this->kMy = this->dky*(int(this->rd->sizes[1] / 3)-1); \ this->kMz = this->dkz*(int(this->rd->sizes[0] / 3)-1); \ } \ int i, ii; \ for (i = 0; i<this->cd->sizes[2]; i++) \ this->kx[i] = i*this->dkx; \ for (i = 0; i<this->cd->subsizes[0]; i++) \ { \ ii = i + this->cd->starts[0]; \ if (ii <= this->rd->sizes[1]/2) \ this->ky[i] = this->dky*ii; \ else \ this->ky[i] = this->dky*(ii - this->rd->sizes[1]); \ } \ for (i = 0; i<this->cd->sizes[1]; i++) \ { \ if (i <= this->rd->sizes[0]/2) \ this->kz[i] = this->dkz*i; \ else \ this->kz[i] = this->dkz*(i - this->rd->sizes[0]); \ } \ this->kM = this->kMx; \ if (this->kM < this->kMy) this->kM = this->kMy; \ if (this->kM < this->kMz) this->kM = this->kMz; \ this->kM2 = this->kM * this->kM; \ this->kMspec = this->kM; \ this->kMspec2 = this->kM2; \ this->dk = this->dkx; \ if (this->dk > this->dky) this->dk = this->dky; \ if (this->dk > this->dkz) this->dk = this->dkz; \ this->dk2 = this->dk*this->dk; \ DEBUG_MSG( \ "kM = %g, kM2 = %g, dk = %g, dk2 = %g\n", \ this->kM, this->kM2, this->dk, this->dk2); \ /* spectra stuff */ \ this->nshells = int(this->kMspec / this->dk) + 2; \ DEBUG_MSG( \ "kMspec = %g, kMspec2 = %g, nshells = %ld\n", \ this->kMspec, this->kMspec2, this->nshells); \ this->kshell = new double[this->nshells]; \ std::fill_n(this->kshell, this->nshells, 0.0); \ this->nshell = new int64_t[this->nshells]; \ std::fill_n(this->nshell, this->nshells, 0); \ double *kshell_local = new double[this->nshells]; \ std::fill_n(kshell_local, this->nshells, 0.0); \ int64_t *nshell_local = new int64_t[this->nshells]; \ std::fill_n(nshell_local, this->nshells, 0.0); \ double knorm; \ CLOOP_K2_NXMODES( \ this, \ if (k2 < this->kM2) \ { \ knorm = sqrt(k2); \ nshell_local[int(knorm/this->dk)] += nxmodes; \ kshell_local[int(knorm/this->dk)] += nxmodes*knorm; \ } \ this->Fourier_filter[int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.)); \ ); \ \ MPI_Allreduce( \ (void*)(nshell_local), \ (void*)(this->nshell), \ this->nshells, \ MPI_INT64_T, MPI_SUM, this->cd->comm); \ MPI_Allreduce( \ (void*)(kshell_local), \ (void*)(this->kshell), \ this->nshells, \ MPI_DOUBLE, MPI_SUM, this->cd->comm); \ for (int n=0; n<this->nshells; n++) \ { \ this->kshell[n] /= this->nshell[n]; \ } \ delete[] nshell_local; \ delete[] kshell_local; \ } \ \ template<> \ fluid_solver_base<R>::~fluid_solver_base() \ { \ delete[] this->kshell; \ delete[] this->nshell; \ \ delete[] this->kx; \ delete[] this->ky; \ delete[] this->kz; \ \ delete this->cd; \ delete this->rd; \ } \ \ template<> \ void fluid_solver_base<R>::low_pass_Fourier(FFTW(complex) *a, const int howmany, const double kmax) \ { \ const double km2 = kmax*kmax; \ const int howmany2 = 2*howmany; \ /*DEBUG_MSG("entered low_pass_Fourier, kmax=%lg km2=%lg howmany2=%d\n", kmax, km2, howmany2);*/ \ CLOOP_K2( \ this, \ /*DEBUG_MSG("kx=%lg ky=%lg kz=%lg k2=%lg\n", \ this->kx[xindex], \ this->ky[yindex], \ this->kz[zindex], \ k2);*/ \ if (k2 >= km2) \ std::fill_n((R*)(a + howmany*cindex), howmany2, 0.0); \ );\ } \ \ template<> \ void fluid_solver_base<R>::dealias(FFTW(complex) *a, const int howmany) \ { \ if (this->dealias_type == 0) \ { \ this->low_pass_Fourier(a, howmany, this->kM); \ return; \ } \ double tval; \ CLOOP_K2( \ this, \ tval = this->Fourier_filter[int(round(k2/this->dk2))]; \ for (int tcounter = 0; tcounter < howmany; tcounter++) \ for (int i=0; i<2; i++) \ a[howmany*cindex+tcounter][i] *= tval; \ ); \ } \ \ template<> \ void fluid_solver_base<R>::force_divfree(FFTW(complex) *a) \ { \ FFTW(complex) tval; \ CLOOP_K2( \ this, \ if (k2 > 0) \ { \ tval[0] = (this->kx[xindex]*((*(a + cindex*3 ))[0]) + \ this->ky[yindex]*((*(a + cindex*3+1))[0]) + \ this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2; \ tval[1] = (this->kx[xindex]*((*(a + cindex*3 ))[1]) + \ this->ky[yindex]*((*(a + cindex*3+1))[1]) + \ this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2; \ for (int imag_part=0; imag_part<2; imag_part++) \ { \ a[cindex*3 ][imag_part] -= tval[imag_part]*this->kx[xindex]; \ a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex]; \ a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex]; \ } \ } \ );\ if (this->cd->myrank == this->cd->rank[0]) \ std::fill_n((R*)(a), 6, 0.0); \ } \ \ template<> \ void fluid_solver_base<R>::compute_vector_gradient(FFTW(complex) *A, FFTW(complex) *cvec) \ { \ ptrdiff_t tindex; \ std::fill_n((R*)A, 3*2*this->cd->local_size, 0.0); \ FFTW(complex) *dx_u, *dy_u, *dz_u; \ dx_u = A; \ dy_u = A + this->cd->local_size; \ dz_u = A + 2*this->cd->local_size; \ CLOOP_K2( \ this, \ if (k2 <= this->kM2) \ { \ tindex = 3*cindex; \ for (int cc=0; cc<3; cc++) \ { \ dx_u[tindex + cc][0] = -this->kx[xindex]*cvec[tindex+cc][1]; \ dx_u[tindex + cc][1] = this->kx[xindex]*cvec[tindex+cc][0]; \ dy_u[tindex + cc][0] = -this->ky[yindex]*cvec[tindex+cc][1]; \ dy_u[tindex + cc][1] = this->ky[yindex]*cvec[tindex+cc][0]; \ dz_u[tindex + cc][0] = -this->kz[zindex]*cvec[tindex+cc][1]; \ dz_u[tindex + cc][1] = this->kz[zindex]*cvec[tindex+cc][0]; \ } \ } \ ); \ } \ \ template<> \ void fluid_solver_base<R>::symmetrize(FFTW(complex) *data, const int howmany) \ { \ ptrdiff_t ii, cc; \ MPI_Status *mpistatus = new MPI_Status; \ if (this->cd->myrank == this->cd->rank[0]) \ { \ for (cc = 0; cc < howmany; cc++) \ data[cc][1] = 0.0; \ for (ii = 1; ii < this->cd->sizes[1]/2; ii++) \ for (cc = 0; cc < howmany; cc++) { \ ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[0] = \ (*(data + cc + howmany*( ii)*this->cd->sizes[2]))[0]; \ ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[1] = \ -(*(data + cc + howmany*( ii)*this->cd->sizes[2]))[1]; \ } \ } \ FFTW(complex) *buffer; \ buffer = FFTW(alloc_complex)(howmany*this->cd->sizes[1]); \ ptrdiff_t yy; \ /*ptrdiff_t tindex;*/ \ int ranksrc, rankdst; \ for (yy = 1; yy < this->cd->sizes[0]/2; yy++) { \ ranksrc = this->cd->rank[yy]; \ rankdst = this->cd->rank[this->cd->sizes[0] - yy]; \ if (this->cd->myrank == ranksrc) \ for (ii = 0; ii < this->cd->sizes[1]; ii++) \ for (cc = 0; cc < howmany; cc++) \ for (int imag_comp=0; imag_comp<2; imag_comp++) \ (*(buffer + howmany*ii+cc))[imag_comp] = \ (*(data + howmany*((yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[imag_comp]; \ if (ranksrc != rankdst) \ { \ if (this->cd->myrank == ranksrc) \ MPI_Send((void*)buffer, \ howmany*this->cd->sizes[1], MPI_CNUM, rankdst, yy, \ this->cd->comm); \ if (this->cd->myrank == rankdst) \ MPI_Recv((void*)buffer, \ howmany*this->cd->sizes[1], MPI_CNUM, ranksrc, yy, \ this->cd->comm, mpistatus); \ } \ if (this->cd->myrank == rankdst) \ { \ for (ii = 1; ii < this->cd->sizes[1]; ii++) \ for (cc = 0; cc < howmany; cc++) \ { \ (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[0] = \ (*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[0]; \ (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[1] = \ -(*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[1]; \ } \ for (cc = 0; cc < howmany; cc++) \ { \ (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[0] = (*(buffer + cc))[0]; \ (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[1] = -(*(buffer + cc))[1]; \ } \ } \ } \ FFTW(free)(buffer); \ delete mpistatus; \ /* put asymmetric data to 0 */\ /*if (this->cd->myrank == this->cd->rank[this->cd->sizes[0]/2]) \ { \ tindex = howmany*(this->cd->sizes[0]/2 - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]; \ for (ii = 0; ii < this->cd->sizes[1]; ii++) \ { \ std::fill_n((R*)(data + tindex), howmany*2*this->cd->sizes[2], 0.0); \ tindex += howmany*this->cd->sizes[2]; \ } \ } \ tindex = howmany*(); \ std::fill_n((R*)(data + tindex), howmany*2, 0.0);*/ \ } \ \ template<> \ int fluid_solver_base<R>::read_base(const char *fname, R *data) \ { \ char full_name[512]; \ sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ return this->rd->read(full_name, (void*)data); \ } \ \ template<> \ int fluid_solver_base<R>::read_base(const char *fname, FFTW(complex) *data) \ { \ char full_name[512]; \ sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ return this->cd->read(full_name, (void*)data); \ } \ \ template<> \ int fluid_solver_base<R>::write_base(const char *fname, R *data) \ { \ char full_name[512]; \ sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ return this->rd->write(full_name, (void*)data); \ } \ \ template<> \ int fluid_solver_base<R>::write_base(const char *fname, FFTW(complex) *data) \ { \ char full_name[512]; \ sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ return this->cd->write(full_name, (void*)data); \ } \ \ /* finally, force generation of code */ \ template class fluid_solver_base<R>; \ /*****************************************************************************/ /*****************************************************************************/ /* now actually use the macro defined above */ FLUID_SOLVER_BASE_DEFINITIONS( FFTW_MANGLE_FLOAT, float, MPI_FLOAT, MPI_COMPLEX) FLUID_SOLVER_BASE_DEFINITIONS( FFTW_MANGLE_DOUBLE, double, MPI_DOUBLE, BFPS_MPICXX_DOUBLE_COMPLEX) /*****************************************************************************/