diff --git a/bfps/cpp/base.hpp b/bfps/cpp/base.hpp index ee2d74d5b751451e9bb34600a0e2b09891a73d1f..6ed24ab7781031f1a750e97183f3ac2b8e76ffc6 100644 --- a/bfps/cpp/base.hpp +++ b/bfps/cpp/base.hpp @@ -42,6 +42,9 @@ inline int MOD(int a, int n) return ((a%n) + n) % n; } +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// + #ifdef OMPI_MPI_H #define BFPS_MPICXX_DOUBLE_COMPLEX MPI_DOUBLE_COMPLEX @@ -52,6 +55,37 @@ inline int MOD(int a, int n) #endif//OMPI_MPI_H +template <class realtype> +class mpi_real_type; + +template <> +class mpi_real_type<float> +{ +public: + static constexpr MPI_Datatype real(){ + return MPI_FLOAT; + } + + static constexpr MPI_Datatype complex(){ + return MPI_COMPLEX; + } +}; + +template <> +class mpi_real_type<double> +{ +public: + static constexpr MPI_Datatype real(){ + return MPI_DOUBLE; + } + + static constexpr MPI_Datatype complex(){ + return BFPS_MPICXX_DOUBLE_COMPLEX; + } +}; + +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// #ifndef NDEBUG diff --git a/bfps/cpp/fftw_interface.hpp b/bfps/cpp/fftw_interface.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ec318b3b4b2f1c2888b4a0c07a3bfd50892026ec --- /dev/null +++ b/bfps/cpp/fftw_interface.hpp @@ -0,0 +1,163 @@ +/********************************************************************** +* * +* Copyright 2015 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ + +#ifndef FFTW_INTERFACE_HPP +#define FFTW_INTERFACE_HPP + +#include <fftw3-mpi.h> + +template <class realtype> +class fftw_interface; + +template <> +class fftw_interface<float> +{ +public: + using real = float; + using complex = fftwf_complex; + using plan = fftwf_plan; + using iodim = fftwf_iodim; + + static complex* alloc_complex(const size_t in_size){ + return fftwf_alloc_complex(in_size); + } + + static real* alloc_real(const size_t in_size){ + return fftwf_alloc_real(in_size); + } + + static void free(void* ptr){ + fftwf_free(ptr); + } + + static void execute(plan in_plan){ + fftwf_execute(in_plan); + } + + static void destroy_plan(plan in_plan){ + fftwf_destroy_plan(in_plan); + } + + template <class ... Params> + static plan mpi_plan_transpose(Params ... params){ + return fftwf_mpi_plan_transpose(params...); + } + + template <class ... Params> + static plan mpi_plan_many_transpose(Params ... params){ + return fftwf_mpi_plan_many_transpose(params...); + } + + template <class ... Params> + static plan plan_guru_r2r(Params ... params){ + return fftwf_plan_guru_r2r(params...); + } + + template <class ... Params> + static plan plan_guru_dft(Params ... params){ + return fftwf_plan_guru_dft(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_c2r(Params ... params){ + return fftwf_mpi_plan_many_dft_c2r(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_r2c(Params ... params){ + return fftwf_mpi_plan_many_dft_r2c(params...); + } + + template <class ... Params> + static plan mpi_plan_dft_c2r_3d(Params ... params){ + return fftwf_mpi_plan_dft_c2r_3d(params...); + } +}; + +template <> +class fftw_interface<double> +{ +public: + using real = double; + using complex = fftw_complex; + using plan = fftw_plan; + using iodim = fftw_iodim; + + static complex* alloc_complex(const size_t in_size){ + return fftw_alloc_complex(in_size); + } + + static real* alloc_real(const size_t in_size){ + return fftw_alloc_real(in_size); + } + + static void free(void* ptr){ + fftw_free(ptr); + } + + static void execute(plan in_plan){ + fftw_execute(in_plan); + } + + static void destroy_plan(plan in_plan){ + fftw_destroy_plan(in_plan); + } + + template <class ... Params> + static plan mpi_plan_transpose(Params ... params){ + return fftw_mpi_plan_transpose(params...); + } + + template <class ... Params> + static plan mpi_plan_many_transpose(Params ... params){ + return fftw_mpi_plan_many_transpose(params...); + } + + template <class ... Params> + static plan plan_guru_r2r(Params ... params){ + return fftw_plan_guru_r2r(params...); + } + + template <class ... Params> + static plan plan_guru_dft(Params ... params){ + return fftw_plan_guru_dft(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_c2r(Params ... params){ + return fftw_mpi_plan_many_dft_c2r(params...); + } + + template <class ... Params> + static plan mpi_plan_many_dft_r2c(Params ... params){ + return fftw_mpi_plan_many_dft_r2c(params...); + } + + template <class ... Params> + static plan mpi_plan_dft_c2r_3d(Params ... params){ + return fftw_mpi_plan_dft_c2r_3d(params...); + } +}; + +#endif // FFTW_INTERFACE_HPP diff --git a/bfps/cpp/fftw_tools.cpp b/bfps/cpp/fftw_tools.cpp index f6eacbf1dfe2dfe31e603e9239c42d4639327d3d..61e03d292f81aed1fa4b2dfcab880fb7105b676e 100644 --- a/bfps/cpp/fftw_tools.cpp +++ b/bfps/cpp/fftw_tools.cpp @@ -27,6 +27,7 @@ #include <iostream> #include "base.hpp" #include "fftw_tools.hpp" +#include "fftw_interface.hpp" #define NDEBUG @@ -51,150 +52,171 @@ int clip_zero_padding( return EXIT_SUCCESS; } +template +int clip_zero_padding<float>( + field_descriptor<float> *f, + float *a, + int howmany); +template +int clip_zero_padding<double>( + field_descriptor<double> *f, + double *a, + int howmany); + + + +template <class rnumber> +int copy_complex_array( + field_descriptor<rnumber> *fi, + rnumber (*ai)[2], +field_descriptor<rnumber> *fo, +rnumber (*ao)[2], +int howmany) +{ + DEBUG_MSG("entered copy_complex_array\n"); + typename fftw_interface<rnumber>::complex *buffer; + buffer = fftw_interface<rnumber>::alloc_complex(fi->slice_size*howmany); + + int min_fast_dim; + min_fast_dim = + (fi->sizes[2] > fo->sizes[2]) ? + fo->sizes[2] : fi->sizes[2]; -#define TOOLS_IMPLEMENTATION(FFTW, R, MPI_RNUM, MPI_CNUM) \ -template <> \ -int copy_complex_array<R>( \ - field_descriptor<R> *fi, \ - R (*ai)[2], \ - field_descriptor<R> *fo, \ - R (*ao)[2], \ - int howmany) \ -{ \ - DEBUG_MSG("entered copy_complex_array\n"); \ - FFTW(complex) *buffer; \ - buffer = FFTW(alloc_complex)(fi->slice_size*howmany); \ - \ - int min_fast_dim; \ - min_fast_dim = \ - (fi->sizes[2] > fo->sizes[2]) ? \ - fo->sizes[2] : fi->sizes[2]; \ - \ /* clean up destination, in case we're padding with zeros - (even if only for one dimension) */ \ - std::fill_n((R*)ao, fo->local_size*2, 0.0); \ - \ - int64_t ii0, ii1; \ - int64_t oi0, oi1; \ - int64_t delta1, delta0; \ - int irank, orank; \ - delta0 = (fo->sizes[0] - fi->sizes[0]); \ - delta1 = (fo->sizes[1] - fi->sizes[1]); \ - for (ii0=0; ii0 < fi->sizes[0]; ii0++) \ - { \ - if (ii0 <= fi->sizes[0]/2) \ - { \ - oi0 = ii0; \ - if (oi0 > fo->sizes[0]/2) \ - continue; \ - } \ - else \ - { \ - oi0 = ii0 + delta0; \ - if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) \ - continue; \ - } \ - irank = fi->rank[ii0]; \ - orank = fo->rank[oi0]; \ - if ((irank == orank) && \ - (irank == fi->myrank)) \ - { \ - std::copy( \ - (R*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), \ - (R*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), \ - (R*)buffer); \ - } \ - else \ - { \ - if (fi->myrank == irank) \ - { \ - MPI_Send( \ - (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), \ - fi->slice_size, \ - MPI_CNUM, \ - orank, \ - ii0, \ - fi->comm); \ - } \ - if (fi->myrank == orank) \ - { \ - MPI_Recv( \ - (void*)(buffer), \ - fi->slice_size, \ - MPI_CNUM, \ - irank, \ - ii0, \ - fi->comm, \ - MPI_STATUS_IGNORE); \ - } \ - } \ - if (fi->myrank == orank) \ - { \ - for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) \ - { \ - if (ii1 <= fi->sizes[1]/2) \ - { \ - oi1 = ii1; \ - if (oi1 > fo->sizes[1]/2) \ - continue; \ - } \ - else \ - { \ - oi1 = ii1 + delta1; \ - if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) \ - continue; \ - } \ - std::copy( \ - (R*)(buffer + (ii1*fi->sizes[2]*howmany)), \ - (R*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), \ - (R*)(ao + \ - ((oi0 - fo->starts[0])*fo->sizes[1] + \ - oi1)*fo->sizes[2]*howmany)); \ - } \ - } \ - } \ - fftw_free(buffer); \ - MPI_Barrier(fi->comm); \ - \ - DEBUG_MSG("exiting copy_complex_array\n"); \ - return EXIT_SUCCESS; \ -} \ - \ -template <> \ -int get_descriptors_3D<R>( \ - int n0, int n1, int n2, \ - field_descriptor<R> **fr, \ - field_descriptor<R> **fc) \ -{ \ - int ntmp[3]; \ - ntmp[0] = n0; \ - ntmp[1] = n1; \ - ntmp[2] = n2; \ - *fr = new field_descriptor<R>(3, ntmp, MPI_RNUM, MPI_COMM_WORLD); \ - ntmp[0] = n0; \ - ntmp[1] = n1; \ - ntmp[2] = n2/2+1; \ - *fc = new field_descriptor<R>(3, ntmp, MPI_CNUM, MPI_COMM_WORLD); \ - return EXIT_SUCCESS; \ -} \ - \ -template \ -int clip_zero_padding<R>( \ - field_descriptor<R> *f, \ - R *a, \ - int howmany); \ - - - -TOOLS_IMPLEMENTATION( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT, - MPI_COMPLEX) -TOOLS_IMPLEMENTATION( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE, - BFPS_MPICXX_DOUBLE_COMPLEX) + (even if only for one dimension) */ + std::fill_n((rnumber*)ao, fo->local_size*2, 0.0); + + int64_t ii0, ii1; + int64_t oi0, oi1; + int64_t delta1, delta0; + int irank, orank; + delta0 = (fo->sizes[0] - fi->sizes[0]); + delta1 = (fo->sizes[1] - fi->sizes[1]); + for (ii0=0; ii0 < fi->sizes[0]; ii0++) + { + if (ii0 <= fi->sizes[0]/2) + { + oi0 = ii0; + if (oi0 > fo->sizes[0]/2) + continue; + } + else + { + oi0 = ii0 + delta0; + if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) + continue; + } + irank = fi->rank[ii0]; + orank = fo->rank[oi0]; + if ((irank == orank) && + (irank == fi->myrank)) + { + std::copy( + (rnumber*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), + (rnumber*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), + (rnumber*)buffer); + } + else + { + if (fi->myrank == irank) + { + MPI_Send( + (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), + fi->slice_size, + mpi_real_type<rnumber>::complex(), + orank, + ii0, + fi->comm); + } + if (fi->myrank == orank) + { + MPI_Recv( + (void*)(buffer), + fi->slice_size, + mpi_real_type<rnumber>::complex(), + irank, + ii0, + fi->comm, + MPI_STATUS_IGNORE); + } + } + if (fi->myrank == orank) + { + for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) + { + if (ii1 <= fi->sizes[1]/2) + { + oi1 = ii1; + if (oi1 > fo->sizes[1]/2) + continue; + } + else + { + oi1 = ii1 + delta1; + if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) + continue; + } + std::copy( + (rnumber*)(buffer + (ii1*fi->sizes[2]*howmany)), + (rnumber*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), + (rnumber*)(ao + + ((oi0 - fo->starts[0])*fo->sizes[1] + + oi1)*fo->sizes[2]*howmany)); + } + } + } + fftw_interface<rnumber>::free(buffer); + MPI_Barrier(fi->comm); + + DEBUG_MSG("exiting copy_complex_array\n"); + return EXIT_SUCCESS; +} + +template +int copy_complex_array<float>( + field_descriptor<float> *fi, + float (*ai)[2], + field_descriptor<float> *fo, + float (*ao)[2], + int howmany); + +template +int copy_complex_array<double>( + field_descriptor<double> *fi, + double (*ai)[2], + field_descriptor<double> *fo, + double (*ao)[2], + int howmany); + + +template <class rnumber> +int get_descriptors_3D( + int n0, int n1, int n2, + field_descriptor<rnumber> **fr, + field_descriptor<rnumber> **fc) +{ + int ntmp[3]; + ntmp[0] = n0; + ntmp[1] = n1; + ntmp[2] = n2; + *fr = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD); + ntmp[0] = n0; + ntmp[1] = n1; + ntmp[2] = n2/2+1; + *fc = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::complex(), MPI_COMM_WORLD); + return EXIT_SUCCESS; +} + +template +int get_descriptors_3D<float>( + int n0, int n1, int n2, + field_descriptor<float> **fr, + field_descriptor<float> **fc); + +template +int get_descriptors_3D<double>( + int n0, int n1, int n2, + field_descriptor<double> **fr, + field_descriptor<double> **fc); diff --git a/bfps/cpp/field_descriptor.cpp b/bfps/cpp/field_descriptor.cpp index b5025835903a37ea5384cb4102c716f527aabfe5..8e96ba64eb6fb8fd4e4c4adee31dee19c2dd3fd4 100644 --- a/bfps/cpp/field_descriptor.cpp +++ b/bfps/cpp/field_descriptor.cpp @@ -31,476 +31,461 @@ #include <iostream> #include "base.hpp" #include "field_descriptor.hpp" +#include "fftw_interface.hpp" /*****************************************************************************/ /* macro for specializations to numeric types compatible with FFTW */ -#define CLASS_IMPLEMENTATION(FFTW, R, MPI_RNUM, MPI_CNUM) \ - \ -template<> \ -field_descriptor<R>::field_descriptor( \ - int ndims, \ - int *n, \ - MPI_Datatype element_type, \ - MPI_Comm COMM_TO_USE) \ -{ \ - DEBUG_MSG("entered field_descriptor::field_descriptor\n"); \ - this->comm = COMM_TO_USE; \ - MPI_Comm_rank(this->comm, &this->myrank); \ - MPI_Comm_size(this->comm, &this->nprocs); \ - this->ndims = ndims; \ - this->sizes = new int[ndims]; \ - this->subsizes = new int[ndims]; \ - this->starts = new int[ndims]; \ - int tsizes [ndims]; \ - int tsubsizes[ndims]; \ - int tstarts [ndims]; \ - ptrdiff_t *nfftw = new ptrdiff_t[ndims]; \ - ptrdiff_t local_n0, local_0_start; \ - for (int i = 0; i < this->ndims; i++) \ - nfftw[i] = n[i]; \ - this->local_size = fftw_mpi_local_size_many( \ - this->ndims, \ - nfftw, \ - 1, \ - FFTW_MPI_DEFAULT_BLOCK, \ - this->comm, \ - &local_n0, \ - &local_0_start); \ - this->sizes[0] = n[0]; \ - this->subsizes[0] = (int)local_n0; \ - this->starts[0] = (int)local_0_start; \ - DEBUG_MSG_WAIT( \ - this->comm, \ - "first subsizes[0] = %d %d %d\n", \ - this->subsizes[0], \ - tsubsizes[0], \ - (int)local_n0); \ - tsizes[0] = n[0]; \ - tsubsizes[0] = (int)local_n0; \ - tstarts[0] = (int)local_0_start; \ - DEBUG_MSG_WAIT( \ - this->comm, \ - "second subsizes[0] = %d %d %d\n", \ - this->subsizes[0], \ - tsubsizes[0], \ - (int)local_n0); \ - this->mpi_dtype = element_type; \ - this->slice_size = 1; \ - this->full_size = this->sizes[0]; \ - for (int i = 1; i < this->ndims; i++) \ - { \ - this->sizes[i] = n[i]; \ - this->subsizes[i] = n[i]; \ - this->starts[i] = 0; \ - this->slice_size *= this->subsizes[i]; \ - this->full_size *= this->sizes[i]; \ - tsizes[i] = this->sizes[i]; \ - tsubsizes[i] = this->subsizes[i]; \ - tstarts[i] = this->starts[i]; \ - } \ - tsizes[ndims-1] *= sizeof(R); \ - tsubsizes[ndims-1] *= sizeof(R); \ - tstarts[ndims-1] *= sizeof(R); \ - if (this->mpi_dtype == MPI_CNUM) \ - { \ - tsizes[ndims-1] *= 2; \ - tsubsizes[ndims-1] *= 2; \ - tstarts[ndims-1] *= 2; \ - } \ - int local_zero_array[this->nprocs], zero_array[this->nprocs]; \ - for (int i=0; i<this->nprocs; i++) \ - local_zero_array[i] = 0; \ - local_zero_array[this->myrank] = (this->subsizes[0] == 0) ? 1 : 0; \ - MPI_Allreduce( \ - local_zero_array, \ - zero_array, \ - this->nprocs, \ - MPI_INT, \ - MPI_SUM, \ - this->comm); \ - int no_of_excluded_ranks = 0; \ - for (int i = 0; i<this->nprocs; i++) \ - no_of_excluded_ranks += zero_array[i]; \ - DEBUG_MSG_WAIT( \ - this->comm, \ - "subsizes[0] = %d %d\n", \ - this->subsizes[0], \ - tsubsizes[0]); \ - if (no_of_excluded_ranks == 0) \ - { \ - this->io_comm = this->comm; \ - this->io_nprocs = this->nprocs; \ - this->io_myrank = this->myrank; \ - } \ - else \ - { \ - int excluded_rank[no_of_excluded_ranks]; \ - for (int i=0, j=0; i<this->nprocs; i++) \ - if (zero_array[i]) \ - { \ - excluded_rank[j] = i; \ - j++; \ - } \ - MPI_Group tgroup0, tgroup; \ - MPI_Comm_group(this->comm, &tgroup0); \ - MPI_Group_excl(tgroup0, no_of_excluded_ranks, excluded_rank, &tgroup); \ - MPI_Comm_create(this->comm, tgroup, &this->io_comm); \ - MPI_Group_free(&tgroup0); \ - MPI_Group_free(&tgroup); \ - if (this->subsizes[0] > 0) \ - { \ - MPI_Comm_rank(this->io_comm, &this->io_myrank); \ - MPI_Comm_size(this->io_comm, &this->io_nprocs); \ - } \ - else \ - { \ - this->io_myrank = MPI_PROC_NULL; \ - this->io_nprocs = -1; \ - } \ - } \ - DEBUG_MSG_WAIT( \ - this->comm, \ - "inside field_descriptor constructor, about to call " \ - "MPI_Type_create_subarray " \ - "%d %d %d\n", \ - this->sizes[0], \ - this->subsizes[0], \ - this->starts[0]); \ - for (int i=0; i<this->ndims; i++) \ - DEBUG_MSG_WAIT( \ - this->comm, \ - "tsizes " \ - "%d %d %d\n", \ - tsizes[i], \ - tsubsizes[i], \ - tstarts[i]); \ - if (this->subsizes[0] > 0) \ - { \ - DEBUG_MSG("creating subarray\n"); \ - MPI_Type_create_subarray( \ - ndims, \ - tsizes, \ - tsubsizes, \ - tstarts, \ - MPI_ORDER_C, \ - MPI_UNSIGNED_CHAR, \ - &this->mpi_array_dtype); \ - MPI_Type_commit(&this->mpi_array_dtype); \ - } \ - this->rank = new int[this->sizes[0]]; \ - int *local_rank = new int[this->sizes[0]]; \ - std::fill_n(local_rank, this->sizes[0], 0); \ - for (int i = 0; i < this->sizes[0]; i++) \ - if (i >= this->starts[0] && i < this->starts[0] + this->subsizes[0]) \ - local_rank[i] = this->myrank; \ - MPI_Allreduce( \ - local_rank, \ - this->rank, \ - this->sizes[0], \ - MPI_INT, \ - MPI_SUM, \ - this->comm); \ - delete[] local_rank; \ - this->all_start0 = new int[this->nprocs]; \ - int *local_start0 = new int[this->nprocs]; \ - std::fill_n(local_start0, this->nprocs, 0); \ - for (int i = 0; i < this->nprocs; i++) \ - if (this->myrank == i) \ - local_start0[i] = this->starts[0]; \ - MPI_Allreduce( \ - local_start0, \ - this->all_start0, \ - this->nprocs, \ - MPI_INT, \ - MPI_SUM, \ - this->comm); \ - delete[] local_start0; \ - this->all_size0 = new int[this->nprocs]; \ - int *local_size0 = new int[this->nprocs]; \ - std::fill_n(local_size0, this->nprocs, 0); \ - for (int i = 0; i < this->nprocs; i++) \ - if (this->myrank == i) \ - local_size0[i] = this->subsizes[0]; \ - MPI_Allreduce( \ - local_size0, \ - this->all_size0, \ - this->nprocs, \ - MPI_INT, \ - MPI_SUM, \ - this->comm); \ - delete[] local_size0; \ - DEBUG_MSG("exiting field_descriptor constructor\n"); \ -} \ - \ -template <> \ -int field_descriptor<R>::read( \ - const char *fname, \ - void *buffer) \ -{ \ - DEBUG_MSG("entered field_descriptor::read\n"); \ - char representation[] = "native"; \ - if (this->subsizes[0] > 0) \ - { \ - MPI_Info info; \ - MPI_Info_create(&info); \ - MPI_File f; \ - ptrdiff_t read_size = this->local_size*sizeof(R); \ - DEBUG_MSG("read size is %ld\n", read_size); \ - char ffname[200]; \ - if (this->mpi_dtype == MPI_CNUM) \ - read_size *= 2; \ - DEBUG_MSG("read size is %ld\n", read_size); \ - sprintf(ffname, "%s", fname); \ - \ - MPI_File_open( \ - this->io_comm, \ - ffname, \ - MPI_MODE_RDONLY, \ - info, \ - &f); \ - DEBUG_MSG("opened file\n"); \ - MPI_File_set_view( \ - f, \ - 0, \ - MPI_UNSIGNED_CHAR, \ - this->mpi_array_dtype, \ - representation, \ - info); \ - DEBUG_MSG("view is set\n"); \ - MPI_File_read_all( \ - f, \ - buffer, \ - read_size, \ - MPI_UNSIGNED_CHAR, \ - MPI_STATUS_IGNORE); \ - DEBUG_MSG("info is read\n"); \ - MPI_File_close(&f); \ - } \ - DEBUG_MSG("finished with field_descriptor::read\n"); \ - return EXIT_SUCCESS; \ -} \ - \ -template <> \ -int field_descriptor<R>::write( \ - const char *fname, \ - void *buffer) \ -{ \ - char representation[] = "native"; \ - if (this->subsizes[0] > 0) \ - { \ - MPI_Info info; \ - MPI_Info_create(&info); \ - MPI_File f; \ - ptrdiff_t read_size = this->local_size*sizeof(R); \ - char ffname[200]; \ - if (this->mpi_dtype == MPI_CNUM) \ - read_size *= 2; \ - sprintf(ffname, "%s", fname); \ - \ - MPI_File_open( \ - this->io_comm, \ - ffname, \ - MPI_MODE_CREATE | MPI_MODE_WRONLY, \ - info, \ - &f); \ - MPI_File_set_view( \ - f, \ - 0, \ - MPI_UNSIGNED_CHAR, \ - this->mpi_array_dtype, \ - representation, \ - info); \ - MPI_File_write_all( \ - f, \ - buffer, \ - read_size, \ - MPI_UNSIGNED_CHAR, \ - MPI_STATUS_IGNORE); \ - MPI_File_close(&f); \ - } \ - \ - return EXIT_SUCCESS; \ -} \ - \ -template <> \ -int field_descriptor<R>::transpose( \ - R *input, \ - R *output) \ -{ \ - /* IMPORTANT NOTE: \ - for 3D transposition, the input data is messed up */ \ - FFTW(plan) tplan; \ - if (this->ndims == 3) \ - { \ - /* transpose the two local dimensions 1 and 2 */ \ - R *atmp; \ - atmp = FFTW(alloc_real)(this->slice_size); \ - for (int k = 0; k < this->subsizes[0]; k++) \ - { \ - /* put transposed slice in atmp */ \ - for (int j = 0; j < this->sizes[1]; j++) \ - for (int i = 0; i < this->sizes[2]; i++) \ - atmp[i*this->sizes[1] + j] = \ - input[(k*this->sizes[1] + j)*this->sizes[2] + i]; \ - /* copy back transposed slice */ \ - std::copy( \ - atmp, \ - atmp + this->slice_size, \ - input + k*this->slice_size); \ - } \ - FFTW(free)(atmp); \ - } \ - tplan = FFTW(mpi_plan_transpose)( \ - this->sizes[0], this->slice_size, \ - input, output, \ - this->comm, \ - FFTW_ESTIMATE); \ - FFTW(execute)(tplan); \ - FFTW(destroy_plan)(tplan); \ - return EXIT_SUCCESS; \ -} \ - \ -template<> \ -int field_descriptor<R>::transpose( \ - FFTW(complex) *input, \ - FFTW(complex) *output) \ -{ \ - switch (this->ndims) \ - { \ - case 2: \ - /* do a global transpose over the 2 dimensions */ \ - if (output == NULL) \ - { \ - std::cerr << "bad arguments for transpose.\n" << std::endl; \ - return EXIT_FAILURE; \ - } \ - FFTW(plan) tplan; \ - tplan = FFTW(mpi_plan_many_transpose)( \ - this->sizes[0], this->sizes[1], 2, \ - FFTW_MPI_DEFAULT_BLOCK, \ - FFTW_MPI_DEFAULT_BLOCK, \ - (R*)input, (R*)output, \ - this->comm, \ - FFTW_ESTIMATE); \ - FFTW(execute)(tplan); \ - FFTW(destroy_plan)(tplan); \ - break; \ - case 3: \ - /* transpose the two local dimensions 1 and 2 */ \ - FFTW(complex) *atmp; \ - atmp = FFTW(alloc_complex)(this->slice_size); \ - for (int k = 0; k < this->subsizes[0]; k++) \ - { \ - /* put transposed slice in atmp */ \ - for (int j = 0; j < this->sizes[1]; j++) \ - for (int i = 0; i < this->sizes[2]; i++) \ - { \ - atmp[i*this->sizes[1] + j][0] = \ - input[(k*this->sizes[1] + j)*this->sizes[2] + i][0]; \ - atmp[i*this->sizes[1] + j][1] = \ - input[(k*this->sizes[1] + j)*this->sizes[2] + i][1]; \ - } \ - /* copy back transposed slice */ \ - std::copy( \ - (R*)(atmp), \ - (R*)(atmp + this->slice_size), \ - (R*)(input + k*this->slice_size)); \ - } \ - FFTW(free)(atmp); \ - break; \ - default: \ - return EXIT_FAILURE; \ - break; \ - } \ - return EXIT_SUCCESS; \ -} \ - \ -template<> \ -int field_descriptor<R>::interleave( \ - R *a, \ - int dim) \ -{ \ -/* the following is copied from \ - * http://agentzlerich.blogspot.com/2010/01/using-fftw-for-in-place-matrix.html \ - * */ \ - FFTW(iodim) howmany_dims[2]; \ - howmany_dims[0].n = dim; \ - howmany_dims[0].is = this->local_size; \ - howmany_dims[0].os = 1; \ - howmany_dims[1].n = this->local_size; \ - howmany_dims[1].is = 1; \ - howmany_dims[1].os = dim; \ - const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); \ - \ - FFTW(plan) tmp = FFTW(plan_guru_r2r)( \ - /*rank*/0, \ - /*dims*/NULL, \ - howmany_rank, \ - howmany_dims, \ - a, \ - a, \ - /*kind*/NULL, \ - FFTW_ESTIMATE); \ - FFTW(execute)(tmp); \ - FFTW(destroy_plan)(tmp); \ - return EXIT_SUCCESS; \ -} \ - \ -template<> \ -int field_descriptor<R>::interleave( \ - FFTW(complex) *a, \ - int dim) \ -{ \ - FFTW(iodim) howmany_dims[2]; \ - howmany_dims[0].n = dim; \ - howmany_dims[0].is = this->local_size; \ - howmany_dims[0].os = 1; \ - howmany_dims[1].n = this->local_size; \ - howmany_dims[1].is = 1; \ - howmany_dims[1].os = dim; \ - const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); \ - \ - FFTW(plan) tmp = FFTW(plan_guru_dft)( \ - /*rank*/0, \ - /*dims*/NULL, \ - howmany_rank, \ - howmany_dims, \ - a, \ - a, \ - +1, \ - FFTW_ESTIMATE); \ - FFTW(execute)(tmp); \ - FFTW(destroy_plan)(tmp); \ - return EXIT_SUCCESS; \ -} \ - \ -template<> \ -field_descriptor<R>* field_descriptor<R>::get_transpose() \ -{ \ - int n[this->ndims]; \ - for (int i=0; i<this->ndims; i++) \ - n[i] = this->sizes[this->ndims - i - 1]; \ - return new field_descriptor<R>(this->ndims, n, this->mpi_dtype, this->comm); \ -} \ -/*****************************************************************************/ +template <class rnumber> +field_descriptor<rnumber>::field_descriptor( + int ndims, + int *n, + MPI_Datatype element_type, + MPI_Comm COMM_TO_USE) +{ + DEBUG_MSG("entered field_descriptor::field_descriptor\n"); + this->comm = COMM_TO_USE; + MPI_Comm_rank(this->comm, &this->myrank); + MPI_Comm_size(this->comm, &this->nprocs); + this->ndims = ndims; + this->sizes = new int[ndims]; + this->subsizes = new int[ndims]; + this->starts = new int[ndims]; + int tsizes [ndims]; + int tsubsizes[ndims]; + int tstarts [ndims]; + ptrdiff_t *nfftw = new ptrdiff_t[ndims]; + ptrdiff_t local_n0, local_0_start; + for (int i = 0; i < this->ndims; i++) + nfftw[i] = n[i]; + this->local_size = fftw_mpi_local_size_many( + this->ndims, + nfftw, + 1, + FFTW_MPI_DEFAULT_BLOCK, + this->comm, + &local_n0, + &local_0_start); + this->sizes[0] = n[0]; + this->subsizes[0] = (int)local_n0; + this->starts[0] = (int)local_0_start; + DEBUG_MSG_WAIT( + this->comm, + "first subsizes[0] = %d %d %d\n", + this->subsizes[0], + tsubsizes[0], + (int)local_n0); + tsizes[0] = n[0]; + tsubsizes[0] = (int)local_n0; + tstarts[0] = (int)local_0_start; + DEBUG_MSG_WAIT( + this->comm, + "second subsizes[0] = %d %d %d\n", + this->subsizes[0], + tsubsizes[0], + (int)local_n0); + this->mpi_dtype = element_type; + this->slice_size = 1; + this->full_size = this->sizes[0]; + for (int i = 1; i < this->ndims; i++) + { + this->sizes[i] = n[i]; + this->subsizes[i] = n[i]; + this->starts[i] = 0; + this->slice_size *= this->subsizes[i]; + this->full_size *= this->sizes[i]; + tsizes[i] = this->sizes[i]; + tsubsizes[i] = this->subsizes[i]; + tstarts[i] = this->starts[i]; + } + tsizes[ndims-1] *= sizeof(rnumber); + tsubsizes[ndims-1] *= sizeof(rnumber); + tstarts[ndims-1] *= sizeof(rnumber); + if (this->mpi_dtype == mpi_real_type<rnumber>::complex()) + { + tsizes[ndims-1] *= 2; + tsubsizes[ndims-1] *= 2; + tstarts[ndims-1] *= 2; + } + int local_zero_array[this->nprocs], zero_array[this->nprocs]; + for (int i=0; i<this->nprocs; i++) + local_zero_array[i] = 0; + local_zero_array[this->myrank] = (this->subsizes[0] == 0) ? 1 : 0; + MPI_Allreduce( + local_zero_array, + zero_array, + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + int no_of_excluded_ranks = 0; + for (int i = 0; i<this->nprocs; i++) + no_of_excluded_ranks += zero_array[i]; + DEBUG_MSG_WAIT( + this->comm, + "subsizes[0] = %d %d\n", + this->subsizes[0], + tsubsizes[0]); + if (no_of_excluded_ranks == 0) + { + this->io_comm = this->comm; + this->io_nprocs = this->nprocs; + this->io_myrank = this->myrank; + } + else + { + int excluded_rank[no_of_excluded_ranks]; + for (int i=0, j=0; i<this->nprocs; i++) + if (zero_array[i]) + { + excluded_rank[j] = i; + j++; + } + MPI_Group tgroup0, tgroup; + MPI_Comm_group(this->comm, &tgroup0); + MPI_Group_excl(tgroup0, no_of_excluded_ranks, excluded_rank, &tgroup); + MPI_Comm_create(this->comm, tgroup, &this->io_comm); + MPI_Group_free(&tgroup0); + MPI_Group_free(&tgroup); + if (this->subsizes[0] > 0) + { + MPI_Comm_rank(this->io_comm, &this->io_myrank); + MPI_Comm_size(this->io_comm, &this->io_nprocs); + } + else + { + this->io_myrank = MPI_PROC_NULL; + this->io_nprocs = -1; + } + } + DEBUG_MSG_WAIT( + this->comm, + "inside field_descriptor constructor, about to call " + "MPI_Type_create_subarray " + "%d %d %d\n", + this->sizes[0], + this->subsizes[0], + this->starts[0]); + for (int i=0; i<this->ndims; i++) + DEBUG_MSG_WAIT( + this->comm, + "tsizes " + "%d %d %d\n", + tsizes[i], + tsubsizes[i], + tstarts[i]); + if (this->subsizes[0] > 0) + { + DEBUG_MSG("creating subarray\n"); + MPI_Type_create_subarray( + ndims, + tsizes, + tsubsizes, + tstarts, + MPI_ORDER_C, + MPI_UNSIGNED_CHAR, + &this->mpi_array_dtype); + MPI_Type_commit(&this->mpi_array_dtype); + } + this->rank = new int[this->sizes[0]]; + int *local_rank = new int[this->sizes[0]]; + std::fill_n(local_rank, this->sizes[0], 0); + for (int i = 0; i < this->sizes[0]; i++) + if (i >= this->starts[0] && i < this->starts[0] + this->subsizes[0]) + local_rank[i] = this->myrank; + MPI_Allreduce( + local_rank, + this->rank, + this->sizes[0], + MPI_INT, + MPI_SUM, + this->comm); + delete[] local_rank; + this->all_start0 = new int[this->nprocs]; + int *local_start0 = new int[this->nprocs]; + std::fill_n(local_start0, this->nprocs, 0); + for (int i = 0; i < this->nprocs; i++) + if (this->myrank == i) + local_start0[i] = this->starts[0]; + MPI_Allreduce( + local_start0, + this->all_start0, + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + delete[] local_start0; + this->all_size0 = new int[this->nprocs]; + int *local_size0 = new int[this->nprocs]; + std::fill_n(local_size0, this->nprocs, 0); + for (int i = 0; i < this->nprocs; i++) + if (this->myrank == i) + local_size0[i] = this->subsizes[0]; + MPI_Allreduce( + local_size0, + this->all_size0, + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + delete[] local_size0; + DEBUG_MSG("exiting field_descriptor constructor\n"); +} +template <class rnumber> +int field_descriptor<rnumber>::read( + const char *fname, + void *buffer) +{ + DEBUG_MSG("entered field_descriptor::read\n"); + char representation[] = "native"; + if (this->subsizes[0] > 0) + { + MPI_Info info; + MPI_Info_create(&info); + MPI_File f; + ptrdiff_t read_size = this->local_size*sizeof(rnumber); + DEBUG_MSG("read size is %ld\n", read_size); + char ffname[200]; + if (this->mpi_dtype == mpi_real_type<rnumber>::complex()) + read_size *= 2; + DEBUG_MSG("read size is %ld\n", read_size); + sprintf(ffname, "%s", fname); + MPI_File_open( + this->io_comm, + ffname, + MPI_MODE_RDONLY, + info, + &f); + DEBUG_MSG("opened file\n"); + MPI_File_set_view( + f, + 0, + MPI_UNSIGNED_CHAR, + this->mpi_array_dtype, + representation, + info); + DEBUG_MSG("view is set\n"); + MPI_File_read_all( + f, + buffer, + read_size, + MPI_UNSIGNED_CHAR, + MPI_STATUS_IGNORE); + DEBUG_MSG("info is read\n"); + MPI_File_close(&f); + } + DEBUG_MSG("finished with field_descriptor::read\n"); + return EXIT_SUCCESS; +} + +template <class rnumber> +int field_descriptor<rnumber>::write( + const char *fname, + void *buffer) +{ + char representation[] = "native"; + if (this->subsizes[0] > 0) + { + MPI_Info info; + MPI_Info_create(&info); + MPI_File f; + ptrdiff_t read_size = this->local_size*sizeof(rnumber); + char ffname[200]; + if (this->mpi_dtype == mpi_real_type<rnumber>::complex()) + read_size *= 2; + sprintf(ffname, "%s", fname); + + MPI_File_open( + this->io_comm, + ffname, + MPI_MODE_CREATE | MPI_MODE_WRONLY, + info, + &f); + MPI_File_set_view( + f, + 0, + MPI_UNSIGNED_CHAR, + this->mpi_array_dtype, + representation, + info); + MPI_File_write_all( + f, + buffer, + read_size, + MPI_UNSIGNED_CHAR, + MPI_STATUS_IGNORE); + MPI_File_close(&f); + } + + return EXIT_SUCCESS; +} + +template <class rnumber> +int field_descriptor<rnumber>::transpose( + rnumber *input, + rnumber *output) +{ + /* IMPORTANT NOTE: + for 3D transposition, the input data is messed up */ + typename fftw_interface<rnumber>::plan tplan; + if (this->ndims == 3) + { + /* transpose the two local dimensions 1 and 2 */ + rnumber *atmp; + atmp = fftw_interface<rnumber>::alloc_real(this->slice_size); + for (int k = 0; k < this->subsizes[0]; k++) + { + /* put transposed slice in atmp */ + for (int j = 0; j < this->sizes[1]; j++) + for (int i = 0; i < this->sizes[2]; i++) + atmp[i*this->sizes[1] + j] = + input[(k*this->sizes[1] + j)*this->sizes[2] + i]; + /* copy back transposed slice */ + std::copy( + atmp, + atmp + this->slice_size, + input + k*this->slice_size); + } + fftw_interface<rnumber>::free(atmp); + } + tplan = fftw_interface<rnumber>::mpi_plan_transpose( + this->sizes[0], this->slice_size, + input, output, + this->comm, + FFTW_ESTIMATE); + fftw_interface<rnumber>::execute(tplan); + fftw_interface<rnumber>::destroy_plan(tplan); + return EXIT_SUCCESS; +} + +template <class rnumber> +int field_descriptor<rnumber>::transpose( + typename fftw_interface<rnumber>::complex *input, + typename fftw_interface<rnumber>::complex *output) +{ + switch (this->ndims) + { + case 2: + /* do a global transpose over the 2 dimensions */ + if (output == NULL) + { + std::cerr << "bad arguments for transpose.\n" << std::endl; + return EXIT_FAILURE; + } + typename fftw_interface<rnumber>::plan tplan; + tplan = fftw_interface<rnumber>::mpi_plan_many_transpose( + this->sizes[0], this->sizes[1], 2, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + (rnumber*)input, (rnumber*)output, + this->comm, + FFTW_ESTIMATE); + fftw_interface<rnumber>::execute(tplan); + fftw_interface<rnumber>::destroy_plan(tplan); + break; + case 3: + /* transpose the two local dimensions 1 and 2 */ + typename fftw_interface<rnumber>::complex *atmp; + atmp = fftw_interface<rnumber>::alloc_complex(this->slice_size); + for (int k = 0; k < this->subsizes[0]; k++) + { + /* put transposed slice in atmp */ + for (int j = 0; j < this->sizes[1]; j++) + for (int i = 0; i < this->sizes[2]; i++) + { + atmp[i*this->sizes[1] + j][0] = + input[(k*this->sizes[1] + j)*this->sizes[2] + i][0]; + atmp[i*this->sizes[1] + j][1] = + input[(k*this->sizes[1] + j)*this->sizes[2] + i][1]; + } + /* copy back transposed slice */ + std::copy( + (rnumber*)(atmp), + (rnumber*)(atmp + this->slice_size), + (rnumber*)(input + k*this->slice_size)); + } + fftw_interface<rnumber>::free(atmp); + break; + default: + return EXIT_FAILURE; + break; + } + return EXIT_SUCCESS; +} + +template <class rnumber> +int field_descriptor<rnumber>::interleave( + rnumber *a, + int dim) +{ + /* the following is copied from + * http://agentzlerich.blogspot.com/2010/01/using-fftw-for-in-place-matrix.html + * */ + typename fftw_interface<rnumber>::iodim howmany_dims[2]; + howmany_dims[0].n = dim; + howmany_dims[0].is = this->local_size; + howmany_dims[0].os = 1; + howmany_dims[1].n = this->local_size; + howmany_dims[1].is = 1; + howmany_dims[1].os = dim; + const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); + + typename fftw_interface<rnumber>::plan tmp = fftw_interface<rnumber>::plan_guru_r2r( + /*rank*/0, + /*dims*/nullptr, + howmany_rank, + howmany_dims, + a, + a, + /*kind*/nullptr, + FFTW_ESTIMATE); + fftw_interface<rnumber>::execute(tmp); + fftw_interface<rnumber>::destroy_plan(tmp); + return EXIT_SUCCESS; +} + +template <class rnumber> +int field_descriptor<rnumber>::interleave( + typename fftw_interface<rnumber>::complex *a, + int dim) +{ + typename fftw_interface<rnumber>::iodim howmany_dims[2]; + howmany_dims[0].n = dim; + howmany_dims[0].is = this->local_size; + howmany_dims[0].os = 1; + howmany_dims[1].n = this->local_size; + howmany_dims[1].is = 1; + howmany_dims[1].os = dim; + const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); + + typename fftw_interface<rnumber>::plan tmp = fftw_interface<rnumber>::plan_guru_dft( + /*rank*/0, + /*dims*/nullptr, + howmany_rank, + howmany_dims, + a, + a, + +1, + FFTW_ESTIMATE); + fftw_interface<rnumber>::execute(tmp); + fftw_interface<rnumber>::destroy_plan(tmp); + return EXIT_SUCCESS; +} + +template <class rnumber> +field_descriptor<rnumber>* field_descriptor<rnumber>::get_transpose() +{ + int n[this->ndims]; + for (int i=0; i<this->ndims; i++) + n[i] = this->sizes[this->ndims - i - 1]; + return new field_descriptor<rnumber>(this->ndims, n, this->mpi_dtype, this->comm); +} /*****************************************************************************/ -/* now actually use the macro defined above */ -CLASS_IMPLEMENTATION( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT, - MPI_COMPLEX) -CLASS_IMPLEMENTATION( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE, - BFPS_MPICXX_DOUBLE_COMPLEX) /*****************************************************************************/ @@ -511,23 +496,23 @@ template <class rnumber> field_descriptor<rnumber>::~field_descriptor() { DEBUG_MSG_WAIT( - MPI_COMM_WORLD, - this->io_comm == MPI_COMM_NULL ? "null\n" : "not null\n"); + MPI_COMM_WORLD, + this->io_comm == MPI_COMM_NULL ? "null\n" : "not null\n"); DEBUG_MSG_WAIT( - MPI_COMM_WORLD, - "subsizes[0] = %d \n", this->subsizes[0]); + MPI_COMM_WORLD, + "subsizes[0] = %d \n", this->subsizes[0]); if (this->subsizes[0] > 0) { DEBUG_MSG_WAIT( - this->io_comm, - "deallocating mpi_array_dtype\n"); + this->io_comm, + "deallocating mpi_array_dtype\n"); MPI_Type_free(&this->mpi_array_dtype); } if (this->nprocs != this->io_nprocs && this->io_myrank != MPI_PROC_NULL) { DEBUG_MSG_WAIT( - this->io_comm, - "freeing io_comm\n"); + this->io_comm, + "freeing io_comm\n"); MPI_Comm_free(&this->io_comm); } delete[] this->sizes; diff --git a/bfps/cpp/field_descriptor.hpp b/bfps/cpp/field_descriptor.hpp index bfcf52ed415ddb90bd77a6c6793974aea6a94734..2fb491bca7c130704fc5de5d22c3393cb196eec7 100644 --- a/bfps/cpp/field_descriptor.hpp +++ b/bfps/cpp/field_descriptor.hpp @@ -26,6 +26,7 @@ #include <mpi.h> #include <fftw3-mpi.h> +#include "fftw_interface.hpp" #ifndef FIELD_DESCRIPTOR @@ -85,14 +86,14 @@ class field_descriptor rnumber *input, rnumber *output); int transpose( - cnumber *input, - cnumber *output = NULL); + typename fftw_interface<rnumber>::complex *input, + typename fftw_interface<rnumber>::complex *output = NULL); int interleave( rnumber *input, int dim); int interleave( - cnumber *input, + typename fftw_interface<rnumber>::complex *input, int dim); }; diff --git a/bfps/cpp/fluid_solver.cpp b/bfps/cpp/fluid_solver.cpp index 61d83fe38ab632d39ee76ad2f131fb41dce80e7c..9456cc186caefcaba4fa10af1a027a565a16c2a9 100644 --- a/bfps/cpp/fluid_solver.cpp +++ b/bfps/cpp/fluid_solver.cpp @@ -48,911 +48,936 @@ void fluid_solver<rnumber>::impose_zero_modes() /*****************************************************************************/ /* macro for specializations to numeric types compatible with FFTW */ -#define FLUID_SOLVER_DEFINITIONS(FFTW, R, MPI_RNUM, MPI_CNUM) \ - \ -template<> \ -fluid_solver<R>::fluid_solver( \ - const char *NAME, \ - int nx, \ - int ny, \ - int nz, \ - double DKX, \ - double DKY, \ - double DKZ, \ - int DEALIAS_TYPE, \ - unsigned FFTW_PLAN_RIGOR) : fluid_solver_base<R>( \ - NAME, \ - nx , ny , nz, \ - DKX, DKY, DKZ, \ - DEALIAS_TYPE, \ - FFTW_PLAN_RIGOR) \ -{ \ - this->cvorticity = FFTW(alloc_complex)(this->cd->local_size);\ - this->cvelocity = FFTW(alloc_complex)(this->cd->local_size);\ - this->rvorticity = FFTW(alloc_real)(this->cd->local_size*2);\ - /*this->rvelocity = (R*)(this->cvelocity);*/\ - this->rvelocity = FFTW(alloc_real)(this->cd->local_size*2);\ - \ - this->ru = this->rvelocity;\ - this->cu = this->cvelocity;\ - \ - this->rv[0] = this->rvorticity;\ - this->rv[3] = this->rvorticity;\ - this->cv[0] = this->cvorticity;\ - this->cv[3] = this->cvorticity;\ - \ - this->cv[1] = FFTW(alloc_complex)(this->cd->local_size);\ - this->cv[2] = this->cv[1];\ - this->rv[1] = FFTW(alloc_real)(this->cd->local_size*2);\ - this->rv[2] = this->rv[1];\ - \ - this->c2r_vorticity = new FFTW(plan);\ - this->r2c_vorticity = new FFTW(plan);\ - this->c2r_velocity = new FFTW(plan);\ - this->r2c_velocity = new FFTW(plan);\ - \ - ptrdiff_t sizes[] = {nz, \ - ny, \ - nx};\ - \ - *(FFTW(plan)*)this->c2r_vorticity = FFTW(mpi_plan_many_dft_c2r)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->cvorticity, this->rvorticity, \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); \ - \ - *(FFTW(plan)*)this->r2c_vorticity = FFTW(mpi_plan_many_dft_r2c)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->rvorticity, this->cvorticity, \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); \ - \ - *(FFTW(plan)*)this->c2r_velocity = FFTW(mpi_plan_many_dft_c2r)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->cvelocity, this->rvelocity, \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); \ - \ - *(FFTW(plan)*)this->r2c_velocity = FFTW(mpi_plan_many_dft_r2c)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->rvelocity, this->cvelocity, \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); \ - \ - this->uc2r = this->c2r_velocity;\ - this->ur2c = this->r2c_velocity;\ - this->vc2r[0] = this->c2r_vorticity;\ - this->vr2c[0] = this->r2c_vorticity;\ - \ - this->vc2r[1] = new FFTW(plan);\ - this->vr2c[1] = new FFTW(plan);\ - this->vc2r[2] = new FFTW(plan);\ - this->vr2c[2] = new FFTW(plan);\ - \ - *(FFTW(plan)*)(this->vc2r[1]) = FFTW(mpi_plan_many_dft_c2r)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->cv[1], this->rv[1], \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); \ - \ - *(FFTW(plan)*)this->vc2r[2] = FFTW(mpi_plan_many_dft_c2r)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->cv[2], this->rv[2], \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); \ - \ - *(FFTW(plan)*)this->vr2c[1] = FFTW(mpi_plan_many_dft_r2c)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->rv[1], this->cv[1], \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); \ - \ - *(FFTW(plan)*)this->vr2c[2] = FFTW(mpi_plan_many_dft_r2c)( \ - 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, \ - this->rv[2], this->cv[2], \ - MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); \ - \ - /* ``physical'' parameters etc, initialized here just in case */ \ - \ - this->nu = 0.1; \ - this->fmode = 1; \ - this->famplitude = 1.0; \ - this->fk0 = 0; \ - this->fk1 = 3.0; \ - /* initialization of fields must be done AFTER planning */ \ - std::fill_n((R*)this->cvorticity, this->cd->local_size*2, 0.0); \ - std::fill_n((R*)this->cvelocity, this->cd->local_size*2, 0.0); \ - std::fill_n(this->rvelocity, this->cd->local_size*2, 0.0); \ - std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); \ - std::fill_n((R*)this->cv[1], this->cd->local_size*2, 0.0); \ - std::fill_n(this->rv[1], this->cd->local_size*2, 0.0); \ - std::fill_n(this->rv[2], this->cd->local_size*2, 0.0); \ -} \ - \ -template<> \ -fluid_solver<R>::~fluid_solver() \ -{ \ - FFTW(destroy_plan)(*(FFTW(plan)*)this->c2r_vorticity);\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->r2c_vorticity);\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->c2r_velocity );\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->r2c_velocity );\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->vc2r[1]);\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->vr2c[1]);\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->vc2r[2]);\ - FFTW(destroy_plan)(*(FFTW(plan)*)this->vr2c[2]);\ - \ - delete (FFTW(plan)*)this->c2r_vorticity;\ - delete (FFTW(plan)*)this->r2c_vorticity;\ - delete (FFTW(plan)*)this->c2r_velocity ;\ - delete (FFTW(plan)*)this->r2c_velocity ;\ - delete (FFTW(plan)*)this->vc2r[1];\ - delete (FFTW(plan)*)this->vr2c[1];\ - delete (FFTW(plan)*)this->vc2r[2];\ - delete (FFTW(plan)*)this->vr2c[2];\ - \ - FFTW(free)(this->cv[1]);\ - FFTW(free)(this->rv[1]);\ - FFTW(free)(this->cvorticity);\ - FFTW(free)(this->rvorticity);\ - FFTW(free)(this->cvelocity);\ - FFTW(free)(this->rvelocity);\ -} \ - \ -template<> \ -void fluid_solver<R>::compute_vorticity() \ -{ \ - ptrdiff_t tindex; \ - CLOOP_K2( \ - this, \ - tindex = 3*cindex; \ - if (k2 <= this->kM2) \ - { \ - this->cvorticity[tindex+0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); \ - this->cvorticity[tindex+1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); \ - this->cvorticity[tindex+2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); \ - this->cvorticity[tindex+0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); \ - this->cvorticity[tindex+1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); \ - this->cvorticity[tindex+2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); \ - } \ - else \ - std::fill_n((R*)(this->cvorticity+tindex), 6, 0.0); \ - ); \ - this->symmetrize(this->cvorticity, 3); \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_velocity(FFTW(complex) *vorticity) \ -{ \ - ptrdiff_t tindex; \ - CLOOP_K2( \ - this, \ - tindex = 3*cindex; \ - if (k2 <= this->kM2 && k2 > 0) \ - { \ - this->cu[tindex+0][0] = -(this->ky[yindex]*vorticity[tindex+2][1] - this->kz[zindex]*vorticity[tindex+1][1]) / k2; \ - this->cu[tindex+1][0] = -(this->kz[zindex]*vorticity[tindex+0][1] - this->kx[xindex]*vorticity[tindex+2][1]) / k2; \ - this->cu[tindex+2][0] = -(this->kx[xindex]*vorticity[tindex+1][1] - this->ky[yindex]*vorticity[tindex+0][1]) / k2; \ - this->cu[tindex+0][1] = (this->ky[yindex]*vorticity[tindex+2][0] - this->kz[zindex]*vorticity[tindex+1][0]) / k2; \ - this->cu[tindex+1][1] = (this->kz[zindex]*vorticity[tindex+0][0] - this->kx[xindex]*vorticity[tindex+2][0]) / k2; \ - this->cu[tindex+2][1] = (this->kx[xindex]*vorticity[tindex+1][0] - this->ky[yindex]*vorticity[tindex+0][0]) / k2; \ - } \ - else \ - std::fill_n((R*)(this->cu+tindex), 6, 0.0); \ - ); \ - /*this->symmetrize(this->cu, 3);*/ \ -} \ - \ -template<> \ -void fluid_solver<R>::ift_velocity() \ -{ \ - FFTW(execute)(*((FFTW(plan)*)this->c2r_velocity )); \ -} \ - \ -template<> \ -void fluid_solver<R>::ift_vorticity() \ -{ \ - std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); \ - FFTW(execute)(*((FFTW(plan)*)this->c2r_vorticity )); \ -} \ - \ -template<> \ -void fluid_solver<R>::dft_velocity() \ -{ \ - FFTW(execute)(*((FFTW(plan)*)this->r2c_velocity )); \ -} \ - \ -template<> \ -void fluid_solver<R>::dft_vorticity() \ -{ \ - std::fill_n((R*)this->cvorticity, this->cd->local_size*2, 0.0); \ - FFTW(execute)(*((FFTW(plan)*)this->r2c_vorticity )); \ -} \ - \ -template<> \ -void fluid_solver<R>::add_forcing(\ - FFTW(complex) *acc_field, FFTW(complex) *vort_field, R factor) \ -{ \ - if (strcmp(this->forcing_type, "none") == 0) \ - return; \ - if (strcmp(this->forcing_type, "Kolmogorov") == 0) \ - { \ - ptrdiff_t cindex; \ - if (this->cd->myrank == this->cd->rank[this->fmode]) \ - { \ - cindex = ((this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; \ - acc_field[cindex+2][0] -= this->famplitude*factor/2; \ - } \ - if (this->cd->myrank == this->cd->rank[this->cd->sizes[0] - this->fmode]) \ - { \ - cindex = ((this->cd->sizes[0] - this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; \ - acc_field[cindex+2][0] -= this->famplitude*factor/2; \ - } \ - return; \ - } \ - if (strcmp(this->forcing_type, "linear") == 0) \ - { \ - double knorm; \ - CLOOP( \ - this, \ - knorm = sqrt(this->kx[xindex]*this->kx[xindex] + \ - this->ky[yindex]*this->ky[yindex] + \ - this->kz[zindex]*this->kz[zindex]); \ - if ((this->fk0 <= knorm) && \ - (this->fk1 >= knorm)) \ - for (int c=0; c<3; c++) \ - for (int i=0; i<2; i++) \ - acc_field[cindex*3+c][i] += this->famplitude*vort_field[cindex*3+c][i]*factor; \ - ); \ - return; \ - } \ -} \ - \ -template<> \ -void fluid_solver<R>::omega_nonlin( \ - int src) \ -{ \ - assert(src >= 0 && src < 3); \ - this->compute_velocity(this->cv[src]); \ - /* get fields from Fourier space to real space */ \ - FFTW(execute)(*((FFTW(plan)*)this->c2r_velocity )); \ - FFTW(execute)(*((FFTW(plan)*)this->vc2r[src])); \ - /* compute cross product $u \times \omega$, and normalize */ \ - R tmp[3][2]; \ - ptrdiff_t tindex; \ - RLOOP ( \ - this, \ - tindex = 3*rindex; \ - for (int cc=0; cc<3; cc++) \ - tmp[cc][0] = (this->ru[tindex+(cc+1)%3]*this->rv[src][tindex+(cc+2)%3] - \ - this->ru[tindex+(cc+2)%3]*this->rv[src][tindex+(cc+1)%3]); \ - for (int cc=0; cc<3; cc++) \ - this->ru[(3*rindex)+cc] = tmp[cc][0] / this->normalization_factor; \ - ); \ - /* go back to Fourier space */ \ - this->clean_up_real_space(this->ru, 3); \ - FFTW(execute)(*((FFTW(plan)*)this->r2c_velocity )); \ - this->dealias(this->cu, 3); \ - /* $\imath k \times Fourier(u \times \omega)$ */ \ - CLOOP( \ - this, \ - tindex = 3*cindex; \ - { \ - tmp[0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); \ - tmp[1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); \ - tmp[2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); \ - tmp[0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); \ - tmp[1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); \ - tmp[2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); \ - } \ - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) \ - this->cu[tindex+cc][i] = tmp[cc][i]; \ - ); \ - this->add_forcing(this->cu, this->cv[src], 1.0); \ - this->force_divfree(this->cu); \ -} \ - \ -template<> \ -void fluid_solver<R>::step(double dt) \ -{ \ - double factor0, factor1; \ - std::fill_n((R*)this->cv[1], this->cd->local_size*2, 0.0); \ - this->omega_nonlin(0); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - factor0 = exp(-this->nu * k2 * dt); \ - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) \ - this->cv[1][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i] + \ - dt*this->cu[3*cindex+cc][i])*factor0; \ - } \ - ); \ - \ - this->omega_nonlin(1); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - factor0 = exp(-this->nu * k2 * dt/2); \ - factor1 = exp( this->nu * k2 * dt/2); \ - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) \ - this->cv[2][3*cindex+cc][i] = (3*this->cv[0][3*cindex+cc][i]*factor0 + \ - (this->cv[1][3*cindex+cc][i] + \ - dt*this->cu[3*cindex+cc][i])*factor1)*0.25; \ - } \ - ); \ - \ - this->omega_nonlin(2); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - factor0 = exp(-this->nu * k2 * dt * 0.5); \ - for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) \ - this->cv[3][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i]*factor0 + \ - 2*(this->cv[2][3*cindex+cc][i] + \ - dt*this->cu[3*cindex+cc][i]))*factor0/3; \ - } \ - ); \ - \ - this->force_divfree(this->cvorticity); \ - this->symmetrize(this->cvorticity, 3); \ - this->iteration++; \ -} \ - \ -template<> \ -int fluid_solver<R>::read(char field, char representation) \ -{ \ - char fname[512]; \ - int read_result; \ - if (field == 'v') \ - { \ - if (representation == 'c') \ - { \ - this->fill_up_filename("cvorticity", fname); \ - read_result = this->cd->read(fname, (void*)this->cvorticity); \ - if (read_result != EXIT_SUCCESS) \ - return read_result; \ - } \ - if (representation == 'r') \ - { \ - read_result = this->read_base("rvorticity", this->rvorticity); \ - if (read_result != EXIT_SUCCESS) \ - return read_result; \ - else \ - FFTW(execute)(*((FFTW(plan)*)this->r2c_vorticity )); \ - } \ - this->low_pass_Fourier(this->cvorticity, 3, this->kM); \ - this->force_divfree(this->cvorticity); \ - this->symmetrize(this->cvorticity, 3); \ - return EXIT_SUCCESS; \ - } \ - if ((field == 'u') && (representation == 'c')) \ - { \ - read_result = this->read_base("cvelocity", this->cvelocity); \ - this->low_pass_Fourier(this->cvelocity, 3, this->kM); \ - this->force_divfree(this->cvorticity); \ - this->symmetrize(this->cvorticity, 3); \ - return read_result; \ - } \ - if ((field == 'u') && (representation == 'r')) \ - return this->read_base("rvelocity", this->rvelocity); \ - return EXIT_FAILURE; \ -} \ - \ -template<> \ -int fluid_solver<R>::write(char field, char representation) \ -{ \ - char fname[512]; \ - if ((field == 'v') && (representation == 'c')) \ - { \ - this->fill_up_filename("cvorticity", fname); \ - return this->cd->write(fname, (void*)this->cvorticity); \ - } \ - if ((field == 'v') && (representation == 'r')) \ - { \ - FFTW(execute)(*((FFTW(plan)*)this->c2r_vorticity )); \ - clip_zero_padding<R>(this->rd, this->rvorticity, 3); \ - this->fill_up_filename("rvorticity", fname); \ - return this->rd->write(fname, this->rvorticity); \ - } \ - this->compute_velocity(this->cvorticity); \ - if ((field == 'u') && (representation == 'c')) \ - { \ - this->fill_up_filename("cvelocity", fname); \ - return this->cd->write(fname, this->cvelocity); \ - } \ - if ((field == 'u') && (representation == 'r')) \ - { \ - this->ift_velocity(); \ - clip_zero_padding<R>(this->rd, this->rvelocity, 3); \ - this->fill_up_filename("rvelocity", fname); \ - return this->rd->write(fname, this->rvelocity); \ - } \ - return EXIT_FAILURE; \ -} \ - \ -template<> \ -int fluid_solver<R>::write_rTrS2() \ -{ \ - char fname[512]; \ - this->fill_up_filename("rTrS2", fname); \ - FFTW(complex) *ca; \ - R *ra; \ - ca = FFTW(alloc_complex)(this->cd->local_size*3); \ - ra = (R*)(ca); \ - this->compute_velocity(this->cvorticity); \ - this->compute_vector_gradient(ca, this->cvelocity); \ - for (int cc=0; cc<3; cc++) \ - { \ - std::copy( \ - (R*)(ca + cc*this->cd->local_size), \ - (R*)(ca + (cc+1)*this->cd->local_size), \ - (R*)this->cv[1]); \ - FFTW(execute)(*((FFTW(plan)*)this->vc2r[1])); \ - std::copy( \ - this->rv[1], \ - this->rv[1] + this->cd->local_size*2, \ - ra + cc*this->cd->local_size*2); \ - } \ - /* velocity gradient is now stored, in real space, in ra */ \ - R *dx_u, *dy_u, *dz_u; \ - dx_u = ra; \ - dy_u = ra + 2*this->cd->local_size; \ - dz_u = ra + 4*this->cd->local_size; \ - R *trS2 = FFTW(alloc_real)((this->cd->local_size/3)*2); \ - double average_local = 0; \ - RLOOP( \ - this, \ - R AxxAxx; \ - R AyyAyy; \ - R AzzAzz; \ - R Sxy; \ - R Syz; \ - R Szx; \ - ptrdiff_t tindex = 3*rindex; \ - AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; \ - AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; \ - AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; \ - Sxy = dx_u[tindex+1]+dy_u[tindex+0]; \ - Syz = dy_u[tindex+2]+dz_u[tindex+1]; \ - Szx = dz_u[tindex+0]+dx_u[tindex+2]; \ - trS2[rindex] = (AxxAxx + AyyAyy + AzzAzz + \ - (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); \ - average_local += trS2[rindex]; \ - ); \ - double average; \ - MPI_Allreduce( \ - &average_local, \ - &average, \ - 1, \ - MPI_DOUBLE, MPI_SUM, this->cd->comm); \ - DEBUG_MSG("average TrS2 is %g\n", average); \ - FFTW(free)(ca); \ - /* output goes here */ \ - int ntmp[3]; \ - ntmp[0] = this->rd->sizes[0]; \ - ntmp[1] = this->rd->sizes[1]; \ - ntmp[2] = this->rd->sizes[2]; \ - field_descriptor<R> *scalar_descriptor = new field_descriptor<R>(3, ntmp, MPI_RNUM, this->cd->comm); \ - clip_zero_padding<R>(scalar_descriptor, trS2, 1); \ - int return_value = scalar_descriptor->write(fname, trS2); \ - delete scalar_descriptor; \ - FFTW(free)(trS2); \ - return return_value; \ -} \ - \ -template<> \ -int fluid_solver<R>::write_renstrophy() \ -{ \ - char fname[512]; \ - this->fill_up_filename("renstrophy", fname); \ - R *enstrophy = FFTW(alloc_real)((this->cd->local_size/3)*2); \ - this->ift_vorticity(); \ - double average_local = 0; \ - RLOOP( \ - this, \ - ptrdiff_t tindex = 3*rindex; \ - enstrophy[rindex] = ( \ - this->rvorticity[tindex+0]*this->rvorticity[tindex+0] + \ - this->rvorticity[tindex+1]*this->rvorticity[tindex+1] + \ - this->rvorticity[tindex+2]*this->rvorticity[tindex+2] \ - )/2; \ - average_local += enstrophy[rindex]; \ - ); \ - double average; \ - MPI_Allreduce( \ - &average_local, \ - &average, \ - 1, \ - MPI_DOUBLE, MPI_SUM, this->cd->comm); \ - DEBUG_MSG("average enstrophy is %g\n", average); \ - /* output goes here */ \ - int ntmp[3]; \ - ntmp[0] = this->rd->sizes[0]; \ - ntmp[1] = this->rd->sizes[1]; \ - ntmp[2] = this->rd->sizes[2]; \ - field_descriptor<R> *scalar_descriptor = new field_descriptor<R>(3, ntmp, MPI_RNUM, this->cd->comm); \ - clip_zero_padding<R>(scalar_descriptor, enstrophy, 1); \ - int return_value = scalar_descriptor->write(fname, enstrophy); \ - delete scalar_descriptor; \ - FFTW(free)(enstrophy); \ - return return_value; \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_pressure(FFTW(complex) *pressure) \ -{ \ - /* assume velocity is already in real space representation */ \ - ptrdiff_t tindex; \ - \ - /* diagonal terms 11 22 33 */\ - RLOOP ( \ - this, \ - tindex = 3*rindex; \ - for (int cc=0; cc<3; cc++) \ - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc]; \ - ); \ - this->clean_up_real_space(this->rv[1], 3); \ - FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \ - this->dealias(this->cv[1], 3); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2 && k2 > 0) \ - { \ - tindex = 3*cindex; \ - for (int i=0; i<2; i++) \ - { \ - pressure[cindex][i] = -(this->kx[xindex]*this->kx[xindex]*this->cv[1][tindex+0][i] + \ - this->ky[yindex]*this->ky[yindex]*this->cv[1][tindex+1][i] + \ - this->kz[zindex]*this->kz[zindex]*this->cv[1][tindex+2][i]); \ - } \ - } \ - else \ - std::fill_n((R*)(pressure+cindex), 2, 0.0); \ - ); \ - /* off-diagonal terms 12 23 31 */\ - RLOOP ( \ - this, \ - tindex = 3*rindex; \ - for (int cc=0; cc<3; cc++) \ - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3]; \ - ); \ - this->clean_up_real_space(this->rv[1], 3); \ - FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \ - this->dealias(this->cv[1], 3); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2 && k2 > 0) \ - { \ - tindex = 3*cindex; \ - for (int i=0; i<2; i++) \ - { \ - pressure[cindex][i] -= 2*(this->kx[xindex]*this->ky[yindex]*this->cv[1][tindex+0][i] + \ - this->ky[yindex]*this->kz[zindex]*this->cv[1][tindex+1][i] + \ - this->kz[zindex]*this->kx[xindex]*this->cv[1][tindex+2][i]); \ - pressure[cindex][i] /= this->normalization_factor*k2; \ - } \ - } \ - ); \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_gradient_statistics( \ - FFTW(complex) *vec, \ - double *gradu_moments, \ - double *trS2QR_moments, \ - ptrdiff_t *gradu_hist, \ - ptrdiff_t *trS2QR_hist, \ - ptrdiff_t *QR2D_hist, \ - double trS2QR_max_estimates[], \ - double gradu_max_estimates[], \ - int nbins, \ - int QR2D_nbins) \ -{ \ - FFTW(complex) *ca; \ - R *ra; \ - ca = FFTW(alloc_complex)(this->cd->local_size*3); \ - ra = (R*)(ca); \ - this->compute_vector_gradient(ca, vec); \ - for (int cc=0; cc<3; cc++) \ - { \ - std::copy( \ - (R*)(ca + cc*this->cd->local_size), \ - (R*)(ca + (cc+1)*this->cd->local_size), \ - (R*)this->cv[1]); \ - FFTW(execute)(*((FFTW(plan)*)this->vc2r[1])); \ - std::copy( \ - this->rv[1], \ - this->rv[1] + this->cd->local_size*2, \ - ra + cc*this->cd->local_size*2); \ - } \ - /* velocity gradient is now stored, in real space, in ra */ \ - std::fill_n(this->rv[1], 2*this->cd->local_size, 0.0); \ - R *dx_u, *dy_u, *dz_u; \ - dx_u = ra; \ - dy_u = ra + 2*this->cd->local_size; \ - dz_u = ra + 4*this->cd->local_size; \ - double binsize[2]; \ - double tmp_max_estimate[3]; \ - tmp_max_estimate[0] = trS2QR_max_estimates[0]; \ - tmp_max_estimate[1] = trS2QR_max_estimates[1]; \ - tmp_max_estimate[2] = trS2QR_max_estimates[2]; \ - binsize[0] = 2*tmp_max_estimate[2] / QR2D_nbins; \ - binsize[1] = 2*tmp_max_estimate[1] / QR2D_nbins; \ - ptrdiff_t *local_hist = new ptrdiff_t[QR2D_nbins*QR2D_nbins]; \ - std::fill_n(local_hist, QR2D_nbins*QR2D_nbins, 0); \ - RLOOP( \ - this, \ - R AxxAxx; \ - R AyyAyy; \ - R AzzAzz; \ - R AxyAyx; \ - R AyzAzy; \ - R AzxAxz; \ - R Sxy; \ - R Syz; \ - R Szx; \ - ptrdiff_t tindex = 3*rindex; \ - AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; \ - AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; \ - AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; \ - AxyAyx = dx_u[tindex+1]*dy_u[tindex+0]; \ - AyzAzy = dy_u[tindex+2]*dz_u[tindex+1]; \ - AzxAxz = dz_u[tindex+0]*dx_u[tindex+2]; \ - this->rv[1][tindex+1] = - (AxxAxx + AyyAyy + AzzAzz)/2 - AxyAyx - AyzAzy - AzxAxz; \ - this->rv[1][tindex+2] = - (dx_u[tindex+0]*(AxxAxx/3 + AxyAyx + AzxAxz) + \ - dy_u[tindex+1]*(AyyAyy/3 + AxyAyx + AyzAzy) + \ - dz_u[tindex+2]*(AzzAzz/3 + AzxAxz + AyzAzy) + \ - dx_u[tindex+1]*dy_u[tindex+2]*dz_u[tindex+0] + \ - dx_u[tindex+2]*dy_u[tindex+0]*dz_u[tindex+1]); \ - int bin0 = int(floor((this->rv[1][tindex+2] + tmp_max_estimate[2]) / binsize[0])); \ - int bin1 = int(floor((this->rv[1][tindex+1] + tmp_max_estimate[1]) / binsize[1])); \ - if ((bin0 >= 0 && bin0 < QR2D_nbins) && \ - (bin1 >= 0 && bin1 < QR2D_nbins)) \ - local_hist[bin1*QR2D_nbins + bin0]++; \ - Sxy = dx_u[tindex+1]+dy_u[tindex+0]; \ - Syz = dy_u[tindex+2]+dz_u[tindex+1]; \ - Szx = dz_u[tindex+0]+dx_u[tindex+2]; \ - this->rv[1][tindex] = (AxxAxx + AyyAyy + AzzAzz + \ - (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); \ - ); \ - MPI_Allreduce( \ - local_hist, \ - QR2D_hist, \ - QR2D_nbins * QR2D_nbins, \ - MPI_INT64_T, MPI_SUM, this->cd->comm); \ - delete[] local_hist; \ - this->compute_rspace_stats3( \ - this->rv[1], \ - trS2QR_moments, \ - trS2QR_hist, \ - tmp_max_estimate, \ - nbins); \ - double *tmp_moments = new double[10*3]; \ - ptrdiff_t *tmp_hist = new ptrdiff_t[nbins*3]; \ - for (int cc=0; cc<3; cc++) \ - { \ - tmp_max_estimate[0] = gradu_max_estimates[cc*3 + 0]; \ - tmp_max_estimate[1] = gradu_max_estimates[cc*3 + 1]; \ - tmp_max_estimate[2] = gradu_max_estimates[cc*3 + 2]; \ - this->compute_rspace_stats3( \ - dx_u + cc*2*this->cd->local_size, \ - tmp_moments, \ - tmp_hist, \ - tmp_max_estimate, \ - nbins); \ - for (int n = 0; n < 10; n++) \ - for (int i = 0; i < 3 ; i++) \ - { \ - gradu_moments[(n*3 + cc)*3 + i] = tmp_moments[n*3 + i]; \ - } \ - for (int n = 0; n < nbins; n++) \ - for (int i = 0; i < 3; i++) \ - { \ - gradu_hist[(n*3 + cc)*3 + i] = tmp_hist[n*3 + i]; \ - } \ - } \ - delete[] tmp_moments; \ - delete[] tmp_hist; \ - FFTW(free)(ca); \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_Lagrangian_acceleration(R (*acceleration)[2]) \ -{ \ - ptrdiff_t tindex; \ - FFTW(complex) *pressure; \ - pressure = FFTW(alloc_complex)(this->cd->local_size/3); \ - this->compute_velocity(this->cvorticity); \ - this->ift_velocity(); \ - this->compute_pressure(pressure); \ - this->compute_velocity(this->cvorticity); \ - std::fill_n((R*)this->cv[1], 2*this->cd->local_size, 0.0); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - tindex = 3*cindex; \ - for (int cc=0; cc<3; cc++) \ - for (int i=0; i<2; i++) \ - this->cv[1][tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; \ - if (strcmp(this->forcing_type, "linear") == 0) \ - { \ - double knorm = sqrt(k2); \ - if ((this->fk0 <= knorm) && \ - (this->fk1 >= knorm)) \ - for (int c=0; c<3; c++) \ - for (int i=0; i<2; i++) \ - this->cv[1][tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; \ - } \ - this->cv[1][tindex+0][0] += this->kx[xindex]*pressure[cindex][1]; \ - this->cv[1][tindex+1][0] += this->ky[yindex]*pressure[cindex][1]; \ - this->cv[1][tindex+2][0] += this->kz[zindex]*pressure[cindex][1]; \ - this->cv[1][tindex+0][1] -= this->kx[xindex]*pressure[cindex][0]; \ - this->cv[1][tindex+1][1] -= this->ky[yindex]*pressure[cindex][0]; \ - this->cv[1][tindex+2][1] -= this->kz[zindex]*pressure[cindex][0]; \ - } \ - ); \ - std::copy( \ - (R*)this->cv[1], \ - (R*)(this->cv[1] + this->cd->local_size), \ - (R*)acceleration); \ - FFTW(free)(pressure); \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_Eulerian_acceleration(FFTW(complex) *acceleration) \ -{ \ - std::fill_n((R*)(acceleration), 2*this->cd->local_size, 0.0); \ - ptrdiff_t tindex; \ - this->compute_velocity(this->cvorticity); \ - /* put in linear terms */ \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - tindex = 3*cindex; \ - for (int cc=0; cc<3; cc++) \ - for (int i=0; i<2; i++) \ - acceleration[tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; \ - if (strcmp(this->forcing_type, "linear") == 0) \ - { \ - double knorm = sqrt(k2); \ - if ((this->fk0 <= knorm) && \ - (this->fk1 >= knorm)) \ - { \ - for (int c=0; c<3; c++) \ - for (int i=0; i<2; i++) \ - acceleration[tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; \ - } \ - } \ - } \ - ); \ - this->ift_velocity(); \ - /* compute uu */ \ - /* 11 22 33 */ \ - RLOOP ( \ - this, \ - tindex = 3*rindex; \ - for (int cc=0; cc<3; cc++) \ - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc] / this->normalization_factor; \ - ); \ - this->clean_up_real_space(this->rv[1], 3); \ - FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \ - this->dealias(this->cv[1], 3); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - tindex = 3*cindex; \ - acceleration[tindex+0][0] += \ - this->kx[xindex]*this->cv[1][tindex+0][1]; \ - acceleration[tindex+0][1] += \ - -this->kx[xindex]*this->cv[1][tindex+0][0]; \ - acceleration[tindex+1][0] += \ - this->ky[yindex]*this->cv[1][tindex+1][1]; \ - acceleration[tindex+1][1] += \ - -this->ky[yindex]*this->cv[1][tindex+1][0]; \ - acceleration[tindex+2][0] += \ - this->kz[zindex]*this->cv[1][tindex+2][1]; \ - acceleration[tindex+2][1] += \ - -this->kz[zindex]*this->cv[1][tindex+2][0]; \ - } \ - ); \ - /* 12 23 31 */ \ - RLOOP ( \ - this, \ - tindex = 3*rindex; \ - for (int cc=0; cc<3; cc++) \ - this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3] / this->normalization_factor; \ - ); \ - this->clean_up_real_space(this->rv[1], 3); \ - FFTW(execute)(*((FFTW(plan)*)this->vr2c[1])); \ - this->dealias(this->cv[1], 3); \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - tindex = 3*cindex; \ - acceleration[tindex+0][0] += \ - (this->ky[yindex]*this->cv[1][tindex+0][1] + \ - this->kz[zindex]*this->cv[1][tindex+2][1]); \ - acceleration[tindex+0][1] += \ - - (this->ky[yindex]*this->cv[1][tindex+0][0] + \ - this->kz[zindex]*this->cv[1][tindex+2][0]); \ - acceleration[tindex+1][0] += \ - (this->kz[zindex]*this->cv[1][tindex+1][1] + \ - this->kx[xindex]*this->cv[1][tindex+0][1]); \ - acceleration[tindex+1][1] += \ - - (this->kz[zindex]*this->cv[1][tindex+1][0] + \ - this->kx[xindex]*this->cv[1][tindex+0][0]); \ - acceleration[tindex+2][0] += \ - (this->kx[xindex]*this->cv[1][tindex+2][1] + \ - this->ky[yindex]*this->cv[1][tindex+1][1]); \ - acceleration[tindex+2][1] += \ - - (this->kx[xindex]*this->cv[1][tindex+2][0] + \ - this->ky[yindex]*this->cv[1][tindex+1][0]); \ - } \ - ); \ - if (this->cd->myrank == this->cd->rank[0]) \ - std::fill_n((R*)(acceleration), 6, 0.0); \ - this->force_divfree(acceleration); \ -} \ - \ -template<> \ -void fluid_solver<R>::compute_Lagrangian_acceleration(R *acceleration) \ -{ \ - this->compute_Lagrangian_acceleration((FFTW(complex)*)acceleration); \ - FFTW(execute)(*((FFTW(plan)*)this->vc2r[1])); \ - std::copy( \ - this->rv[1], \ - this->rv[1] + 2*this->cd->local_size, \ - acceleration); \ -} \ - \ -template<> \ -int fluid_solver<R>::write_rpressure() \ -{ \ - char fname[512]; \ - FFTW(complex) *pressure; \ - pressure = FFTW(alloc_complex)(this->cd->local_size/3); \ - this->compute_velocity(this->cvorticity); \ - this->ift_velocity(); \ - this->compute_pressure(pressure); \ - this->fill_up_filename("rpressure", fname); \ - R *rpressure = FFTW(alloc_real)((this->cd->local_size/3)*2); \ - FFTW(plan) c2r; \ - c2r = FFTW(mpi_plan_dft_c2r_3d)( \ - this->rd->sizes[0], this->rd->sizes[1], this->rd->sizes[2], \ - pressure, rpressure, this->cd->comm, \ - this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); \ - FFTW(execute)(c2r); \ - /* output goes here */ \ - int ntmp[3]; \ - ntmp[0] = this->rd->sizes[0]; \ - ntmp[1] = this->rd->sizes[1]; \ - ntmp[2] = this->rd->sizes[2]; \ - field_descriptor<R> *scalar_descriptor = new field_descriptor<R>(3, ntmp, MPI_RNUM, this->cd->comm); \ - clip_zero_padding<R>(scalar_descriptor, rpressure, 1); \ - int return_value = scalar_descriptor->write(fname, rpressure); \ - delete scalar_descriptor; \ - FFTW(destroy_plan)(c2r); \ - FFTW(free)(pressure); \ - FFTW(free)(rpressure); \ - return return_value; \ -} \ +template <class rnumber> +fluid_solver<rnumber>::fluid_solver( + const char *NAME, + int nx, + int ny, + int nz, + double DKX, + double DKY, + double DKZ, + int DEALIAS_TYPE, + unsigned FFTW_PLAN_RIGOR) : fluid_solver_base<rnumber>( + NAME, + nx , ny , nz, + DKX, DKY, DKZ, + DEALIAS_TYPE, + FFTW_PLAN_RIGOR) +{ + this->cvorticity = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); + this->cvelocity = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); + this->rvorticity = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); + /*this->rvelocity = (rnumber*)(this->cvelocity);*/ + this->rvelocity = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); -/*****************************************************************************/ + this->ru = this->rvelocity; + this->cu = this->cvelocity; + this->rv[0] = this->rvorticity; + this->rv[3] = this->rvorticity; + this->cv[0] = this->cvorticity; + this->cv[3] = this->cvorticity; + this->cv[1] = fftw_interface<rnumber>::alloc_complex(this->cd->local_size); + this->cv[2] = this->cv[1]; + this->rv[1] = fftw_interface<rnumber>::alloc_real(this->cd->local_size*2); + this->rv[2] = this->rv[1]; + + this->c2r_vorticity = new typename fftw_interface<rnumber>::plan; + this->r2c_vorticity = new typename fftw_interface<rnumber>::plan; + this->c2r_velocity = new typename fftw_interface<rnumber>::plan; + this->r2c_velocity = new typename fftw_interface<rnumber>::plan; + + ptrdiff_t sizes[] = {nz, + ny, + nx}; + + *this->c2r_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->cvorticity, this->rvorticity, + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + + *this->r2c_vorticity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->rvorticity, this->cvorticity, + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); + + *this->c2r_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->cvelocity, this->rvelocity, + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + + *this->r2c_velocity = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->rvelocity, this->cvelocity, + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); + + this->uc2r = this->c2r_velocity; + this->ur2c = this->r2c_velocity; + this->vc2r[0] = this->c2r_vorticity; + this->vr2c[0] = this->r2c_vorticity; + + this->vc2r[1] = new typename fftw_interface<rnumber>::plan; + this->vr2c[1] = new typename fftw_interface<rnumber>::plan; + this->vc2r[2] = new typename fftw_interface<rnumber>::plan; + this->vr2c[2] = new typename fftw_interface<rnumber>::plan; + + *(this->vc2r[1]) = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->cv[1], this->rv[1], + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + + *this->vc2r[2] = fftw_interface<rnumber>::mpi_plan_many_dft_c2r( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->cv[2], this->rv[2], + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + + *this->vr2c[1] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->rv[1], this->cv[1], + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); + + *this->vr2c[2] = fftw_interface<rnumber>::mpi_plan_many_dft_r2c( + 3, sizes, 3, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, + this->rv[2], this->cv[2], + MPI_COMM_WORLD, this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT); + + /* ``physical'' parameters etc, initialized here just in case */ + + this->nu = 0.1; + this->fmode = 1; + this->famplitude = 1.0; + this->fk0 = 0; + this->fk1 = 3.0; + /* initialization of fields must be done AFTER planning */ + std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0); + std::fill_n((rnumber*)this->cvelocity, this->cd->local_size*2, 0.0); + std::fill_n(this->rvelocity, this->cd->local_size*2, 0.0); + std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); + std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0); + std::fill_n(this->rv[1], this->cd->local_size*2, 0.0); + std::fill_n(this->rv[2], this->cd->local_size*2, 0.0); +} + +template <class rnumber> +fluid_solver<rnumber>::~fluid_solver() +{ + fftw_interface<rnumber>::destroy_plan(*this->c2r_vorticity); + fftw_interface<rnumber>::destroy_plan(*this->r2c_vorticity); + fftw_interface<rnumber>::destroy_plan(*this->c2r_velocity ); + fftw_interface<rnumber>::destroy_plan(*this->r2c_velocity ); + fftw_interface<rnumber>::destroy_plan(*this->vc2r[1]); + fftw_interface<rnumber>::destroy_plan(*this->vr2c[1]); + fftw_interface<rnumber>::destroy_plan(*this->vc2r[2]); + fftw_interface<rnumber>::destroy_plan(*this->vr2c[2]); + + delete this->c2r_vorticity; + delete this->r2c_vorticity; + delete this->c2r_velocity ; + delete this->r2c_velocity ; + delete this->vc2r[1]; + delete this->vr2c[1]; + delete this->vc2r[2]; + delete this->vr2c[2]; + + fftw_interface<rnumber>::free(this->cv[1]); + fftw_interface<rnumber>::free(this->rv[1]); + fftw_interface<rnumber>::free(this->cvorticity); + fftw_interface<rnumber>::free(this->rvorticity); + fftw_interface<rnumber>::free(this->cvelocity); + fftw_interface<rnumber>::free(this->rvelocity); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_vorticity() +{ + ptrdiff_t tindex; + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + tindex = 3*cindex; + if (k2 <= this->kM2) + { + this->cvorticity[tindex+0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); + this->cvorticity[tindex+1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); + this->cvorticity[tindex+2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); + this->cvorticity[tindex+0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); + this->cvorticity[tindex+1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); + this->cvorticity[tindex+2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); + } + else + std::fill_n((rnumber*)(this->cvorticity+tindex), 6, 0.0); + } + ); + this->symmetrize(this->cvorticity, 3); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_velocity(rnumber (*__restrict__ vorticity)[2]) +{ + ptrdiff_t tindex; + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + tindex = 3*cindex; + if (k2 <= this->kM2 && k2 > 0) + { + this->cu[tindex+0][0] = -(this->ky[yindex]*vorticity[tindex+2][1] - this->kz[zindex]*vorticity[tindex+1][1]) / k2; + this->cu[tindex+1][0] = -(this->kz[zindex]*vorticity[tindex+0][1] - this->kx[xindex]*vorticity[tindex+2][1]) / k2; + this->cu[tindex+2][0] = -(this->kx[xindex]*vorticity[tindex+1][1] - this->ky[yindex]*vorticity[tindex+0][1]) / k2; + this->cu[tindex+0][1] = (this->ky[yindex]*vorticity[tindex+2][0] - this->kz[zindex]*vorticity[tindex+1][0]) / k2; + this->cu[tindex+1][1] = (this->kz[zindex]*vorticity[tindex+0][0] - this->kx[xindex]*vorticity[tindex+2][0]) / k2; + this->cu[tindex+2][1] = (this->kx[xindex]*vorticity[tindex+1][0] - this->ky[yindex]*vorticity[tindex+0][0]) / k2; + } + else + std::fill_n((rnumber*)(this->cu+tindex), 6, 0.0); + } + ); + /*this->symmetrize(this->cu, 3);*/ +} + +template <class rnumber> +void fluid_solver<rnumber>::ift_velocity() +{ + fftw_interface<rnumber>::execute(*(this->c2r_velocity )); +} + +template <class rnumber> +void fluid_solver<rnumber>::ift_vorticity() +{ + std::fill_n(this->rvorticity, this->cd->local_size*2, 0.0); + fftw_interface<rnumber>::execute(*(this->c2r_vorticity )); +} + +template <class rnumber> +void fluid_solver<rnumber>::dft_velocity() +{ + fftw_interface<rnumber>::execute(*(this->r2c_velocity )); +} + +template <class rnumber> +void fluid_solver<rnumber>::dft_vorticity() +{ + std::fill_n((rnumber*)this->cvorticity, this->cd->local_size*2, 0.0); + fftw_interface<rnumber>::execute(*(this->r2c_vorticity )); +} + +template <class rnumber> +void fluid_solver<rnumber>::add_forcing( + rnumber (*__restrict__ acc_field)[2], rnumber (*__restrict__ vort_field)[2], rnumber factor) +{ + if (strcmp(this->forcing_type, "none") == 0) + return; + if (strcmp(this->forcing_type, "Kolmogorov") == 0) + { + ptrdiff_t cindex; + if (this->cd->myrank == this->cd->rank[this->fmode]) + { + cindex = ((this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; + acc_field[cindex+2][0] -= this->famplitude*factor/2; + } + if (this->cd->myrank == this->cd->rank[this->cd->sizes[0] - this->fmode]) + { + cindex = ((this->cd->sizes[0] - this->fmode - this->cd->starts[0]) * this->cd->sizes[1])*this->cd->sizes[2]*3; + acc_field[cindex+2][0] -= this->famplitude*factor/2; + } + return; + } + if (strcmp(this->forcing_type, "linear") == 0) + { + double knorm; + CLOOP( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + knorm = sqrt(this->kx[xindex]*this->kx[xindex] + + this->ky[yindex]*this->ky[yindex] + + this->kz[zindex]*this->kz[zindex]); + if ((this->fk0 <= knorm) && + (this->fk1 >= knorm)) + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + acc_field[cindex*3+c][i] += this->famplitude*vort_field[cindex*3+c][i]*factor; + } + ); + return; + } +} + +template <class rnumber> +void fluid_solver<rnumber>::omega_nonlin( + int src) +{ + assert(src >= 0 && src < 3); + this->compute_velocity(this->cv[src]); + /* get fields from Fourier space to real space */ + fftw_interface<rnumber>::execute(*(this->c2r_velocity )); + fftw_interface<rnumber>::execute(*(this->vc2r[src])); + /* compute cross product $u \times \omega$, and normalize */ + rnumber tmp[3][2]; + ptrdiff_t tindex; + RLOOP ( + this, + [&](ptrdiff_t rindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + tmp[cc][0] = (this->ru[tindex+(cc+1)%3]*this->rv[src][tindex+(cc+2)%3] - + this->ru[tindex+(cc+2)%3]*this->rv[src][tindex+(cc+1)%3]); + for (int cc=0; cc<3; cc++) + this->ru[(3*rindex)+cc] = tmp[cc][0] / this->normalization_factor; + } + ); + /* go back to Fourier space */ + this->clean_up_real_space(this->ru, 3); + fftw_interface<rnumber>::execute(*(this->r2c_velocity )); + this->dealias(this->cu, 3); + /* $\imath k \times Fourier(u \times \omega)$ */ + CLOOP( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + tindex = 3*cindex; + { + tmp[0][0] = -(this->ky[yindex]*this->cu[tindex+2][1] - this->kz[zindex]*this->cu[tindex+1][1]); + tmp[1][0] = -(this->kz[zindex]*this->cu[tindex+0][1] - this->kx[xindex]*this->cu[tindex+2][1]); + tmp[2][0] = -(this->kx[xindex]*this->cu[tindex+1][1] - this->ky[yindex]*this->cu[tindex+0][1]); + tmp[0][1] = (this->ky[yindex]*this->cu[tindex+2][0] - this->kz[zindex]*this->cu[tindex+1][0]); + tmp[1][1] = (this->kz[zindex]*this->cu[tindex+0][0] - this->kx[xindex]*this->cu[tindex+2][0]); + tmp[2][1] = (this->kx[xindex]*this->cu[tindex+1][0] - this->ky[yindex]*this->cu[tindex+0][0]); + } + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->cu[tindex+cc][i] = tmp[cc][i]; + } + ); + this->add_forcing(this->cu, this->cv[src], 1.0); + this->force_divfree(this->cu); +} + +template <class rnumber> +void fluid_solver<rnumber>::step(double dt) +{ + double factor0, factor1; + std::fill_n((rnumber*)this->cv[1], this->cd->local_size*2, 0.0); + this->omega_nonlin(0); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + factor0 = exp(-this->nu * k2 * dt); + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->cv[1][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i] + + dt*this->cu[3*cindex+cc][i])*factor0; + } + } + ); + + this->omega_nonlin(1); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + factor0 = exp(-this->nu * k2 * dt/2); + factor1 = exp( this->nu * k2 * dt/2); + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->cv[2][3*cindex+cc][i] = (3*this->cv[0][3*cindex+cc][i]*factor0 + + (this->cv[1][3*cindex+cc][i] + + dt*this->cu[3*cindex+cc][i])*factor1)*0.25; + } + } + ); + + this->omega_nonlin(2); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + factor0 = exp(-this->nu * k2 * dt * 0.5); + for (int cc=0; cc<3; cc++) for (int i=0; i<2; i++) + this->cv[3][3*cindex+cc][i] = (this->cv[0][3*cindex+cc][i]*factor0 + + 2*(this->cv[2][3*cindex+cc][i] + + dt*this->cu[3*cindex+cc][i]))*factor0/3; + } + } + ); + + this->force_divfree(this->cvorticity); + this->symmetrize(this->cvorticity, 3); + this->iteration++; +} + +template <class rnumber> +int fluid_solver<rnumber>::read(char field, char representation) +{ + char fname[512]; + int read_result; + if (field == 'v') + { + if (representation == 'c') + { + this->fill_up_filename("cvorticity", fname); + read_result = this->cd->read(fname, (void*)this->cvorticity); + if (read_result != EXIT_SUCCESS) + return read_result; + } + if (representation == 'r') + { + read_result = this->read_base("rvorticity", this->rvorticity); + if (read_result != EXIT_SUCCESS) + return read_result; + else + fftw_interface<rnumber>::execute(*(this->r2c_vorticity )); + } + this->low_pass_Fourier(this->cvorticity, 3, this->kM); + this->force_divfree(this->cvorticity); + this->symmetrize(this->cvorticity, 3); + return EXIT_SUCCESS; + } + if ((field == 'u') && (representation == 'c')) + { + read_result = this->read_base("cvelocity", this->cvelocity); + this->low_pass_Fourier(this->cvelocity, 3, this->kM); + this->force_divfree(this->cvorticity); + this->symmetrize(this->cvorticity, 3); + return read_result; + } + if ((field == 'u') && (representation == 'r')) + return this->read_base("rvelocity", this->rvelocity); + return EXIT_FAILURE; +} + +template <class rnumber> +int fluid_solver<rnumber>::write(char field, char representation) +{ + char fname[512]; + if ((field == 'v') && (representation == 'c')) + { + this->fill_up_filename("cvorticity", fname); + return this->cd->write(fname, (void*)this->cvorticity); + } + if ((field == 'v') && (representation == 'r')) + { + fftw_interface<rnumber>::execute(*(this->c2r_vorticity )); + clip_zero_padding<rnumber>(this->rd, this->rvorticity, 3); + this->fill_up_filename("rvorticity", fname); + return this->rd->write(fname, this->rvorticity); + } + this->compute_velocity(this->cvorticity); + if ((field == 'u') && (representation == 'c')) + { + this->fill_up_filename("cvelocity", fname); + return this->cd->write(fname, this->cvelocity); + } + if ((field == 'u') && (representation == 'r')) + { + this->ift_velocity(); + clip_zero_padding<rnumber>(this->rd, this->rvelocity, 3); + this->fill_up_filename("rvelocity", fname); + return this->rd->write(fname, this->rvelocity); + } + return EXIT_FAILURE; +} + +template <class rnumber> +int fluid_solver<rnumber>::write_rTrS2() +{ + char fname[512]; + this->fill_up_filename("rTrS2", fname); + typename fftw_interface<rnumber>::complex *ca; + rnumber *ra; + ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3); + ra = (rnumber*)(ca); + this->compute_velocity(this->cvorticity); + this->compute_vector_gradient(ca, this->cvelocity); + for (int cc=0; cc<3; cc++) + { + std::copy( + (rnumber*)(ca + cc*this->cd->local_size), + (rnumber*)(ca + (cc+1)*this->cd->local_size), + (rnumber*)this->cv[1]); + fftw_interface<rnumber>::execute(*(this->vc2r[1])); + std::copy( + this->rv[1], + this->rv[1] + this->cd->local_size*2, + ra + cc*this->cd->local_size*2); + } + /* velocity gradient is now stored, in real space, in ra */ + rnumber *dx_u, *dy_u, *dz_u; + dx_u = ra; + dy_u = ra + 2*this->cd->local_size; + dz_u = ra + 4*this->cd->local_size; + rnumber *trS2 = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); + double average_local = 0; + RLOOP( + this, + [&](ptrdiff_t rindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + rnumber AxxAxx; + rnumber AyyAyy; + rnumber AzzAzz; + rnumber Sxy; + rnumber Syz; + rnumber Szx; + ptrdiff_t tindex = 3*rindex; + AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; + AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; + AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; + Sxy = dx_u[tindex+1]+dy_u[tindex+0]; + Syz = dy_u[tindex+2]+dz_u[tindex+1]; + Szx = dz_u[tindex+0]+dx_u[tindex+2]; + trS2[rindex] = (AxxAxx + AyyAyy + AzzAzz + + (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); + average_local += trS2[rindex]; + } + ); + double average; + MPI_Allreduce( + &average_local, + &average, + 1, + MPI_DOUBLE, MPI_SUM, this->cd->comm); + DEBUG_MSG("average TrS2 is %g\n", average); + fftw_interface<rnumber>::free(ca); + /* output goes here */ + int ntmp[3]; + ntmp[0] = this->rd->sizes[0]; + ntmp[1] = this->rd->sizes[1]; + ntmp[2] = this->rd->sizes[2]; + field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); + clip_zero_padding<rnumber>(scalar_descriptor, trS2, 1); + int return_value = scalar_descriptor->write(fname, trS2); + delete scalar_descriptor; + fftw_interface<rnumber>::free(trS2); + return return_value; +} + +template <class rnumber> +int fluid_solver<rnumber>::write_renstrophy() +{ + char fname[512]; + this->fill_up_filename("renstrophy", fname); + rnumber *enstrophy = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); + this->ift_vorticity(); + double average_local = 0; + RLOOP( + this, + [&](ptrdiff_t rindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + ptrdiff_t tindex = 3*rindex; + enstrophy[rindex] = ( + this->rvorticity[tindex+0]*this->rvorticity[tindex+0] + + this->rvorticity[tindex+1]*this->rvorticity[tindex+1] + + this->rvorticity[tindex+2]*this->rvorticity[tindex+2] + )/2; + average_local += enstrophy[rindex]; + } + ); + double average; + MPI_Allreduce( + &average_local, + &average, + 1, + MPI_DOUBLE, MPI_SUM, this->cd->comm); + DEBUG_MSG("average enstrophy is %g\n", average); + /* output goes here */ + int ntmp[3]; + ntmp[0] = this->rd->sizes[0]; + ntmp[1] = this->rd->sizes[1]; + ntmp[2] = this->rd->sizes[2]; + field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); + clip_zero_padding<rnumber>(scalar_descriptor, enstrophy, 1); + int return_value = scalar_descriptor->write(fname, enstrophy); + delete scalar_descriptor; + fftw_interface<rnumber>::free(enstrophy); + return return_value; +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_pressure(rnumber (*__restrict__ pressure)[2]) +{ + /* assume velocity is already in real space representation */ + ptrdiff_t tindex; + + /* diagonal terms 11 22 33 */ + RLOOP ( + this, + [&](ptrdiff_t rindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc]; + } + ); + this->clean_up_real_space(this->rv[1], 3); + fftw_interface<rnumber>::execute(*(this->vr2c[1])); + this->dealias(this->cv[1], 3); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2 && k2 > 0) + { + tindex = 3*cindex; + for (int i=0; i<2; i++) + { + pressure[cindex][i] = -(this->kx[xindex]*this->kx[xindex]*this->cv[1][tindex+0][i] + + this->ky[yindex]*this->ky[yindex]*this->cv[1][tindex+1][i] + + this->kz[zindex]*this->kz[zindex]*this->cv[1][tindex+2][i]); + } + } + else + std::fill_n((rnumber*)(pressure+cindex), 2, 0.0); + } + ); + /* off-diagonal terms 12 23 31 */ + RLOOP ( + this, + [&](ptrdiff_t rindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3]; + } + ); + this->clean_up_real_space(this->rv[1], 3); + fftw_interface<rnumber>::execute(*(this->vr2c[1])); + this->dealias(this->cv[1], 3); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2 && k2 > 0) + { + tindex = 3*cindex; + for (int i=0; i<2; i++) + { + pressure[cindex][i] -= 2*(this->kx[xindex]*this->ky[yindex]*this->cv[1][tindex+0][i] + + this->ky[yindex]*this->kz[zindex]*this->cv[1][tindex+1][i] + + this->kz[zindex]*this->kx[xindex]*this->cv[1][tindex+2][i]); + pressure[cindex][i] /= this->normalization_factor*k2; + } + } + } + ); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_gradient_statistics( + rnumber (*__restrict__ vec)[2], +double *gradu_moments, +double *trS2QR_moments, +ptrdiff_t *gradu_hist, +ptrdiff_t *trS2QR_hist, +ptrdiff_t *QR2D_hist, +double trS2QR_max_estimates[], +double gradu_max_estimates[], +int nbins, +int QR2D_nbins) +{ + typename fftw_interface<rnumber>::complex *ca; + rnumber *ra; + ca = fftw_interface<rnumber>::alloc_complex(this->cd->local_size*3); + ra = (rnumber*)(ca); + this->compute_vector_gradient(ca, vec); + for (int cc=0; cc<3; cc++) + { + std::copy( + (rnumber*)(ca + cc*this->cd->local_size), + (rnumber*)(ca + (cc+1)*this->cd->local_size), + (rnumber*)this->cv[1]); + fftw_interface<rnumber>::execute(*(this->vc2r[1])); + std::copy( + this->rv[1], + this->rv[1] + this->cd->local_size*2, + ra + cc*this->cd->local_size*2); + } + /* velocity gradient is now stored, in real space, in ra */ + std::fill_n(this->rv[1], 2*this->cd->local_size, 0.0); + rnumber *dx_u, *dy_u, *dz_u; + dx_u = ra; + dy_u = ra + 2*this->cd->local_size; + dz_u = ra + 4*this->cd->local_size; + double binsize[2]; + double tmp_max_estimate[3]; + tmp_max_estimate[0] = trS2QR_max_estimates[0]; + tmp_max_estimate[1] = trS2QR_max_estimates[1]; + tmp_max_estimate[2] = trS2QR_max_estimates[2]; + binsize[0] = 2*tmp_max_estimate[2] / QR2D_nbins; + binsize[1] = 2*tmp_max_estimate[1] / QR2D_nbins; + ptrdiff_t *local_hist = new ptrdiff_t[QR2D_nbins*QR2D_nbins]; + std::fill_n(local_hist, QR2D_nbins*QR2D_nbins, 0); + RLOOP( + this, + [&](ptrdiff_t rindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + rnumber AxxAxx; + rnumber AyyAyy; + rnumber AzzAzz; + rnumber AxyAyx; + rnumber AyzAzy; + rnumber AzxAxz; + rnumber Sxy; + rnumber Syz; + rnumber Szx; + ptrdiff_t tindex = 3*rindex; + AxxAxx = dx_u[tindex+0]*dx_u[tindex+0]; + AyyAyy = dy_u[tindex+1]*dy_u[tindex+1]; + AzzAzz = dz_u[tindex+2]*dz_u[tindex+2]; + AxyAyx = dx_u[tindex+1]*dy_u[tindex+0]; + AyzAzy = dy_u[tindex+2]*dz_u[tindex+1]; + AzxAxz = dz_u[tindex+0]*dx_u[tindex+2]; + this->rv[1][tindex+1] = - (AxxAxx + AyyAyy + AzzAzz)/2 - AxyAyx - AyzAzy - AzxAxz; + this->rv[1][tindex+2] = - (dx_u[tindex+0]*(AxxAxx/3 + AxyAyx + AzxAxz) + + dy_u[tindex+1]*(AyyAyy/3 + AxyAyx + AyzAzy) + + dz_u[tindex+2]*(AzzAzz/3 + AzxAxz + AyzAzy) + + dx_u[tindex+1]*dy_u[tindex+2]*dz_u[tindex+0] + + dx_u[tindex+2]*dy_u[tindex+0]*dz_u[tindex+1]); + int bin0 = int(floor((this->rv[1][tindex+2] + tmp_max_estimate[2]) / binsize[0])); + int bin1 = int(floor((this->rv[1][tindex+1] + tmp_max_estimate[1]) / binsize[1])); + if ((bin0 >= 0 && bin0 < QR2D_nbins) && + (bin1 >= 0 && bin1 < QR2D_nbins)) + local_hist[bin1*QR2D_nbins + bin0]++; + Sxy = dx_u[tindex+1]+dy_u[tindex+0]; + Syz = dy_u[tindex+2]+dz_u[tindex+1]; + Szx = dz_u[tindex+0]+dx_u[tindex+2]; + this->rv[1][tindex] = (AxxAxx + AyyAyy + AzzAzz + + (Sxy*Sxy + Syz*Syz + Szx*Szx)/2); + } + ); + MPI_Allreduce( + local_hist, + QR2D_hist, + QR2D_nbins * QR2D_nbins, + MPI_INT64_T, MPI_SUM, this->cd->comm); + delete[] local_hist; + this->compute_rspace_stats3( + this->rv[1], + trS2QR_moments, + trS2QR_hist, + tmp_max_estimate, + nbins); + double *tmp_moments = new double[10*3]; + ptrdiff_t *tmp_hist = new ptrdiff_t[nbins*3]; + for (int cc=0; cc<3; cc++) + { + tmp_max_estimate[0] = gradu_max_estimates[cc*3 + 0]; + tmp_max_estimate[1] = gradu_max_estimates[cc*3 + 1]; + tmp_max_estimate[2] = gradu_max_estimates[cc*3 + 2]; + this->compute_rspace_stats3( + dx_u + cc*2*this->cd->local_size, + tmp_moments, + tmp_hist, + tmp_max_estimate, + nbins); + for (int n = 0; n < 10; n++) + for (int i = 0; i < 3 ; i++) + { + gradu_moments[(n*3 + cc)*3 + i] = tmp_moments[n*3 + i]; + } + for (int n = 0; n < nbins; n++) + for (int i = 0; i < 3; i++) + { + gradu_hist[(n*3 + cc)*3 + i] = tmp_hist[n*3 + i]; + } + } + delete[] tmp_moments; + delete[] tmp_hist; + fftw_interface<rnumber>::free(ca); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber (*acceleration)[2]) +{ + ptrdiff_t tindex; + typename fftw_interface<rnumber>::complex *pressure; + pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3); + this->compute_velocity(this->cvorticity); + this->ift_velocity(); + this->compute_pressure(pressure); + this->compute_velocity(this->cvorticity); + std::fill_n((rnumber*)this->cv[1], 2*this->cd->local_size, 0.0); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + tindex = 3*cindex; + for (int cc=0; cc<3; cc++) + for (int i=0; i<2; i++) + this->cv[1][tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; + if (strcmp(this->forcing_type, "linear") == 0) + { + double knorm = sqrt(k2); + if ((this->fk0 <= knorm) && + (this->fk1 >= knorm)) + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + this->cv[1][tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; + } + this->cv[1][tindex+0][0] += this->kx[xindex]*pressure[cindex][1]; + this->cv[1][tindex+1][0] += this->ky[yindex]*pressure[cindex][1]; + this->cv[1][tindex+2][0] += this->kz[zindex]*pressure[cindex][1]; + this->cv[1][tindex+0][1] -= this->kx[xindex]*pressure[cindex][0]; + this->cv[1][tindex+1][1] -= this->ky[yindex]*pressure[cindex][0]; + this->cv[1][tindex+2][1] -= this->kz[zindex]*pressure[cindex][0]; + } + } + ); + std::copy( + (rnumber*)this->cv[1], + (rnumber*)(this->cv[1] + this->cd->local_size), + (rnumber*)acceleration); + fftw_interface<rnumber>::free(pressure); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_Eulerian_acceleration(rnumber (*__restrict__ acceleration)[2]) +{ + std::fill_n((rnumber*)(acceleration), 2*this->cd->local_size, 0.0); + ptrdiff_t tindex; + this->compute_velocity(this->cvorticity); + /* put in linear terms */ + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + tindex = 3*cindex; + for (int cc=0; cc<3; cc++) + for (int i=0; i<2; i++) + acceleration[tindex+cc][i] = - this->nu*k2*this->cu[tindex+cc][i]; + if (strcmp(this->forcing_type, "linear") == 0) + { + double knorm = sqrt(k2); + if ((this->fk0 <= knorm) && + (this->fk1 >= knorm)) + { + for (int c=0; c<3; c++) + for (int i=0; i<2; i++) + acceleration[tindex+c][i] += this->famplitude*this->cu[tindex+c][i]; + } + } + } + } + ); + this->ift_velocity(); + /* compute uu */ + /* 11 22 33 */ + RLOOP ( + this, + [&](ptrdiff_t rindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+cc] / this->normalization_factor; + } + ); + this->clean_up_real_space(this->rv[1], 3); + fftw_interface<rnumber>::execute(*(this->vr2c[1])); + this->dealias(this->cv[1], 3); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + tindex = 3*cindex; + acceleration[tindex+0][0] += + this->kx[xindex]*this->cv[1][tindex+0][1]; + acceleration[tindex+0][1] += + -this->kx[xindex]*this->cv[1][tindex+0][0]; + acceleration[tindex+1][0] += + this->ky[yindex]*this->cv[1][tindex+1][1]; + acceleration[tindex+1][1] += + -this->ky[yindex]*this->cv[1][tindex+1][0]; + acceleration[tindex+2][0] += + this->kz[zindex]*this->cv[1][tindex+2][1]; + acceleration[tindex+2][1] += + -this->kz[zindex]*this->cv[1][tindex+2][0]; + } + } + ); + /* 12 23 31 */ + RLOOP ( + this, + [&](ptrdiff_t rindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ + tindex = 3*rindex; + for (int cc=0; cc<3; cc++) + this->rv[1][tindex+cc] = this->ru[tindex+cc]*this->ru[tindex+(cc+1)%3] / this->normalization_factor; + } + ); + this->clean_up_real_space(this->rv[1], 3); + fftw_interface<rnumber>::execute(*(this->vr2c[1])); + this->dealias(this->cv[1], 3); + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + tindex = 3*cindex; + acceleration[tindex+0][0] += + (this->ky[yindex]*this->cv[1][tindex+0][1] + + this->kz[zindex]*this->cv[1][tindex+2][1]); + acceleration[tindex+0][1] += + - (this->ky[yindex]*this->cv[1][tindex+0][0] + + this->kz[zindex]*this->cv[1][tindex+2][0]); + acceleration[tindex+1][0] += + (this->kz[zindex]*this->cv[1][tindex+1][1] + + this->kx[xindex]*this->cv[1][tindex+0][1]); + acceleration[tindex+1][1] += + - (this->kz[zindex]*this->cv[1][tindex+1][0] + + this->kx[xindex]*this->cv[1][tindex+0][0]); + acceleration[tindex+2][0] += + (this->kx[xindex]*this->cv[1][tindex+2][1] + + this->ky[yindex]*this->cv[1][tindex+1][1]); + acceleration[tindex+2][1] += + - (this->kx[xindex]*this->cv[1][tindex+2][0] + + this->ky[yindex]*this->cv[1][tindex+1][0]); + } + } + ); + if (this->cd->myrank == this->cd->rank[0]) + std::fill_n((rnumber*)(acceleration), 6, 0.0); + this->force_divfree(acceleration); +} + +template <class rnumber> +void fluid_solver<rnumber>::compute_Lagrangian_acceleration(rnumber *__restrict__ acceleration) +{ + this->compute_Lagrangian_acceleration((typename fftw_interface<rnumber>::complex*)acceleration); + fftw_interface<rnumber>::execute(*(this->vc2r[1])); + std::copy( + this->rv[1], + this->rv[1] + 2*this->cd->local_size, + acceleration); +} + +template <class rnumber> +int fluid_solver<rnumber>::write_rpressure() +{ + char fname[512]; + typename fftw_interface<rnumber>::complex *pressure; + pressure = fftw_interface<rnumber>::alloc_complex(this->cd->local_size/3); + this->compute_velocity(this->cvorticity); + this->ift_velocity(); + this->compute_pressure(pressure); + this->fill_up_filename("rpressure", fname); + rnumber *rpressure = fftw_interface<rnumber>::alloc_real((this->cd->local_size/3)*2); + typename fftw_interface<rnumber>::plan c2r; + c2r = fftw_interface<rnumber>::mpi_plan_dft_c2r_3d( + this->rd->sizes[0], this->rd->sizes[1], this->rd->sizes[2], + pressure, rpressure, this->cd->comm, + this->fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN); + fftw_interface<rnumber>::execute(c2r); + /* output goes here */ + int ntmp[3]; + ntmp[0] = this->rd->sizes[0]; + ntmp[1] = this->rd->sizes[1]; + ntmp[2] = this->rd->sizes[2]; + field_descriptor<rnumber> *scalar_descriptor = new field_descriptor<rnumber>(3, ntmp, mpi_real_type<rnumber>::real(), this->cd->comm); + clip_zero_padding<rnumber>(scalar_descriptor, rpressure, 1); + int return_value = scalar_descriptor->write(fname, rpressure); + delete scalar_descriptor; + fftw_interface<rnumber>::destroy_plan(c2r); + fftw_interface<rnumber>::free(pressure); + fftw_interface<rnumber>::free(rpressure); + return return_value; +} /*****************************************************************************/ -/* now actually use the macro defined above */ -FLUID_SOLVER_DEFINITIONS( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT, - MPI_COMPLEX) -FLUID_SOLVER_DEFINITIONS( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE, - BFPS_MPICXX_DOUBLE_COMPLEX) -/*****************************************************************************/ + diff --git a/bfps/cpp/fluid_solver.hpp b/bfps/cpp/fluid_solver.hpp index 2b6ec64de12cc133687074c83c71696ffc507509..4cc75cee4385353f64dc9bc9e7d34c6efba9ad48 100644 --- a/bfps/cpp/fluid_solver.hpp +++ b/bfps/cpp/fluid_solver.hpp @@ -55,12 +55,12 @@ class fluid_solver:public fluid_solver_base<rnumber> typename fluid_solver_base<rnumber>::cnumber *cu, *cv[4]; /* plans */ - void *c2r_vorticity; - void *r2c_vorticity; - void *c2r_velocity; - void *r2c_velocity; - void *uc2r, *ur2c; - void *vr2c[3], *vc2r[3]; + typename fftw_interface<rnumber>::plan *c2r_vorticity; + typename fftw_interface<rnumber>::plan *r2c_vorticity; + typename fftw_interface<rnumber>::plan *c2r_velocity; + typename fftw_interface<rnumber>::plan *r2c_velocity; + typename fftw_interface<rnumber>::plan *uc2r, *ur2c; + typename fftw_interface<rnumber>::plan *vr2c[3], *vc2r[3]; /* physical parameters */ double nu; diff --git a/bfps/cpp/fluid_solver_base.cpp b/bfps/cpp/fluid_solver_base.cpp index 2f2aeee9a8ae699b7863c90dcffb550bc905390a..b7ba8680de20f7f1f4a48bdf561ffd35c33f9528 100644 --- a/bfps/cpp/fluid_solver_base.cpp +++ b/bfps/cpp/fluid_solver_base.cpp @@ -69,24 +69,26 @@ void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec std::fill_n(cospec_local, this->nshells*9, 0); int tmp_int; CLOOP_K2_NXMODES( - this, - if (k2 <= this->kMspec2) - { - tmp_int = int(sqrt(k2)/this->dk)*9; - for (int i=0; i<3; i++) - for (int j=0; j<3; j++) - { - cospec_local[tmp_int+i*3+j] += nxmodes * ( - (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + - (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); - } - } - ); + this, + + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, + ptrdiff_t zindex, double k2, int nxmodes){if (k2 <= this->kMspec2) + { + tmp_int = int(sqrt(k2)/this->dk)*9; + for (int i=0; i<3; i++) + for (int j=0; j<3; j++) + { + cospec_local[tmp_int+i*3+j] += nxmodes * ( + (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + + (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); + } + }} + ); MPI_Allreduce( - (void*)cospec_local, - (void*)spec, - this->nshells*9, - MPI_DOUBLE, MPI_SUM, this->cd->comm); + (void*)cospec_local, + (void*)spec, + this->nshells*9, + MPI_DOUBLE, MPI_SUM, this->cd->comm); fftw_free(cospec_local); } @@ -98,25 +100,27 @@ void fluid_solver_base<rnumber>::cospectrum(cnumber *a, cnumber *b, double *spec double factor = 1; int tmp_int; CLOOP_K2_NXMODES( - this, - if (k2 <= this->kMspec2) - { - factor = nxmodes*pow(k2, k2exponent); - tmp_int = int(sqrt(k2)/this->dk)*9; - for (int i=0; i<3; i++) - for (int j=0; j<3; j++) - { - cospec_local[tmp_int+i*3+j] += factor * ( - (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + - (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); - } - } - ); + this, + + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, + ptrdiff_t zindex, double k2, int nxmodes){if (k2 <= this->kMspec2) + { + factor = nxmodes*pow(k2, k2exponent); + tmp_int = int(sqrt(k2)/this->dk)*9; + for (int i=0; i<3; i++) + for (int j=0; j<3; j++) + { + cospec_local[tmp_int+i*3+j] += factor * ( + (*(a + 3*cindex+i))[0] * (*(b + 3*cindex+j))[0] + + (*(a + 3*cindex+i))[1] * (*(b + 3*cindex+j))[1]); + } + }} + ); MPI_Allreduce( - (void*)cospec_local, - (void*)spec, - this->nshells*9, - MPI_DOUBLE, MPI_SUM, this->cd->comm); + (void*)cospec_local, + (void*)spec, + this->nshells*9, + MPI_DOUBLE, MPI_SUM, this->cd->comm); //for (int n=0; n<this->nshells; n++) //{ // spec[n] *= 12.5663706144*pow(this->kshell[n], 2) / this->nshell[n]; @@ -175,7 +179,8 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( std::fill_n(local_moments, nmoments*nvals, 0); if (nvals == 4) local_moments[3] = max_estimate[3]; RLOOP( - this, + this, + [&](ptrdiff_t rindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ std::fill_n(pow_tmp, nvals, 1.0); if (nvals == 4) val_tmp[3] = 0.0; for (int i=0; i<3; i++) @@ -207,27 +212,28 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( for (int n=1; n < nmoments-1; n++) for (int i=0; i<nvals; i++) local_moments[n*nvals + i] += (pow_tmp[i] = val_tmp[i]*pow_tmp[i]); - ); + } + ); MPI_Allreduce( - (void*)local_moments, - (void*)moments, - nvals, - MPI_DOUBLE, MPI_MIN, this->cd->comm); + (void*)local_moments, + (void*)moments, + nvals, + MPI_DOUBLE, MPI_MIN, this->cd->comm); MPI_Allreduce( - (void*)(local_moments + nvals), - (void*)(moments+nvals), - (nmoments-2)*nvals, - MPI_DOUBLE, MPI_SUM, this->cd->comm); + (void*)(local_moments + nvals), + (void*)(moments+nvals), + (nmoments-2)*nvals, + MPI_DOUBLE, MPI_SUM, this->cd->comm); MPI_Allreduce( - (void*)(local_moments + (nmoments-1)*nvals), - (void*)(moments+(nmoments-1)*nvals), - nvals, - MPI_DOUBLE, MPI_MAX, this->cd->comm); + (void*)(local_moments + (nmoments-1)*nvals), + (void*)(moments+(nmoments-1)*nvals), + nvals, + MPI_DOUBLE, MPI_MAX, this->cd->comm); MPI_Allreduce( - (void*)local_hist, - (void*)hist, - nbins*nvals, - MPI_INT64_T, MPI_SUM, this->cd->comm); + (void*)local_hist, + (void*)hist, + nbins*nvals, + MPI_INT64_T, MPI_SUM, this->cd->comm); for (int n=1; n < nmoments-1; n++) for (int i=0; i<nvals; i++) moments[n*nvals + i] /= this->normalization_factor; @@ -290,7 +296,8 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( std::fill_n(local_moments, 10*nvals, 0); if (nvals == 4) local_moments[3] = max_estimate[3]; RLOOP( - this, + this, + [&](ptrdiff_t rindex, ptrdiff_t xindex, ptrdiff_t yindex, ptrdiff_t zindex){ std::fill_n(pow_tmp, nvals, 1.0); if (nvals == 4) val_tmp[3] = 0.0; for (int i=0; i<3; i++) @@ -322,27 +329,28 @@ void fluid_solver_base<rnumber>::compute_rspace_stats( for (int n=1; n<9; n++) for (int i=0; i<nvals; i++) local_moments[n*nvals + i] += (pow_tmp[i] = val_tmp[i]*pow_tmp[i]); - ); + } + ); MPI_Allreduce( - (void*)local_moments, - (void*)moments, - nvals, - MPI_DOUBLE, MPI_MIN, this->cd->comm); + (void*)local_moments, + (void*)moments, + nvals, + MPI_DOUBLE, MPI_MIN, this->cd->comm); MPI_Allreduce( - (void*)(local_moments + nvals), - (void*)(moments+nvals), - 8*nvals, - MPI_DOUBLE, MPI_SUM, this->cd->comm); + (void*)(local_moments + nvals), + (void*)(moments+nvals), + 8*nvals, + MPI_DOUBLE, MPI_SUM, this->cd->comm); MPI_Allreduce( - (void*)(local_moments + 9*nvals), - (void*)(moments+9*nvals), - nvals, - MPI_DOUBLE, MPI_MAX, this->cd->comm); + (void*)(local_moments + 9*nvals), + (void*)(moments+9*nvals), + nvals, + MPI_DOUBLE, MPI_MAX, this->cd->comm); MPI_Allreduce( - (void*)local_hist, - (void*)hist, - nbins*nvals, - MPI_INT64_T, MPI_SUM, this->cd->comm); + (void*)local_hist, + (void*)hist, + nbins*nvals, + MPI_INT64_T, MPI_SUM, this->cd->comm); for (int n=1; n<9; n++) for (int i=0; i<nvals; i++) moments[n*nvals + i] /= this->normalization_factor; @@ -371,362 +379,361 @@ void fluid_solver_base<rnumber>::write_spectrum(const char *fname, cnumber *a, c /*****************************************************************************/ /* macro for specializations to numeric types compatible with FFTW */ -#define FLUID_SOLVER_BASE_DEFINITIONS(FFTW, R, MPI_RNUM, MPI_CNUM) \ - \ -template<> \ -fluid_solver_base<R>::fluid_solver_base( \ - const char *NAME, \ - int nx, \ - int ny, \ - int nz, \ - double DKX, \ - double DKY, \ - double DKZ, \ - int DEALIAS_TYPE, \ - unsigned FFTW_PLAN_RIGOR) \ -{ \ - strncpy(this->name, NAME, 256); \ - this->name[255] = '\0'; \ - this->iteration = 0; \ - this->fftw_plan_rigor = FFTW_PLAN_RIGOR; \ - \ - int ntmp[4]; \ - ntmp[0] = nz; \ - ntmp[1] = ny; \ - ntmp[2] = nx; \ - ntmp[3] = 3; \ - this->rd = new field_descriptor<R>( \ - 4, ntmp, MPI_RNUM, MPI_COMM_WORLD);\ - this->normalization_factor = (this->rd->full_size/3); \ - ntmp[0] = ny; \ - ntmp[1] = nz; \ - ntmp[2] = nx/2 + 1; \ - ntmp[3] = 3; \ - this->cd = new field_descriptor<R>( \ - 4, ntmp, MPI_CNUM, this->rd->comm);\ - \ - this->dkx = DKX; \ - this->dky = DKY; \ - this->dkz = DKZ; \ - this->kx = new double[this->cd->sizes[2]]; \ - this->ky = new double[this->cd->subsizes[0]]; \ - this->kz = new double[this->cd->sizes[1]]; \ - this->dealias_type = DEALIAS_TYPE; \ - switch(this->dealias_type) \ - { \ - /* HL07 smooth filter */ \ - case 1: \ - this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)-1); \ - this->kMy = this->dky*(int(this->rd->sizes[1] / 2)-1); \ - this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)-1); \ - break; \ - default: \ - this->kMx = this->dkx*(int(this->rd->sizes[2] / 3)-1); \ - this->kMy = this->dky*(int(this->rd->sizes[1] / 3)-1); \ - this->kMz = this->dkz*(int(this->rd->sizes[0] / 3)-1); \ - } \ - int i, ii; \ - for (i = 0; i<this->cd->sizes[2]; i++) \ - this->kx[i] = i*this->dkx; \ - for (i = 0; i<this->cd->subsizes[0]; i++) \ - { \ - ii = i + this->cd->starts[0]; \ - if (ii <= this->rd->sizes[1]/2) \ - this->ky[i] = this->dky*ii; \ - else \ - this->ky[i] = this->dky*(ii - this->rd->sizes[1]); \ - } \ - for (i = 0; i<this->cd->sizes[1]; i++) \ - { \ - if (i <= this->rd->sizes[0]/2) \ - this->kz[i] = this->dkz*i; \ - else \ - this->kz[i] = this->dkz*(i - this->rd->sizes[0]); \ - } \ - this->kM = this->kMx; \ - if (this->kM < this->kMy) this->kM = this->kMy; \ - if (this->kM < this->kMz) this->kM = this->kMz; \ - this->kM2 = this->kM * this->kM; \ - this->kMspec = this->kM; \ - this->kMspec2 = this->kM2; \ - this->dk = this->dkx; \ - if (this->dk > this->dky) this->dk = this->dky; \ - if (this->dk > this->dkz) this->dk = this->dkz; \ - this->dk2 = this->dk*this->dk; \ - DEBUG_MSG( \ - "kM = %g, kM2 = %g, dk = %g, dk2 = %g\n", \ - this->kM, this->kM2, this->dk, this->dk2); \ - /* spectra stuff */ \ - this->nshells = int(this->kMspec / this->dk) + 2; \ - DEBUG_MSG( \ - "kMspec = %g, kMspec2 = %g, nshells = %ld\n", \ - this->kMspec, this->kMspec2, this->nshells); \ - this->kshell = new double[this->nshells]; \ - std::fill_n(this->kshell, this->nshells, 0.0); \ - this->nshell = new int64_t[this->nshells]; \ - std::fill_n(this->nshell, this->nshells, 0); \ - double *kshell_local = new double[this->nshells]; \ - std::fill_n(kshell_local, this->nshells, 0.0); \ - int64_t *nshell_local = new int64_t[this->nshells]; \ - std::fill_n(nshell_local, this->nshells, 0.0); \ - double knorm; \ - CLOOP_K2_NXMODES( \ - this, \ - if (k2 < this->kM2) \ - { \ - knorm = sqrt(k2); \ - nshell_local[int(knorm/this->dk)] += nxmodes; \ - kshell_local[int(knorm/this->dk)] += nxmodes*knorm; \ - } \ - this->Fourier_filter[int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.)); \ - ); \ - \ - MPI_Allreduce( \ - (void*)(nshell_local), \ - (void*)(this->nshell), \ - this->nshells, \ - MPI_INT64_T, MPI_SUM, this->cd->comm); \ - MPI_Allreduce( \ - (void*)(kshell_local), \ - (void*)(this->kshell), \ - this->nshells, \ - MPI_DOUBLE, MPI_SUM, this->cd->comm); \ - for (unsigned int n=0; n<this->nshells; n++) \ - { \ - this->kshell[n] /= this->nshell[n]; \ - } \ - delete[] nshell_local; \ - delete[] kshell_local; \ -} \ - \ -template<> \ -fluid_solver_base<R>::~fluid_solver_base() \ -{ \ - delete[] this->kshell; \ - delete[] this->nshell; \ - \ - delete[] this->kx; \ - delete[] this->ky; \ - delete[] this->kz; \ - \ - delete this->cd; \ - delete this->rd; \ -} \ - \ -template<> \ -void fluid_solver_base<R>::low_pass_Fourier(FFTW(complex) *a, const int howmany, const double kmax) \ -{ \ - const double km2 = kmax*kmax; \ - const int howmany2 = 2*howmany; \ - /*DEBUG_MSG("entered low_pass_Fourier, kmax=%lg km2=%lg howmany2=%d\n", kmax, km2, howmany2);*/ \ - CLOOP_K2( \ - this, \ - /*DEBUG_MSG("kx=%lg ky=%lg kz=%lg k2=%lg\n", \ - this->kx[xindex], \ - this->ky[yindex], \ - this->kz[zindex], \ - k2);*/ \ - if (k2 >= km2) \ - std::fill_n((R*)(a + howmany*cindex), howmany2, 0.0); \ - );\ -} \ - \ -template<> \ -void fluid_solver_base<R>::dealias(FFTW(complex) *a, const int howmany) \ -{ \ - if (this->dealias_type == 0) \ - { \ - this->low_pass_Fourier(a, howmany, this->kM); \ - return; \ - } \ - double tval; \ - CLOOP_K2( \ - this, \ - tval = this->Fourier_filter[int(round(k2/this->dk2))]; \ - for (int tcounter = 0; tcounter < howmany; tcounter++) \ - for (int i=0; i<2; i++) \ - a[howmany*cindex+tcounter][i] *= tval; \ - ); \ -} \ - \ -template<> \ -void fluid_solver_base<R>::force_divfree(FFTW(complex) *a) \ -{ \ - FFTW(complex) tval; \ - CLOOP_K2( \ - this, \ - if (k2 > 0) \ - { \ - tval[0] = (this->kx[xindex]*((*(a + cindex*3 ))[0]) + \ - this->ky[yindex]*((*(a + cindex*3+1))[0]) + \ - this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2; \ - tval[1] = (this->kx[xindex]*((*(a + cindex*3 ))[1]) + \ - this->ky[yindex]*((*(a + cindex*3+1))[1]) + \ - this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2; \ - for (int imag_part=0; imag_part<2; imag_part++) \ - { \ - a[cindex*3 ][imag_part] -= tval[imag_part]*this->kx[xindex]; \ - a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex]; \ - a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex]; \ - } \ - } \ - );\ - if (this->cd->myrank == this->cd->rank[0]) \ - std::fill_n((R*)(a), 6, 0.0); \ -} \ - \ -template<> \ -void fluid_solver_base<R>::compute_vector_gradient(FFTW(complex) *A, FFTW(complex) *cvec) \ -{ \ - ptrdiff_t tindex; \ - std::fill_n((R*)A, 3*2*this->cd->local_size, 0.0); \ - FFTW(complex) *dx_u, *dy_u, *dz_u; \ - dx_u = A; \ - dy_u = A + this->cd->local_size; \ - dz_u = A + 2*this->cd->local_size; \ - CLOOP_K2( \ - this, \ - if (k2 <= this->kM2) \ - { \ - tindex = 3*cindex; \ - for (int cc=0; cc<3; cc++) \ - { \ - dx_u[tindex + cc][0] = -this->kx[xindex]*cvec[tindex+cc][1]; \ - dx_u[tindex + cc][1] = this->kx[xindex]*cvec[tindex+cc][0]; \ - dy_u[tindex + cc][0] = -this->ky[yindex]*cvec[tindex+cc][1]; \ - dy_u[tindex + cc][1] = this->ky[yindex]*cvec[tindex+cc][0]; \ - dz_u[tindex + cc][0] = -this->kz[zindex]*cvec[tindex+cc][1]; \ - dz_u[tindex + cc][1] = this->kz[zindex]*cvec[tindex+cc][0]; \ - } \ - } \ - ); \ -} \ - \ -template<> \ -void fluid_solver_base<R>::symmetrize(FFTW(complex) *data, const int howmany) \ -{ \ - ptrdiff_t ii, cc; \ - MPI_Status *mpistatus = new MPI_Status; \ - if (this->cd->myrank == this->cd->rank[0]) \ - { \ - for (cc = 0; cc < howmany; cc++) \ - data[cc][1] = 0.0; \ - for (ii = 1; ii < this->cd->sizes[1]/2; ii++) \ - for (cc = 0; cc < howmany; cc++) { \ - ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[0] = \ - (*(data + cc + howmany*( ii)*this->cd->sizes[2]))[0]; \ - ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[1] = \ - -(*(data + cc + howmany*( ii)*this->cd->sizes[2]))[1]; \ - } \ - } \ - FFTW(complex) *buffer; \ - buffer = FFTW(alloc_complex)(howmany*this->cd->sizes[1]); \ - ptrdiff_t yy; \ - /*ptrdiff_t tindex;*/ \ - int ranksrc, rankdst; \ - for (yy = 1; yy < this->cd->sizes[0]/2; yy++) { \ - ranksrc = this->cd->rank[yy]; \ - rankdst = this->cd->rank[this->cd->sizes[0] - yy]; \ - if (this->cd->myrank == ranksrc) \ - for (ii = 0; ii < this->cd->sizes[1]; ii++) \ - for (cc = 0; cc < howmany; cc++) \ - for (int imag_comp=0; imag_comp<2; imag_comp++) \ - (*(buffer + howmany*ii+cc))[imag_comp] = \ - (*(data + howmany*((yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[imag_comp]; \ - if (ranksrc != rankdst) \ - { \ - if (this->cd->myrank == ranksrc) \ - MPI_Send((void*)buffer, \ - howmany*this->cd->sizes[1], MPI_CNUM, rankdst, yy, \ - this->cd->comm); \ - if (this->cd->myrank == rankdst) \ - MPI_Recv((void*)buffer, \ - howmany*this->cd->sizes[1], MPI_CNUM, ranksrc, yy, \ - this->cd->comm, mpistatus); \ - } \ - if (this->cd->myrank == rankdst) \ - { \ - for (ii = 1; ii < this->cd->sizes[1]; ii++) \ - for (cc = 0; cc < howmany; cc++) \ - { \ - (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[0] = \ - (*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[0]; \ - (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[1] = \ - -(*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[1]; \ - } \ - for (cc = 0; cc < howmany; cc++) \ - { \ - (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[0] = (*(buffer + cc))[0]; \ - (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[1] = -(*(buffer + cc))[1]; \ - } \ - } \ - } \ - FFTW(free)(buffer); \ - delete mpistatus; \ - /* put asymmetric data to 0 */\ - /*if (this->cd->myrank == this->cd->rank[this->cd->sizes[0]/2]) \ - { \ - tindex = howmany*(this->cd->sizes[0]/2 - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]; \ - for (ii = 0; ii < this->cd->sizes[1]; ii++) \ - { \ - std::fill_n((R*)(data + tindex), howmany*2*this->cd->sizes[2], 0.0); \ - tindex += howmany*this->cd->sizes[2]; \ - } \ - } \ - tindex = howmany*(); \ - std::fill_n((R*)(data + tindex), howmany*2, 0.0);*/ \ -} \ - \ -template<> \ -int fluid_solver_base<R>::read_base(const char *fname, R *data) \ -{ \ - char full_name[512]; \ - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ - return this->rd->read(full_name, (void*)data); \ -} \ - \ -template<> \ -int fluid_solver_base<R>::read_base(const char *fname, FFTW(complex) *data) \ -{ \ - char full_name[512]; \ - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ - return this->cd->read(full_name, (void*)data); \ -} \ - \ -template<> \ -int fluid_solver_base<R>::write_base(const char *fname, R *data) \ -{ \ - char full_name[512]; \ - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ - return this->rd->write(full_name, (void*)data); \ -} \ - \ -template<> \ -int fluid_solver_base<R>::write_base(const char *fname, FFTW(complex) *data) \ -{ \ - char full_name[512]; \ - sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); \ - return this->cd->write(full_name, (void*)data); \ -} \ - \ -/* finally, force generation of code */ \ -template class fluid_solver_base<R>; \ +template <class rnumber> +fluid_solver_base<rnumber>::fluid_solver_base( + const char *NAME, + int nx, + int ny, + int nz, + double DKX, + double DKY, + double DKZ, + int DEALIAS_TYPE, + unsigned FFTW_PLAN_RIGOR) +{ + strncpy(this->name, NAME, 256); + this->name[255] = '\0'; + this->iteration = 0; + this->fftw_plan_rigor = FFTW_PLAN_RIGOR; -/*****************************************************************************/ + int ntmp[4]; + ntmp[0] = nz; + ntmp[1] = ny; + ntmp[2] = nx; + ntmp[3] = 3; + this->rd = new field_descriptor<rnumber>( + 4, ntmp, mpi_real_type<rnumber>::real(), MPI_COMM_WORLD); + this->normalization_factor = (this->rd->full_size/3); + ntmp[0] = ny; + ntmp[1] = nz; + ntmp[2] = nx/2 + 1; + ntmp[3] = 3; + this->cd = new field_descriptor<rnumber>( + 4, ntmp, mpi_real_type<rnumber>::complex(), this->rd->comm); + this->dkx = DKX; + this->dky = DKY; + this->dkz = DKZ; + this->kx = new double[this->cd->sizes[2]]; + this->ky = new double[this->cd->subsizes[0]]; + this->kz = new double[this->cd->sizes[1]]; + this->dealias_type = DEALIAS_TYPE; + switch(this->dealias_type) + { + /* HL07 smooth filter */ + case 1: + this->kMx = this->dkx*(int(this->rd->sizes[2] / 2)-1); + this->kMy = this->dky*(int(this->rd->sizes[1] / 2)-1); + this->kMz = this->dkz*(int(this->rd->sizes[0] / 2)-1); + break; + default: + this->kMx = this->dkx*(int(this->rd->sizes[2] / 3)-1); + this->kMy = this->dky*(int(this->rd->sizes[1] / 3)-1); + this->kMz = this->dkz*(int(this->rd->sizes[0] / 3)-1); + } + int i, ii; + for (i = 0; i<this->cd->sizes[2]; i++) + this->kx[i] = i*this->dkx; + for (i = 0; i<this->cd->subsizes[0]; i++) + { + ii = i + this->cd->starts[0]; + if (ii <= this->rd->sizes[1]/2) + this->ky[i] = this->dky*ii; + else + this->ky[i] = this->dky*(ii - this->rd->sizes[1]); + } + for (i = 0; i<this->cd->sizes[1]; i++) + { + if (i <= this->rd->sizes[0]/2) + this->kz[i] = this->dkz*i; + else + this->kz[i] = this->dkz*(i - this->rd->sizes[0]); + } + this->kM = this->kMx; + if (this->kM < this->kMy) this->kM = this->kMy; + if (this->kM < this->kMz) this->kM = this->kMz; + this->kM2 = this->kM * this->kM; + this->kMspec = this->kM; + this->kMspec2 = this->kM2; + this->dk = this->dkx; + if (this->dk > this->dky) this->dk = this->dky; + if (this->dk > this->dkz) this->dk = this->dkz; + this->dk2 = this->dk*this->dk; + DEBUG_MSG( + "kM = %g, kM2 = %g, dk = %g, dk2 = %g\n", + this->kM, this->kM2, this->dk, this->dk2); + /* spectra stuff */ + this->nshells = int(this->kMspec / this->dk) + 2; + DEBUG_MSG( + "kMspec = %g, kMspec2 = %g, nshells = %ld\n", + this->kMspec, this->kMspec2, this->nshells); + this->kshell = new double[this->nshells]; + std::fill_n(this->kshell, this->nshells, 0.0); + this->nshell = new int64_t[this->nshells]; + std::fill_n(this->nshell, this->nshells, 0); + double *kshell_local = new double[this->nshells]; + std::fill_n(kshell_local, this->nshells, 0.0); + int64_t *nshell_local = new int64_t[this->nshells]; + std::fill_n(nshell_local, this->nshells, 0.0); + double knorm; + CLOOP_K2_NXMODES( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, + ptrdiff_t zindex, double k2, int nxmodes){if (k2 < this->kM2) + { + knorm = sqrt(k2); + nshell_local[int(knorm/this->dk)] += nxmodes; + kshell_local[int(knorm/this->dk)] += nxmodes*knorm; + } + this->Fourier_filter[int(round(k2 / this->dk2))] = exp(-36.0 * pow(k2/this->kM2, 18.));} + ); + + MPI_Allreduce( + (void*)(nshell_local), + (void*)(this->nshell), + this->nshells, + MPI_INT64_T, MPI_SUM, this->cd->comm); + MPI_Allreduce( + (void*)(kshell_local), + (void*)(this->kshell), + this->nshells, + MPI_DOUBLE, MPI_SUM, this->cd->comm); + for (unsigned int n=0; n<this->nshells; n++) + { + this->kshell[n] /= this->nshell[n]; + } + delete[] nshell_local; + delete[] kshell_local; +} + +template <class rnumber> +fluid_solver_base<rnumber>::~fluid_solver_base() +{ + delete[] this->kshell; + delete[] this->nshell; + + delete[] this->kx; + delete[] this->ky; + delete[] this->kz; + + delete this->cd; + delete this->rd; +} + +template <class rnumber> +void fluid_solver_base<rnumber>::low_pass_Fourier(cnumber *a, const int howmany, const double kmax) +{ + const double km2 = kmax*kmax; + const int howmany2 = 2*howmany; + /*DEBUG_MSG("entered low_pass_Fourier, kmax=%lg km2=%lg howmany2=%d\n", kmax, km2, howmany2);*/ + CLOOP_K2( + this, + /*DEBUG_MSG("kx=%lg ky=%lg kz=%lg k2=%lg\n", + this->kx[xindex], + this->ky[yindex], + this->kz[zindex], + k2);*/ + + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, + ptrdiff_t zindex, double k2){ + if (k2 >= km2) + std::fill_n((rnumber*)(a + howmany*cindex), howmany2, 0.0);} + ); +} + +template <class rnumber> +void fluid_solver_base<rnumber>::dealias(cnumber *a, const int howmany) +{ + if (this->dealias_type == 0) + { + this->low_pass_Fourier(a, howmany, this->kM); + return; + } + double tval; + CLOOP_K2( + this, + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, + ptrdiff_t zindex, double k2){ + tval = this->Fourier_filter[int(round(k2/this->dk2))]; + for (int tcounter = 0; tcounter < howmany; tcounter++) + for (int i=0; i<2; i++) + a[howmany*cindex+tcounter][i] *= tval; + } + ); +} + +template <class rnumber> +void fluid_solver_base<rnumber>::force_divfree(cnumber *a) +{ + cnumber tval; + CLOOP_K2( + this, + + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, + ptrdiff_t zindex, double k2){if (k2 > 0) + { + tval[0] = (this->kx[xindex]*((*(a + cindex*3 ))[0]) + + this->ky[yindex]*((*(a + cindex*3+1))[0]) + + this->kz[zindex]*((*(a + cindex*3+2))[0]) ) / k2; + tval[1] = (this->kx[xindex]*((*(a + cindex*3 ))[1]) + + this->ky[yindex]*((*(a + cindex*3+1))[1]) + + this->kz[zindex]*((*(a + cindex*3+2))[1]) ) / k2; + for (int imag_part=0; imag_part<2; imag_part++) + { + a[cindex*3 ][imag_part] -= tval[imag_part]*this->kx[xindex]; + a[cindex*3+1][imag_part] -= tval[imag_part]*this->ky[yindex]; + a[cindex*3+2][imag_part] -= tval[imag_part]*this->kz[zindex]; + } + }} + ); + if (this->cd->myrank == this->cd->rank[0]) + std::fill_n((rnumber*)(a), 6, 0.0); +} + +template <class rnumber> +void fluid_solver_base<rnumber>::compute_vector_gradient(cnumber *A, cnumber *cvec) +{ + ptrdiff_t tindex; + std::fill_n((rnumber*)A, 3*2*this->cd->local_size, 0.0); + cnumber *dx_u, *dy_u, *dz_u; + dx_u = A; + dy_u = A + this->cd->local_size; + dz_u = A + 2*this->cd->local_size; + CLOOP_K2( + this, + + [&](ptrdiff_t cindex, ptrdiff_t xindex, ptrdiff_t yindex, + ptrdiff_t zindex, double k2){ + if (k2 <= this->kM2) + { + tindex = 3*cindex; + for (int cc=0; cc<3; cc++) + { + dx_u[tindex + cc][0] = -this->kx[xindex]*cvec[tindex+cc][1]; + dx_u[tindex + cc][1] = this->kx[xindex]*cvec[tindex+cc][0]; + dy_u[tindex + cc][0] = -this->ky[yindex]*cvec[tindex+cc][1]; + dy_u[tindex + cc][1] = this->ky[yindex]*cvec[tindex+cc][0]; + dz_u[tindex + cc][0] = -this->kz[zindex]*cvec[tindex+cc][1]; + dz_u[tindex + cc][1] = this->kz[zindex]*cvec[tindex+cc][0]; + } + }} + ); +} + +template <class rnumber> +void fluid_solver_base<rnumber>::symmetrize(cnumber *data, const int howmany) +{ + ptrdiff_t ii, cc; + MPI_Status *mpistatus = new MPI_Status; + if (this->cd->myrank == this->cd->rank[0]) + { + for (cc = 0; cc < howmany; cc++) + data[cc][1] = 0.0; + for (ii = 1; ii < this->cd->sizes[1]/2; ii++) + for (cc = 0; cc < howmany; cc++) { + ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[0] = + (*(data + cc + howmany*( ii)*this->cd->sizes[2]))[0]; + ( *(data + cc + howmany*(this->cd->sizes[1] - ii)*this->cd->sizes[2]))[1] = + -(*(data + cc + howmany*( ii)*this->cd->sizes[2]))[1]; + } + } + cnumber *buffer; + buffer = fftw_interface<rnumber>::alloc_complex(howmany*this->cd->sizes[1]); + ptrdiff_t yy; + /*ptrdiff_t tindex;*/ + int ranksrc, rankdst; + for (yy = 1; yy < this->cd->sizes[0]/2; yy++) { + ranksrc = this->cd->rank[yy]; + rankdst = this->cd->rank[this->cd->sizes[0] - yy]; + if (this->cd->myrank == ranksrc) + for (ii = 0; ii < this->cd->sizes[1]; ii++) + for (cc = 0; cc < howmany; cc++) + for (int imag_comp=0; imag_comp<2; imag_comp++) + (*(buffer + howmany*ii+cc))[imag_comp] = + (*(data + howmany*((yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[imag_comp]; + if (ranksrc != rankdst) + { + if (this->cd->myrank == ranksrc) + MPI_Send((void*)buffer, + howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), rankdst, yy, + this->cd->comm); + if (this->cd->myrank == rankdst) + MPI_Recv((void*)buffer, + howmany*this->cd->sizes[1], mpi_real_type<rnumber>::complex(), ranksrc, yy, + this->cd->comm, mpistatus); + } + if (this->cd->myrank == rankdst) + { + for (ii = 1; ii < this->cd->sizes[1]; ii++) + for (cc = 0; cc < howmany; cc++) + { + (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[0] = + (*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[0]; + (*(data + howmany*((this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1] + ii)*this->cd->sizes[2] + cc))[1] = + -(*(buffer + howmany*(this->cd->sizes[1]-ii)+cc))[1]; + } + for (cc = 0; cc < howmany; cc++) + { + (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[0] = (*(buffer + cc))[0]; + (*((data + cc + howmany*(this->cd->sizes[0] - yy - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2])))[1] = -(*(buffer + cc))[1]; + } + } + } + fftw_interface<rnumber>::free(buffer); + delete mpistatus; + /* put asymmetric data to 0 */ + /*if (this->cd->myrank == this->cd->rank[this->cd->sizes[0]/2]) + { + tindex = howmany*(this->cd->sizes[0]/2 - this->cd->starts[0])*this->cd->sizes[1]*this->cd->sizes[2]; + for (ii = 0; ii < this->cd->sizes[1]; ii++) + { + std::fill_n((rnumber*)(data + tindex), howmany*2*this->cd->sizes[2], 0.0); + tindex += howmany*this->cd->sizes[2]; + } + } + tindex = howmany*(); + std::fill_n((rnumber*)(data + tindex), howmany*2, 0.0);*/ +} + +template <class rnumber> +int fluid_solver_base<rnumber>::read_base(const char *fname, rnumber *data) +{ + char full_name[512]; + sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); + return this->rd->read(full_name, (void*)data); +} + +template <class rnumber> +int fluid_solver_base<rnumber>::read_base(const char *fname, cnumber *data) +{ + char full_name[512]; + sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); + return this->cd->read(full_name, (void*)data); +} + +template <class rnumber> +int fluid_solver_base<rnumber>::write_base(const char *fname, rnumber *data) +{ + char full_name[512]; + sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); + return this->rd->write(full_name, (void*)data); +} + +template <class rnumber> +int fluid_solver_base<rnumber>::write_base(const char *fname, cnumber *data) +{ + char full_name[512]; + sprintf(full_name, "%s_%s_i%.5x", this->name, fname, this->iteration); + return this->cd->write(full_name, (void*)data); +} + +/* finally, force generation of code */ +template class fluid_solver_base<float>; +template class fluid_solver_base<double>; /*****************************************************************************/ -/* now actually use the macro defined above */ -FLUID_SOLVER_BASE_DEFINITIONS( - FFTW_MANGLE_FLOAT, - float, - MPI_FLOAT, - MPI_COMPLEX) -FLUID_SOLVER_BASE_DEFINITIONS( - FFTW_MANGLE_DOUBLE, - double, - MPI_DOUBLE, - BFPS_MPICXX_DOUBLE_COMPLEX) -/*****************************************************************************/ + + + diff --git a/bfps/cpp/fluid_solver_base.hpp b/bfps/cpp/fluid_solver_base.hpp index 62deb597b4a6a3f4fc87198099d15778e7a2a255..9989bc19f1d3e24f6141e2097ab71a98dcf9f198 100644 --- a/bfps/cpp/fluid_solver_base.hpp +++ b/bfps/cpp/fluid_solver_base.hpp @@ -135,97 +135,99 @@ class fluid_solver_base /* macros for loops */ /* Fourier space loop */ -#define CLOOP(obj, expression) \ - \ -{ \ - ptrdiff_t cindex = 0; \ - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \ - for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \ - for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) \ - { \ - expression; \ - cindex++; \ - } \ +template <class ObjectType, class FuncType> +void CLOOP(ObjectType* obj, FuncType expression) +{ + ptrdiff_t cindex = 0; + for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) + for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) + for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) + { + expression(cindex, xindex, yindex, zindex); + cindex++; + } } -#define CLOOP_NXMODES(obj, expression) \ - \ -{ \ - ptrdiff_t cindex = 0; \ - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \ - for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \ - { \ - int nxmodes = 1; \ - ptrdiff_t xindex = 0; \ - expression; \ - cindex++; \ - nxmodes = 2; \ - for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) \ - { \ - expression; \ - cindex++; \ - } \ - } \ +template <class ObjectType, class FuncType> +void CLOOP_NXMODES(ObjectType* obj, FuncType expression) +{ + ptrdiff_t cindex = 0; + for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) + for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) + { + int nxmodes = 1; + ptrdiff_t xindex = 0; + expression; + cindex++; + nxmodes = 2; + for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) + { + expression(); + cindex++; + } + } } -#define CLOOP_K2(obj, expression) \ - \ -{ \ - double k2; \ - ptrdiff_t cindex = 0; \ - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \ - for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \ - for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) \ - { \ - k2 = (obj->kx[xindex]*obj->kx[xindex] + \ - obj->ky[yindex]*obj->ky[yindex] + \ - obj->kz[zindex]*obj->kz[zindex]); \ - expression; \ - cindex++; \ - } \ + +template <class ObjectType, class FuncType> +void CLOOP_K2(ObjectType* obj, FuncType expression) +{ + double k2; + ptrdiff_t cindex = 0; + for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) + for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) + for (ptrdiff_t xindex = 0; xindex < obj->cd->subsizes[2]; xindex++) + { + k2 = (obj->kx[xindex]*obj->kx[xindex] + + obj->ky[yindex]*obj->ky[yindex] + + obj->kz[zindex]*obj->kz[zindex]); + expression(cindex, xindex, yindex, zindex, k2); + cindex++; + } } -#define CLOOP_K2_NXMODES(obj, expression) \ - \ -{ \ - double k2; \ - ptrdiff_t cindex = 0; \ - for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) \ - for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) \ - { \ - int nxmodes = 1; \ - ptrdiff_t xindex = 0; \ - k2 = (obj->kx[xindex]*obj->kx[xindex] + \ - obj->ky[yindex]*obj->ky[yindex] + \ - obj->kz[zindex]*obj->kz[zindex]); \ - expression; \ - cindex++; \ - nxmodes = 2; \ - for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) \ - { \ - k2 = (obj->kx[xindex]*obj->kx[xindex] + \ - obj->ky[yindex]*obj->ky[yindex] + \ - obj->kz[zindex]*obj->kz[zindex]); \ - expression; \ - cindex++; \ - } \ - } \ + +template <class ObjectType, class FuncType> +void CLOOP_K2_NXMODES(ObjectType* obj, FuncType expression) +{ + double k2; + ptrdiff_t cindex = 0; + for (ptrdiff_t yindex = 0; yindex < obj->cd->subsizes[0]; yindex++) + for (ptrdiff_t zindex = 0; zindex < obj->cd->subsizes[1]; zindex++) + { + int nxmodes = 1; + ptrdiff_t xindex = 0; + k2 = (obj->kx[xindex]*obj->kx[xindex] + + obj->ky[yindex]*obj->ky[yindex] + + obj->kz[zindex]*obj->kz[zindex]); + expression(cindex, xindex, yindex, zindex, k2, nxmodes); + cindex++; + nxmodes = 2; + for (xindex = 1; xindex < obj->cd->subsizes[2]; xindex++) + { + k2 = (obj->kx[xindex]*obj->kx[xindex] + + obj->ky[yindex]*obj->ky[yindex] + + obj->kz[zindex]*obj->kz[zindex]); + expression(cindex, xindex, yindex, zindex, k2, nxmodes); + cindex++; + } + } } -/* real space loop */ -#define RLOOP(obj, expression) \ - \ -{ \ - for (int zindex = 0; zindex < obj->rd->subsizes[0]; zindex++) \ - for (int yindex = 0; yindex < obj->rd->subsizes[1]; yindex++) \ - { \ - ptrdiff_t rindex = (zindex * obj->rd->subsizes[1] + yindex)*(obj->rd->subsizes[2]+2); \ - for (int xindex = 0; xindex < obj->rd->subsizes[2]; xindex++) \ - { \ - expression; \ - rindex++; \ - } \ - } \ + +template <class ObjectType, class FuncType> +void RLOOP(ObjectType* obj, FuncType expression) +{ + for (int zindex = 0; zindex < obj->rd->subsizes[0]; zindex++) + for (int yindex = 0; yindex < obj->rd->subsizes[1]; yindex++) + { + ptrdiff_t rindex = (zindex * obj->rd->subsizes[1] + yindex)*(obj->rd->subsizes[2]+2); + for (int xindex = 0; xindex < obj->rd->subsizes[2]; xindex++) + { + expression(rindex, xindex, yindex, zindex); + rindex++; + } + } } /*****************************************************************************/ diff --git a/setup.py b/setup.py index 938936cba3330e6bc77d186fa6d15ab463d92364..cfd9fba198807f31df19d7b53ab6e0eada98b60c 100644 --- a/setup.py +++ b/setup.py @@ -111,7 +111,8 @@ src_file_list = ['field', header_list = (['cpp/base.hpp'] + ['cpp/' + fname + '.hpp' - for fname in src_file_list]) + for fname in src_file_list] + + ['cpp/fftw_interface.hpp']) with open('MANIFEST.in', 'w') as manifest_in_file: for fname in (['bfps/cpp/' + ff + '.cpp' for ff in src_file_list] +