diff --git a/makefile b/makefile index f05972414ec066186a03669c6907470799f0389a..9eb470ce27518da157a908ec4e0e5455977e8bf2 100644 --- a/makefile +++ b/makefile @@ -58,7 +58,7 @@ endif base_files := \ field_descriptor \ - fftwf_tools \ + fftw_tools \ Morton_shuffler \ p3DFFT_to_iR \ fluid_solver diff --git a/src/Morton_shuffler.hpp b/src/Morton_shuffler.hpp index a1e83b86c69417575e1dab3b02ef0e48aab7495a..afb03596a49199f6393ff17d0d858502cdc1ad08 100644 --- a/src/Morton_shuffler.hpp +++ b/src/Morton_shuffler.hpp @@ -22,7 +22,7 @@ #include <stdlib.h> #include <iostream> #include "field_descriptor.hpp" -#include "fftwf_tools.hpp" +#include "fftw_tools.hpp" #ifndef MORTON_SHUFFLER diff --git a/src/fftwf_tools.cpp b/src/fftw_tools.cpp similarity index 71% rename from src/fftwf_tools.cpp rename to src/fftw_tools.cpp index 20de89002a540eceab323c006669cb2cb6dd643c..795982531bfde0e7a908b6547d943f35c5c7b9b5 100644 --- a/src/fftwf_tools.cpp +++ b/src/fftw_tools.cpp @@ -21,16 +21,16 @@ #include <stdlib.h> #include <algorithm> #include <iostream> -#include "fftwf_tools.hpp" +#include "fftw_tools.hpp" -// should I use a template here? -int fftwf_copy_complex_array( - field_descriptor<float> *fi, - fftwf_complex *ai, - field_descriptor<float> *fo, - fftwf_complex *ao) +template <class rnumber> +int copy_complex_array( + field_descriptor<rnumber> *fi, + rnumber (*ai)[2], + field_descriptor<rnumber> *fo, + rnumber (*ao)[2]) { if (((fi->ndims != 3) || (fo->ndims != 3)) || @@ -46,7 +46,7 @@ int fftwf_copy_complex_array( // clean up destination, in case we're padding with zeros // (even if only for one dimension) - std::fill_n((float*)ao, fo->local_size, 0.0); + std::fill_n((rnumber*)ao, fo->local_size, 0.0); int64_t ii0, ii1; int64_t oi0, oi1; @@ -74,9 +74,9 @@ int fftwf_copy_complex_array( (irank == fi->myrank)) { std::copy( - (float*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), - (float*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), - (float*)buffer); + (rnumber*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), + (rnumber*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), + (rnumber*)buffer); } else { @@ -119,9 +119,9 @@ int fftwf_copy_complex_array( continue; } std::copy( - (float*)(buffer + ii1*fi->sizes[2]), - (float*)(buffer + ii1*fi->sizes[2] + min_fast_dim), - (float*)(ao + + (rnumber*)(buffer + ii1*fi->sizes[2]), + (rnumber*)(buffer + ii1*fi->sizes[2] + min_fast_dim), + (rnumber*)(ao + ((oi0 - fo->starts[0])*fo->sizes[1] + oi1)*fo->sizes[2])); } @@ -133,14 +133,17 @@ int fftwf_copy_complex_array( return EXIT_SUCCESS; } -int fftwf_clip_zero_padding( - field_descriptor<float> *f, - float *a, + + +template <class rnumber> +int clip_zero_padding( + field_descriptor<rnumber> *f, + rnumber *a, int howmany) { if (f->ndims != 3) return EXIT_FAILURE; - float *b = a; + rnumber *b = a; ptrdiff_t copy_size = f->sizes[2] * howmany; ptrdiff_t skip_size = copy_size + 2*howmany; for (int i0 = 0; i0 < f->subsizes[0]; i0++) @@ -153,20 +156,46 @@ int fftwf_clip_zero_padding( return EXIT_SUCCESS; } -int fftwf_get_descriptors_3D( + + +template <class rnumber> +int get_descriptors_3D( int n0, int n1, int n2, - field_descriptor<float> **fr, - field_descriptor<float> **fc) + field_descriptor<rnumber> **fr, + field_descriptor<rnumber> **fc) { int ntmp[3]; ntmp[0] = n0; ntmp[1] = n1; ntmp[2] = n2; - *fr = new field_descriptor<float>(3, ntmp, MPI_FLOAT, MPI_COMM_WORLD); + *fr = new field_descriptor<rnumber>(3, ntmp, MPI_FLOAT, MPI_COMM_WORLD); ntmp[0] = n0; ntmp[1] = n1; ntmp[2] = n2/2+1; - *fc = new field_descriptor<float>(3, ntmp, MPI_COMPLEX8, MPI_COMM_WORLD); + *fc = new field_descriptor<rnumber>(3, ntmp, MPI_COMPLEX8, MPI_COMM_WORLD); return EXIT_SUCCESS; } +#define FORCE_IMPLEMENTATION(rnumber) \ +template \ +int copy_complex_array<rnumber>( \ + field_descriptor<rnumber> *fi, \ + rnumber (*ai)[2], \ + field_descriptor<rnumber> *fo, \ + rnumber (*ao)[2]); \ + \ +template \ +int clip_zero_padding<rnumber>( \ + field_descriptor<rnumber> *f, \ + rnumber *a, \ + int howmany); \ + \ +template \ +int get_descriptors_3D<rnumber>( \ + int n0, int n1, int n2, \ + field_descriptor<rnumber> **fr, \ + field_descriptor<rnumber> **fc); + +FORCE_IMPLEMENTATION(float) +FORCE_IMPLEMENTATION(double) + diff --git a/src/fftwf_tools.hpp b/src/fftw_tools.hpp similarity index 76% rename from src/fftwf_tools.hpp rename to src/fftw_tools.hpp index 12cbb300911f5dde102d90828ad1bd1453b1d47f..78fc88c0e912064672af186e299ff732347336fa 100644 --- a/src/fftwf_tools.hpp +++ b/src/fftw_tools.hpp @@ -22,9 +22,9 @@ #include <fftw3-mpi.h> #include "field_descriptor.hpp" -#ifndef FFTWF_TOOLS +#ifndef FFTW_TOOLS -#define FFTWF_TOOLS +#define FFTW_TOOLS extern int myrank, nprocs; @@ -32,15 +32,17 @@ extern int myrank, nprocs; * Fourier space: either chop off high modes, or pad with zeros. * the arrays are assumed to use 3D mpi fftw layout. * */ -int fftwf_copy_complex_array( - field_descriptor<float> *fi, - fftwf_complex *ai, - field_descriptor<float> *fo, - fftwf_complex *ao); - -int fftwf_clip_zero_padding( - field_descriptor<float> *f, - float *a, +template <class rnumber> +int copy_complex_array( + field_descriptor<rnumber> *fi, + rnumber (*ai)[2], + field_descriptor<rnumber> *fo, + rnumber (*ao)[2]); + +template <class rnumber> +int clip_zero_padding( + field_descriptor<rnumber> *f, + rnumber *a, int howmany=1); /* function to get pair of descriptors for real and Fourier space @@ -51,10 +53,11 @@ int fftwf_clip_zero_padding( * 2*fc->local_size, and then the zeros cleaned up before trying * to write data. * */ -int fftwf_get_descriptors_3D( +template <class rnumber> +int get_descriptors_3D( int n0, int n1, int n2, - field_descriptor<float> **fr, - field_descriptor<float> **fc); + field_descriptor<rnumber> **fr, + field_descriptor<rnumber> **fc); -#endif//FFTWF_TOOLS +#endif//FFTW_TOOLS diff --git a/src/field_descriptor.cpp b/src/field_descriptor.cpp index b8a127b54646f3918716cd02305c1f850404ba51..23eaf05986e6df26ff11cfd6336dd0a12c86fa2e 100644 --- a/src/field_descriptor.cpp +++ b/src/field_descriptor.cpp @@ -86,9 +86,9 @@ field_descriptor<rnumber>::field_descriptor( tsubsizes[i] = this->subsizes[i]; tstarts[i] = this->starts[i]; } - tsizes[ndims-1] *= sizeof(float); - tsubsizes[ndims-1] *= sizeof(float); - tstarts[ndims-1] *= sizeof(float); + tsizes[ndims-1] *= sizeof(rnumber); + tsubsizes[ndims-1] *= sizeof(rnumber); + tstarts[ndims-1] *= sizeof(rnumber); if (this->mpi_dtype == MPI_COMPLEX8) { tsizes[ndims-1] *= 2; @@ -482,4 +482,5 @@ field_descriptor<float>* field_descriptor<float>::get_transpose() } template class field_descriptor<float>; +template class field_descriptor<double>; diff --git a/src/fluid_solver.cpp b/src/fluid_solver.cpp index 754ecfa172f9a3125a71b666d091d0fa2b0d03be..e24827858a9a4dbb878967886afe32f5b56686d4 100644 --- a/src/fluid_solver.cpp +++ b/src/fluid_solver.cpp @@ -21,7 +21,7 @@ #include "fluid_solver.hpp" -#include "fftwf_tools.hpp" +#include "fftw_tools.hpp" @@ -36,7 +36,7 @@ fluid_solver<R>::fluid_solver( \ int ny, \ int nz) \ { \ - FFTW(get_descriptors_3D)(nz, ny, nx, &this->fr, &this->fc);\ + get_descriptors_3D<R>(nz, ny, nx, &this->fr, &this->fc);\ this->cvorticity = FFTW(alloc_complex)(this->fc->local_size*3);\ this->cvelocity = FFTW(alloc_complex)(this->fc->local_size*3);\ this->rvorticity = FFTW(alloc_real)(this->fc->local_size*6);\ diff --git a/src/main_fluid_solver.cpp b/src/main_fluid_solver.cpp index f673e4a6ffcb7c57812404d80372e4b099a5057a..4c69e86d11597d100e9ccdc9978e82b1c42fd7c6 100644 --- a/src/main_fluid_solver.cpp +++ b/src/main_fluid_solver.cpp @@ -18,6 +18,7 @@ * ************************************************************************/ +#include "base.hpp" #include "fluid_solver.hpp" #include <iostream> @@ -29,10 +30,16 @@ int main(int argc, char *argv[]) MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - fluid_solver<float> fs; + fluid_solver<float> *fs; + fs = new fluid_solver<float>(32, 32, 32); + DEBUG_MSG("fluid_solver object created\n"); + + delete fs; + DEBUG_MSG("fluid_solver object deleted\n"); // clean up fftwf_mpi_cleanup(); + fftw_mpi_cleanup(); MPI_Finalize(); return EXIT_SUCCESS; } diff --git a/src/p3DFFT_to_iR.cpp b/src/p3DFFT_to_iR.cpp index cb4ecba07ee9a040c1e54d3c009881b99c7699ae..60ea120ea7d7adfd7ff049cfd781c1517e2d1cc3 100644 --- a/src/p3DFFT_to_iR.cpp +++ b/src/p3DFFT_to_iR.cpp @@ -69,7 +69,7 @@ p3DFFT_to_iR::p3DFFT_to_iR( // following 3 arguments are dimensions for real space grid dimensions // f3r and f3c will be allocated in this call - fftwf_get_descriptors_3D( + get_descriptors_3D<float>( N0, N1, N2, &this->f3r, &this->f3c); @@ -141,7 +141,7 @@ int p3DFFT_to_iR::read( this->f1c->transpose(this->c12); DEBUG_MSG("p3DFFT_to_iR::read " "fftwf_copy_complex_array(\n"); - fftwf_copy_complex_array( + copy_complex_array<float>( this->f2c, this->c12, this->f3c, this->c3 + i*this->f3c->local_size); } @@ -156,7 +156,7 @@ int p3DFFT_to_iR::read( DEBUG_MSG("p3DFFT_to_iR::read " "fftwf_clip_zero_padding(this->f3r, this->r3, 2);\n"); - fftwf_clip_zero_padding(this->f3r, this->r3, this->howmany); + clip_zero_padding<float>(this->f3r, this->r3, this->howmany); DEBUG_MSG("p3DFFT_to_iR::read return\n"); return EXIT_SUCCESS; } diff --git a/src/p3DFFT_to_iR.hpp b/src/p3DFFT_to_iR.hpp index db52e7968cca7da347eb03647cf6a97e868df4ca..63a25c914656b1b2ac64e01e1bf0a5da10b65793 100644 --- a/src/p3DFFT_to_iR.hpp +++ b/src/p3DFFT_to_iR.hpp @@ -22,7 +22,7 @@ #include <stdlib.h> #include <iostream> #include "field_descriptor.hpp" -#include "fftwf_tools.hpp" +#include "fftw_tools.hpp" #ifndef P3DFFT_TO_IR