diff --git a/makefile b/makefile index b4b1c4c5a909ffa1734fd6427c922593ded70a58..67882e8094e02246fbd97d723b0bd7fcbee8d211 100644 --- a/makefile +++ b/makefile @@ -2,7 +2,9 @@ MPICXX = mpicxx LINKER = mpicxx DEFINES = CFLAGS = -Wall \ - -O2 + -O2 \ + #-pg \ + #-finstrument-functions LIBS = -lfftw3_mpi \ -lfftw3 \ diff --git a/src/RMHD_converter.cpp b/src/RMHD_converter.cpp index 18ef57cd6eda397aec52d839d7d9e93a36c79f92..e4394241ecb5c633b1518eaaa21957fed0acca82 100644 --- a/src/RMHD_converter.cpp +++ b/src/RMHD_converter.cpp @@ -57,23 +57,18 @@ RMHD_converter::RMHD_converter( this->c3 = (fftwf_complex*)this->r3; // allocate plans - this->complex2real0 = fftwf_mpi_plan_dft_c2r_3d( - f3r->sizes[0], f3r->sizes[1], f3r->sizes[2], + ptrdiff_t blabla[] = {this->f3r->sizes[0], + this->f3r->sizes[1], + this->f3r->sizes[2]}; + this->complex2real = fftwf_mpi_plan_many_dft_c2r( + 3, + blabla, + 2, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, this->c3, this->r3, MPI_COMM_WORLD, FFTW_ESTIMATE); - this->complex2real1 = fftwf_mpi_plan_dft_c2r_3d( - f3r->sizes[0], f3r->sizes[1], f3r->sizes[2], - this->c3 + this->f3c->local_size, - this->r3 + 2*this->f3c->local_size, - MPI_COMM_WORLD, - FFTW_PATIENT); - - // various descriptions for the real data - n[0] = N0*2; - n[1] = N1; - n[2] = N2; - this->f4r = new field_descriptor(3, n, MPI_REAL4, MPI_COMM_WORLD); this->s = new Morton_shuffler(N0, N1, N2, 2, nfiles); } @@ -85,7 +80,6 @@ RMHD_converter::~RMHD_converter() delete this->f2c; delete this->f3c; delete this->f3r; - delete this->f4r; delete this->s; @@ -93,8 +87,7 @@ RMHD_converter::~RMHD_converter() fftwf_free(this->c12); fftwf_free(this->r3); - fftwf_destroy_plan(this->complex2real0); - fftwf_destroy_plan(this->complex2real1); + fftwf_destroy_plan(this->complex2real); } int RMHD_converter::convert( @@ -109,7 +102,6 @@ int RMHD_converter::convert( fftwf_copy_complex_array( this->f2c, this->c12, this->f3c, this->c3); - fftwf_execute(this->complex2real0); //read second field this->f0c->read(ifile1, (void*)this->c0); @@ -118,13 +110,12 @@ int RMHD_converter::convert( fftwf_copy_complex_array( this->f2c, this->c12, this->f3c, this->c3 + this->f3c->local_size); - fftwf_execute(this->complex2real1); - fftwf_clip_zero_padding(this->f4r, this->r3); + this->f3c->interleave(this->c3, 2); - // mix components - this->f3r->interleave(this->r3, 2); + fftwf_execute(this->complex2real); + fftwf_clip_zero_padding(this->f3r, this->r3, 2); this->s->shuffle(this->r3, ofile); return EXIT_SUCCESS; } diff --git a/src/RMHD_converter.hpp b/src/RMHD_converter.hpp index 6ce834c6f0f912ebfff24775d3ece70c84081129..c792cdf4ee279a6a570c8d58495a1583c8e16288 100644 --- a/src/RMHD_converter.hpp +++ b/src/RMHD_converter.hpp @@ -20,9 +20,6 @@ class RMHD_converter field_descriptor *f2c; // descriptor for 3D fully transposed input field_descriptor *f3c, *f3r; // descriptors for FFT - // descriptor for N0*2 x N1 x N2 real space array - field_descriptor *f4r; - Morton_shuffler *s; fftwf_complex *c0 ; // array to store 2D input @@ -30,8 +27,7 @@ class RMHD_converter fftwf_complex *c3 ; // array to store resized Fourier data float *r3 ; // array to store real space data - fftwf_plan complex2real0; - fftwf_plan complex2real1; + fftwf_plan complex2real; /* methods */ RMHD_converter( diff --git a/src/fftwf_tools.cpp b/src/fftwf_tools.cpp index 1fb8436c775a1431dcb7be2a37be72bf7e3c35b5..3ea239bc1d7935f4662b5102ce7b16eb29204d4f 100644 --- a/src/fftwf_tools.cpp +++ b/src/fftwf_tools.cpp @@ -115,7 +115,8 @@ int fftwf_copy_complex_array( int fftwf_clip_zero_padding( field_descriptor *f, - float *a) + float *a, + int howmany) { if (f->ndims != 3) return EXIT_FAILURE; @@ -123,9 +124,9 @@ int fftwf_clip_zero_padding( for (int i0 = 0; i0 < f->subsizes[0]; i0++) for (int i1 = 0; i1 < f->sizes[1]; i1++) { - std::copy(a, a + f->sizes[2], b); - a += f->sizes[2] + 2; - b += f->sizes[2]; + std::copy(a, a + f->sizes[2]*howmany, b); + a += f->sizes[2]*howmany + 2*howmany; + b += f->sizes[2]*howmany; } return EXIT_SUCCESS; } diff --git a/src/fftwf_tools.hpp b/src/fftwf_tools.hpp index 17344e53dfb38b03a8b472b03702410e3eb29a31..f39ed5277d2bada764f6c8b89b17841b121dbf4d 100644 --- a/src/fftwf_tools.hpp +++ b/src/fftwf_tools.hpp @@ -20,7 +20,8 @@ int fftwf_copy_complex_array( int fftwf_clip_zero_padding( field_descriptor *f, - float *a); + float *a, + int howmany=1); /* function to get pair of descriptors for real and Fourier space * arrays used with fftw. diff --git a/src/field_descriptor.cpp b/src/field_descriptor.cpp index 2c37486bf7d52494dc32f1f4c65497461d73af5d..56dd4346d723dc172566207c2e4c3997722382ea 100644 --- a/src/field_descriptor.cpp +++ b/src/field_descriptor.cpp @@ -266,17 +266,29 @@ int field_descriptor::interleave( } int field_descriptor::interleave( - fftw_complex *input, - fftw_complex *output, + fftwf_complex *a, int dim) { - // TODO: implement inplace interleaver - for (int k = 0; k < this->local_size; k++) - for (int j = 0; j < dim; j++) - { - output[k*dim + j][0] = input[j*this->local_size + k][0]; - output[k*dim + j][1] = input[j*this->local_size + k][1]; - } + fftwf_iodim howmany_dims[2]; + howmany_dims[0].n = dim; + howmany_dims[0].is = this->local_size; + howmany_dims[0].os = 1; + howmany_dims[1].n = this->local_size; + howmany_dims[1].is = 1; + howmany_dims[1].os = dim; + const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]); + + fftwf_plan tmp = fftwf_plan_guru_dft( + /*rank*/0, + /*dims*/NULL, + howmany_rank, + howmany_dims, + a, + a, + +1, + FFTW_ESTIMATE); + fftwf_execute(tmp); + fftwf_destroy_plan(tmp); return EXIT_SUCCESS; } diff --git a/src/field_descriptor.hpp b/src/field_descriptor.hpp index 5166d022949d2ac2e5fcb0836b5bbbb04485a295..c2fb4c5fec4336179cf13743db536319995d2414 100644 --- a/src/field_descriptor.hpp +++ b/src/field_descriptor.hpp @@ -61,8 +61,7 @@ class field_descriptor float *input, int dim); int interleave( - fftw_complex *input, - fftw_complex *output, + fftwf_complex *input, int dim); };