Skip to content
Snippets Groups Projects
Commit 4aeeb630 authored by Chichi Lalescu's avatar Chichi Lalescu
Browse files

Merge branch 'feature-interleave_pre_fft' into develop

parents d1bc906b d777a978
No related branches found
No related tags found
No related merge requests found
...@@ -2,7 +2,9 @@ MPICXX = mpicxx ...@@ -2,7 +2,9 @@ MPICXX = mpicxx
LINKER = mpicxx LINKER = mpicxx
DEFINES = DEFINES =
CFLAGS = -Wall \ CFLAGS = -Wall \
-O2 -O2 \
#-pg \
#-finstrument-functions
LIBS = -lfftw3_mpi \ LIBS = -lfftw3_mpi \
-lfftw3 \ -lfftw3 \
... ...
......
...@@ -57,23 +57,18 @@ RMHD_converter::RMHD_converter( ...@@ -57,23 +57,18 @@ RMHD_converter::RMHD_converter(
this->c3 = (fftwf_complex*)this->r3; this->c3 = (fftwf_complex*)this->r3;
// allocate plans // allocate plans
this->complex2real0 = fftwf_mpi_plan_dft_c2r_3d( ptrdiff_t blabla[] = {this->f3r->sizes[0],
f3r->sizes[0], f3r->sizes[1], f3r->sizes[2], this->f3r->sizes[1],
this->f3r->sizes[2]};
this->complex2real = fftwf_mpi_plan_many_dft_c2r(
3,
blabla,
2,
FFTW_MPI_DEFAULT_BLOCK,
FFTW_MPI_DEFAULT_BLOCK,
this->c3, this->r3, this->c3, this->r3,
MPI_COMM_WORLD, MPI_COMM_WORLD,
FFTW_ESTIMATE); FFTW_ESTIMATE);
this->complex2real1 = fftwf_mpi_plan_dft_c2r_3d(
f3r->sizes[0], f3r->sizes[1], f3r->sizes[2],
this->c3 + this->f3c->local_size,
this->r3 + 2*this->f3c->local_size,
MPI_COMM_WORLD,
FFTW_PATIENT);
// various descriptions for the real data
n[0] = N0*2;
n[1] = N1;
n[2] = N2;
this->f4r = new field_descriptor(3, n, MPI_REAL4, MPI_COMM_WORLD);
this->s = new Morton_shuffler(N0, N1, N2, 2, nfiles); this->s = new Morton_shuffler(N0, N1, N2, 2, nfiles);
} }
...@@ -85,7 +80,6 @@ RMHD_converter::~RMHD_converter() ...@@ -85,7 +80,6 @@ RMHD_converter::~RMHD_converter()
delete this->f2c; delete this->f2c;
delete this->f3c; delete this->f3c;
delete this->f3r; delete this->f3r;
delete this->f4r;
delete this->s; delete this->s;
...@@ -93,8 +87,7 @@ RMHD_converter::~RMHD_converter() ...@@ -93,8 +87,7 @@ RMHD_converter::~RMHD_converter()
fftwf_free(this->c12); fftwf_free(this->c12);
fftwf_free(this->r3); fftwf_free(this->r3);
fftwf_destroy_plan(this->complex2real0); fftwf_destroy_plan(this->complex2real);
fftwf_destroy_plan(this->complex2real1);
} }
int RMHD_converter::convert( int RMHD_converter::convert(
...@@ -109,7 +102,6 @@ int RMHD_converter::convert( ...@@ -109,7 +102,6 @@ int RMHD_converter::convert(
fftwf_copy_complex_array( fftwf_copy_complex_array(
this->f2c, this->c12, this->f2c, this->c12,
this->f3c, this->c3); this->f3c, this->c3);
fftwf_execute(this->complex2real0);
//read second field //read second field
this->f0c->read(ifile1, (void*)this->c0); this->f0c->read(ifile1, (void*)this->c0);
...@@ -118,13 +110,12 @@ int RMHD_converter::convert( ...@@ -118,13 +110,12 @@ int RMHD_converter::convert(
fftwf_copy_complex_array( fftwf_copy_complex_array(
this->f2c, this->c12, this->f2c, this->c12,
this->f3c, this->c3 + this->f3c->local_size); this->f3c, this->c3 + this->f3c->local_size);
fftwf_execute(this->complex2real1);
fftwf_clip_zero_padding(this->f4r, this->r3); this->f3c->interleave(this->c3, 2);
// mix components fftwf_execute(this->complex2real);
this->f3r->interleave(this->r3, 2);
fftwf_clip_zero_padding(this->f3r, this->r3, 2);
this->s->shuffle(this->r3, ofile); this->s->shuffle(this->r3, ofile);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
... ...
......
...@@ -20,9 +20,6 @@ class RMHD_converter ...@@ -20,9 +20,6 @@ class RMHD_converter
field_descriptor *f2c; // descriptor for 3D fully transposed input field_descriptor *f2c; // descriptor for 3D fully transposed input
field_descriptor *f3c, *f3r; // descriptors for FFT field_descriptor *f3c, *f3r; // descriptors for FFT
// descriptor for N0*2 x N1 x N2 real space array
field_descriptor *f4r;
Morton_shuffler *s; Morton_shuffler *s;
fftwf_complex *c0 ; // array to store 2D input fftwf_complex *c0 ; // array to store 2D input
...@@ -30,8 +27,7 @@ class RMHD_converter ...@@ -30,8 +27,7 @@ class RMHD_converter
fftwf_complex *c3 ; // array to store resized Fourier data fftwf_complex *c3 ; // array to store resized Fourier data
float *r3 ; // array to store real space data float *r3 ; // array to store real space data
fftwf_plan complex2real0; fftwf_plan complex2real;
fftwf_plan complex2real1;
/* methods */ /* methods */
RMHD_converter( RMHD_converter(
... ...
......
#include <stdlib.h> #include <stdlib.h>
#include <algorithm> #include <algorithm>
#include <iostream> #include <iostream>
#include "field_descriptor.hpp"
#include "fftwf_tools.hpp" #include "fftwf_tools.hpp"
...@@ -116,17 +115,20 @@ int fftwf_copy_complex_array( ...@@ -116,17 +115,20 @@ int fftwf_copy_complex_array(
int fftwf_clip_zero_padding( int fftwf_clip_zero_padding(
field_descriptor *f, field_descriptor *f,
float *a) float *a,
int howmany)
{ {
if (f->ndims != 3) if (f->ndims != 3)
return EXIT_FAILURE; return EXIT_FAILURE;
float *b = a; float *b = a;
ptrdiff_t copy_size = f->sizes[2] * howmany;
ptrdiff_t skip_size = copy_size + 2*howmany;
for (int i0 = 0; i0 < f->subsizes[0]; i0++) for (int i0 = 0; i0 < f->subsizes[0]; i0++)
for (int i1 = 0; i1 < f->sizes[1]; i1++) for (int i1 = 0; i1 < f->sizes[1]; i1++)
{ {
std::copy(a, a + f->sizes[2], b); std::copy(a, a + copy_size, b);
a += f->sizes[2] + 2; a += skip_size;
b += f->sizes[2]; b += copy_size;
} }
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
...@@ -148,28 +150,3 @@ int fftwf_get_descriptors_3D( ...@@ -148,28 +150,3 @@ int fftwf_get_descriptors_3D(
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
/* the following is copied from
* http://agentzlerich.blogspot.com/2010/01/using-fftw-for-in-place-matrix.html
* */
fftwf_plan plan_transpose(
int rows,
int cols,
float *in,
float *out,
const unsigned flags)
{
fftwf_iodim howmany_dims[2];
howmany_dims[0].n = rows;
howmany_dims[0].is = cols;
howmany_dims[0].os = 1;
howmany_dims[1].n = cols;
howmany_dims[1].is = 1;
howmany_dims[1].os = rows;
const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]);
return fftwf_plan_guru_r2r(
/*rank*/0, /*dims*/NULL,
howmany_rank, howmany_dims,
in, out, /*kind*/NULL, flags);
}
#include <mpi.h> #include <mpi.h>
#include <fftw3-mpi.h> #include <fftw3-mpi.h>
#include "field_descriptor.hpp"
#ifndef FFTWF_TOOLS #ifndef FFTWF_TOOLS
...@@ -19,7 +20,8 @@ int fftwf_copy_complex_array( ...@@ -19,7 +20,8 @@ int fftwf_copy_complex_array(
int fftwf_clip_zero_padding( int fftwf_clip_zero_padding(
field_descriptor *f, field_descriptor *f,
float *a); float *a,
int howmany=1);
/* function to get pair of descriptors for real and Fourier space /* function to get pair of descriptors for real and Fourier space
* arrays used with fftw. * arrays used with fftw.
...@@ -34,12 +36,5 @@ int fftwf_get_descriptors_3D( ...@@ -34,12 +36,5 @@ int fftwf_get_descriptors_3D(
field_descriptor **fr, field_descriptor **fr,
field_descriptor **fc); field_descriptor **fc);
fftwf_plan plan_transpose(
int rows,
int cols,
float *in,
float *out,
const unsigned flags = FFTW_ESTIMATE);
#endif//FFTWF_TOOLS #endif//FFTWF_TOOLS
...@@ -3,8 +3,6 @@ ...@@ -3,8 +3,6 @@
#include <iostream> #include <iostream>
#include "field_descriptor.hpp" #include "field_descriptor.hpp"
extern int myrank, nprocs;
field_descriptor::field_descriptor( field_descriptor::field_descriptor(
int ndims, int ndims,
int *n, int *n,
...@@ -241,6 +239,9 @@ int field_descriptor::interleave( ...@@ -241,6 +239,9 @@ int field_descriptor::interleave(
float *a, float *a,
int dim) int dim)
{ {
/* the following is copied from
* http://agentzlerich.blogspot.com/2010/01/using-fftw-for-in-place-matrix.html
* */
fftwf_iodim howmany_dims[2]; fftwf_iodim howmany_dims[2];
howmany_dims[0].n = dim; howmany_dims[0].n = dim;
howmany_dims[0].is = this->local_size; howmany_dims[0].is = this->local_size;
...@@ -265,17 +266,29 @@ int field_descriptor::interleave( ...@@ -265,17 +266,29 @@ int field_descriptor::interleave(
} }
int field_descriptor::interleave( int field_descriptor::interleave(
fftw_complex *input, fftwf_complex *a,
fftw_complex *output,
int dim) int dim)
{ {
// TODO: implement inplace interleaver fftwf_iodim howmany_dims[2];
for (int k = 0; k < this->local_size; k++) howmany_dims[0].n = dim;
for (int j = 0; j < dim; j++) howmany_dims[0].is = this->local_size;
{ howmany_dims[0].os = 1;
output[k*dim + j][0] = input[j*this->local_size + k][0]; howmany_dims[1].n = this->local_size;
output[k*dim + j][1] = input[j*this->local_size + k][1]; howmany_dims[1].is = 1;
} howmany_dims[1].os = dim;
const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]);
fftwf_plan tmp = fftwf_plan_guru_dft(
/*rank*/0,
/*dims*/NULL,
howmany_rank,
howmany_dims,
a,
a,
+1,
FFTW_ESTIMATE);
fftwf_execute(tmp);
fftwf_destroy_plan(tmp);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
... ...
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
#define FIELD_DESCRIPTOR #define FIELD_DESCRIPTOR
extern int myrank, nprocs;
class field_descriptor class field_descriptor
{ {
public: public:
...@@ -59,8 +61,7 @@ class field_descriptor ...@@ -59,8 +61,7 @@ class field_descriptor
float *input, float *input,
int dim); int dim);
int interleave( int interleave(
fftw_complex *input, fftwf_complex *input,
fftw_complex *output,
int dim); int dim);
}; };
... ...
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment