Skip to content
Snippets Groups Projects
Commit 0e990f59 authored by Chichi Lalescu's avatar Chichi Lalescu
Browse files

perform all ffts at once, on interleaved input

for some reason, code is a lot faster now. which is good.
parent b037041e
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,9 @@ MPICXX = mpicxx
LINKER = mpicxx
DEFINES =
CFLAGS = -Wall \
-O2
-O2 \
#-pg \
#-finstrument-functions
LIBS = -lfftw3_mpi \
-lfftw3 \
......
......@@ -57,23 +57,18 @@ RMHD_converter::RMHD_converter(
this->c3 = (fftwf_complex*)this->r3;
// allocate plans
this->complex2real0 = fftwf_mpi_plan_dft_c2r_3d(
f3r->sizes[0], f3r->sizes[1], f3r->sizes[2],
ptrdiff_t blabla[] = {this->f3r->sizes[0],
this->f3r->sizes[1],
this->f3r->sizes[2]};
this->complex2real = fftwf_mpi_plan_many_dft_c2r(
3,
blabla,
2,
FFTW_MPI_DEFAULT_BLOCK,
FFTW_MPI_DEFAULT_BLOCK,
this->c3, this->r3,
MPI_COMM_WORLD,
FFTW_ESTIMATE);
this->complex2real1 = fftwf_mpi_plan_dft_c2r_3d(
f3r->sizes[0], f3r->sizes[1], f3r->sizes[2],
this->c3 + this->f3c->local_size,
this->r3 + 2*this->f3c->local_size,
MPI_COMM_WORLD,
FFTW_PATIENT);
// various descriptions for the real data
n[0] = N0*2;
n[1] = N1;
n[2] = N2;
this->f4r = new field_descriptor(3, n, MPI_REAL4, MPI_COMM_WORLD);
this->s = new Morton_shuffler(N0, N1, N2, 2, nfiles);
}
......@@ -85,7 +80,6 @@ RMHD_converter::~RMHD_converter()
delete this->f2c;
delete this->f3c;
delete this->f3r;
delete this->f4r;
delete this->s;
......@@ -93,8 +87,7 @@ RMHD_converter::~RMHD_converter()
fftwf_free(this->c12);
fftwf_free(this->r3);
fftwf_destroy_plan(this->complex2real0);
fftwf_destroy_plan(this->complex2real1);
fftwf_destroy_plan(this->complex2real);
}
int RMHD_converter::convert(
......@@ -109,7 +102,6 @@ int RMHD_converter::convert(
fftwf_copy_complex_array(
this->f2c, this->c12,
this->f3c, this->c3);
fftwf_execute(this->complex2real0);
//read second field
this->f0c->read(ifile1, (void*)this->c0);
......@@ -118,13 +110,12 @@ int RMHD_converter::convert(
fftwf_copy_complex_array(
this->f2c, this->c12,
this->f3c, this->c3 + this->f3c->local_size);
fftwf_execute(this->complex2real1);
fftwf_clip_zero_padding(this->f4r, this->r3);
this->f3c->interleave(this->c3, 2);
// mix components
this->f3r->interleave(this->r3, 2);
fftwf_execute(this->complex2real);
fftwf_clip_zero_padding(this->f3r, this->r3, 2);
this->s->shuffle(this->r3, ofile);
return EXIT_SUCCESS;
}
......
......@@ -20,9 +20,6 @@ class RMHD_converter
field_descriptor *f2c; // descriptor for 3D fully transposed input
field_descriptor *f3c, *f3r; // descriptors for FFT
// descriptor for N0*2 x N1 x N2 real space array
field_descriptor *f4r;
Morton_shuffler *s;
fftwf_complex *c0 ; // array to store 2D input
......@@ -30,8 +27,7 @@ class RMHD_converter
fftwf_complex *c3 ; // array to store resized Fourier data
float *r3 ; // array to store real space data
fftwf_plan complex2real0;
fftwf_plan complex2real1;
fftwf_plan complex2real;
/* methods */
RMHD_converter(
......
......@@ -115,7 +115,8 @@ int fftwf_copy_complex_array(
int fftwf_clip_zero_padding(
field_descriptor *f,
float *a)
float *a,
int howmany)
{
if (f->ndims != 3)
return EXIT_FAILURE;
......@@ -123,9 +124,9 @@ int fftwf_clip_zero_padding(
for (int i0 = 0; i0 < f->subsizes[0]; i0++)
for (int i1 = 0; i1 < f->sizes[1]; i1++)
{
std::copy(a, a + f->sizes[2], b);
a += f->sizes[2] + 2;
b += f->sizes[2];
std::copy(a, a + f->sizes[2]*howmany, b);
a += f->sizes[2]*howmany + 2*howmany;
b += f->sizes[2]*howmany;
}
return EXIT_SUCCESS;
}
......
......@@ -20,7 +20,8 @@ int fftwf_copy_complex_array(
int fftwf_clip_zero_padding(
field_descriptor *f,
float *a);
float *a,
int howmany=1);
/* function to get pair of descriptors for real and Fourier space
* arrays used with fftw.
......
......@@ -266,17 +266,29 @@ int field_descriptor::interleave(
}
int field_descriptor::interleave(
fftw_complex *input,
fftw_complex *output,
fftwf_complex *a,
int dim)
{
// TODO: implement inplace interleaver
for (int k = 0; k < this->local_size; k++)
for (int j = 0; j < dim; j++)
{
output[k*dim + j][0] = input[j*this->local_size + k][0];
output[k*dim + j][1] = input[j*this->local_size + k][1];
}
fftwf_iodim howmany_dims[2];
howmany_dims[0].n = dim;
howmany_dims[0].is = this->local_size;
howmany_dims[0].os = 1;
howmany_dims[1].n = this->local_size;
howmany_dims[1].is = 1;
howmany_dims[1].os = dim;
const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]);
fftwf_plan tmp = fftwf_plan_guru_dft(
/*rank*/0,
/*dims*/NULL,
howmany_rank,
howmany_dims,
a,
a,
+1,
FFTW_ESTIMATE);
fftwf_execute(tmp);
fftwf_destroy_plan(tmp);
return EXIT_SUCCESS;
}
......
......@@ -61,8 +61,7 @@ class field_descriptor
float *input,
int dim);
int interleave(
fftw_complex *input,
fftw_complex *output,
fftwf_complex *input,
int dim);
};
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment