Skip to content
Snippets Groups Projects
Commit e616bb3d authored by Chichi Lalescu's avatar Chichi Lalescu
Browse files

Merge branch 'feature-inplace_fft' into develop

parents d5cf2e8c 00c4c5bd
Branches
Tags
No related merge requests found
...@@ -51,9 +51,10 @@ RMHD_converter::RMHD_converter( ...@@ -51,9 +51,10 @@ RMHD_converter::RMHD_converter(
//allocate fields //allocate fields
this->c0 = fftwf_alloc_complex(this->f0c->local_size); this->c0 = fftwf_alloc_complex(this->f0c->local_size);
this->c12 = fftwf_alloc_complex(this->f1c->local_size); this->c12 = fftwf_alloc_complex(this->f1c->local_size);
this->c3 = fftwf_alloc_complex(this->f3c->local_size);
// 4 instead of 2, because we have 2 fields to write // 4 instead of 2, because we have 2 fields to write
this->r3 = fftwf_alloc_real( 4*this->f3c->local_size); this->r3 = fftwf_alloc_real( 4*this->f3c->local_size);
// all FFTs are going to be inplace
this->c3 = (fftwf_complex*)this->r3;
// allocate plans // allocate plans
this->complex2real0 = fftwf_mpi_plan_dft_c2r_3d( this->complex2real0 = fftwf_mpi_plan_dft_c2r_3d(
...@@ -63,7 +64,8 @@ RMHD_converter::RMHD_converter( ...@@ -63,7 +64,8 @@ RMHD_converter::RMHD_converter(
FFTW_ESTIMATE); FFTW_ESTIMATE);
this->complex2real1 = fftwf_mpi_plan_dft_c2r_3d( this->complex2real1 = fftwf_mpi_plan_dft_c2r_3d(
f3r->sizes[0], f3r->sizes[1], f3r->sizes[2], f3r->sizes[0], f3r->sizes[1], f3r->sizes[2],
this->c3, this->r3 + 2*this->f3c->local_size, this->c3 + this->f3c->local_size,
this->r3 + 2*this->f3c->local_size,
MPI_COMM_WORLD, MPI_COMM_WORLD,
FFTW_PATIENT); FFTW_PATIENT);
...@@ -89,7 +91,6 @@ RMHD_converter::~RMHD_converter() ...@@ -89,7 +91,6 @@ RMHD_converter::~RMHD_converter()
fftwf_free(this->c0); fftwf_free(this->c0);
fftwf_free(this->c12); fftwf_free(this->c12);
fftwf_free(this->c3);
fftwf_free(this->r3); fftwf_free(this->r3);
fftwf_destroy_plan(this->complex2real0); fftwf_destroy_plan(this->complex2real0);
...@@ -109,7 +110,6 @@ int RMHD_converter::convert( ...@@ -109,7 +110,6 @@ int RMHD_converter::convert(
this->f2c, this->c12, this->f2c, this->c12,
this->f3c, this->c3); this->f3c, this->c3);
fftwf_execute(this->complex2real0); fftwf_execute(this->complex2real0);
proc_print_err_message("0 field read and transformed");
//read second field //read second field
this->f0c->read(ifile1, (void*)this->c0); this->f0c->read(ifile1, (void*)this->c0);
...@@ -117,22 +117,18 @@ int RMHD_converter::convert( ...@@ -117,22 +117,18 @@ int RMHD_converter::convert(
this->f1c->transpose(this->c12); this->f1c->transpose(this->c12);
fftwf_copy_complex_array( fftwf_copy_complex_array(
this->f2c, this->c12, this->f2c, this->c12,
this->f3c, this->c3); this->f3c, this->c3 + this->f3c->local_size);
fftwf_execute(this->complex2real1); fftwf_execute(this->complex2real1);
proc_print_err_message("1 field read and transformed");
fftwf_clip_zero_padding(this->f4r, this->r3); fftwf_clip_zero_padding(this->f4r, this->r3);
proc_print_err_message("clipped zero padding");
// new array where mixed components will be placed // new array where mixed components will be placed
float *rtmp = fftwf_alloc_real( 2*this->f3r->local_size); float *rtmp = fftwf_alloc_real( 2*this->f3r->local_size);
// mix components // mix components
this->f3r->interleave(this->r3, rtmp, 2); this->f3r->interleave(this->r3, rtmp, 2);
proc_print_err_message("interleaved array");
this->s->shuffle(rtmp, this->r3, ofile); this->s->shuffle(rtmp, this->r3, ofile);
proc_print_err_message("did zshuffle");
fftwf_free(rtmp); fftwf_free(rtmp);
return EXIT_SUCCESS; return EXIT_SUCCESS;
... ...
......
...@@ -148,3 +148,27 @@ int fftwf_get_descriptors_3D( ...@@ -148,3 +148,27 @@ int fftwf_get_descriptors_3D(
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
/* the following is copied from
* http://agentzlerich.blogspot.com/2010/01/using-fftw-for-in-place-matrix.html
* */
fftwf_plan plan_transpose(
int rows,
int cols,
float *in,
float *out,
const unsigned flags)
{
fftwf_iodim howmany_dims[2];
howmany_dims[0].n = rows;
howmany_dims[0].is = cols;
howmany_dims[0].os = 1;
howmany_dims[1].n = cols;
howmany_dims[1].is = 1;
howmany_dims[1].os = rows;
const int howmany_rank = sizeof(howmany_dims)/sizeof(howmany_dims[0]);
return fftw_plan_guru_r2r(/*rank*/0, /*dims*/NULL,
howmany_rank, howmany_dims,
in, out, /*kind*/NULL, flags);
}
...@@ -34,5 +34,12 @@ int fftwf_get_descriptors_3D( ...@@ -34,5 +34,12 @@ int fftwf_get_descriptors_3D(
field_descriptor **fr, field_descriptor **fr,
field_descriptor **fc); field_descriptor **fc);
fftwf_plan plan_transpose(
int rows,
int cols,
float *in,
float *out,
const unsigned flags = FFTW_ESTIMATE);
#endif//FFTWF_TOOLS #endif//FFTWF_TOOLS
...@@ -244,6 +244,21 @@ int field_descriptor::interleave( ...@@ -244,6 +244,21 @@ int field_descriptor::interleave(
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
int field_descriptor::interleave(
fftw_complex *input,
fftw_complex *output,
int dim)
{
// TODO: implement inplace interleaver
for (int k = 0; k < this->local_size; k++)
for (int j = 0; j < dim; j++)
{
output[k*dim + j][0] = input[j*this->local_size + k][0];
output[k*dim + j][1] = input[j*this->local_size + k][1];
}
return EXIT_SUCCESS;
}
field_descriptor* field_descriptor::get_transpose() field_descriptor* field_descriptor::get_transpose()
{ {
int n[this->ndims]; int n[this->ndims];
... ...
......
...@@ -59,6 +59,10 @@ class field_descriptor ...@@ -59,6 +59,10 @@ class field_descriptor
float *input, float *input,
float *output, float *output,
int dim); int dim);
int interleave(
fftw_complex *input,
fftw_complex *output,
int dim);
}; };
... ...
......
{ {
"metadata": { "metadata": {
"name": "", "name": "",
"signature": "sha256:516a254b6efbae6085846f2a801721af2ae2f8a4f2311b81b5b8f6564b5049ae" "signature": "sha256:7dead44f793111d0f8b169a9c6b140d43b9ac207648b3787a6263fd3ff30f493"
}, },
"nbformat": 3, "nbformat": 3,
"nbformat_minor": 0, "nbformat_minor": 0,
...@@ -63,8 +63,16 @@ ...@@ -63,8 +63,16 @@
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"prompt_number": 56 {
"output_type": "stream",
"stream": "stderr",
"text": [
"-c:15: RuntimeWarning: divide by zero encountered in divide\n"
]
}
],
"prompt_number": 2
}, },
{ {
"cell_type": "code", "cell_type": "code",
...@@ -151,17 +159,19 @@ ...@@ -151,17 +159,19 @@
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"prompt_number": 57 "prompt_number": 3
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"def get_cpp_data(branch = 'develop'):\n", "def get_cpp_data(branch = None):\n",
" if not (type(branch) == type(None)):\n",
" subprocess.call(['git', 'checkout', branch])\n", " subprocess.call(['git', 'checkout', branch])\n",
" subprocess.call(['rm', 'Rdata_z0000000', 'Rdata_z0000800'])\n", " subprocess.call(['rm', 'Rdata_z0000000', 'Rdata_z0000800'])\n",
" if subprocess.call(['make', 'full.elf']) == 0:\n", " if subprocess.call(['make', 'full.elf']) == 0:\n",
" subprocess.call(['mpirun.mpich',\n", " subprocess.call(['time',\n",
" 'mpirun.mpich',\n",
" '-np',\n", " '-np',\n",
" '8',\n", " '8',\n",
" './full.elf',\n", " './full.elf',\n",
...@@ -182,12 +192,15 @@ ...@@ -182,12 +192,15 @@
" print ('compilation error')\n", " print ('compilation error')\n",
" return None\n", " return None\n",
"\n", "\n",
"Rdata = get_cpp_data(branch = 'feature-arbitrary_dimensions')" "#Rdata = get_cpp_data(branch = 'develop')\n",
"# develop says 30 secs, inplace fft is 28 secs\n",
"#Rdata = get_cpp_data(branch = 'feature-inplace_fft')\n",
"Rdata = get_cpp_data()"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"prompt_number": 60 "prompt_number": 23
}, },
{ {
"cell_type": "code", "cell_type": "code",
...@@ -215,11 +228,11 @@ ...@@ -215,11 +228,11 @@
"output_type": "stream", "output_type": "stream",
"stream": "stdout", "stream": "stdout",
"text": [ "text": [
"3.8147e-06\n" "2.86102e-06\n"
] ]
} }
], ],
"prompt_number": 61 "prompt_number": 15
}, },
{ {
"cell_type": "code", "cell_type": "code",
...@@ -228,9 +241,9 @@ ...@@ -228,9 +241,9 @@
"# i0 = np.random.randint(16)\n", "# i0 = np.random.randint(16)\n",
"# i1 = np.random.randint(16)\n", "# i1 = np.random.randint(16)\n",
"# i2 = np.random.randint(16)\n", "# i2 = np.random.randint(16)\n",
"i0 = 8\n", "i0 = 6\n",
"i1 = 11\n", "i1 = 11\n",
"i2 = 2\n", "i2 = 15\n",
"z = cm.grid3D_to_zindex(np.array([i0, i1, i2]))\n", "z = cm.grid3D_to_zindex(np.array([i0, i1, i2]))\n",
"mlab.contour3d(Rdata_py[z, :, :, :, 0],\n", "mlab.contour3d(Rdata_py[z, :, :, :, 0],\n",
" color = (0, 1, 0))\n", " color = (0, 1, 0))\n",
...@@ -244,7 +257,7 @@ ...@@ -244,7 +257,7 @@
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"prompt_number": 35 "prompt_number": 16
}, },
{ {
"cell_type": "code", "cell_type": "code",
... ...
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment