Commit 1032993d authored by Cristian Lalescu's avatar Cristian Lalescu
Browse files

make fftw_tools template functions

parent 5690a465
......@@ -58,7 +58,7 @@ endif
base_files := \
field_descriptor \
fftwf_tools \
fftw_tools \
Morton_shuffler \
p3DFFT_to_iR \
fluid_solver
......
......@@ -22,7 +22,7 @@
#include <stdlib.h>
#include <iostream>
#include "field_descriptor.hpp"
#include "fftwf_tools.hpp"
#include "fftw_tools.hpp"
#ifndef MORTON_SHUFFLER
......
......@@ -21,16 +21,16 @@
#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include "fftwf_tools.hpp"
#include "fftw_tools.hpp"
// should I use a template here?
int fftwf_copy_complex_array(
field_descriptor<float> *fi,
fftwf_complex *ai,
field_descriptor<float> *fo,
fftwf_complex *ao)
template <class rnumber>
int copy_complex_array(
field_descriptor<rnumber> *fi,
rnumber (*ai)[2],
field_descriptor<rnumber> *fo,
rnumber (*ao)[2])
{
if (((fi->ndims != 3) ||
(fo->ndims != 3)) ||
......@@ -46,7 +46,7 @@ int fftwf_copy_complex_array(
// clean up destination, in case we're padding with zeros
// (even if only for one dimension)
std::fill_n((float*)ao, fo->local_size, 0.0);
std::fill_n((rnumber*)ao, fo->local_size, 0.0);
int64_t ii0, ii1;
int64_t oi0, oi1;
......@@ -74,9 +74,9 @@ int fftwf_copy_complex_array(
(irank == fi->myrank))
{
std::copy(
(float*)(ai + (ii0 - fi->starts[0] )*fi->slice_size),
(float*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size),
(float*)buffer);
(rnumber*)(ai + (ii0 - fi->starts[0] )*fi->slice_size),
(rnumber*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size),
(rnumber*)buffer);
}
else
{
......@@ -119,9 +119,9 @@ int fftwf_copy_complex_array(
continue;
}
std::copy(
(float*)(buffer + ii1*fi->sizes[2]),
(float*)(buffer + ii1*fi->sizes[2] + min_fast_dim),
(float*)(ao +
(rnumber*)(buffer + ii1*fi->sizes[2]),
(rnumber*)(buffer + ii1*fi->sizes[2] + min_fast_dim),
(rnumber*)(ao +
((oi0 - fo->starts[0])*fo->sizes[1] +
oi1)*fo->sizes[2]));
}
......@@ -133,14 +133,17 @@ int fftwf_copy_complex_array(
return EXIT_SUCCESS;
}
int fftwf_clip_zero_padding(
field_descriptor<float> *f,
float *a,
template <class rnumber>
int clip_zero_padding(
field_descriptor<rnumber> *f,
rnumber *a,
int howmany)
{
if (f->ndims != 3)
return EXIT_FAILURE;
float *b = a;
rnumber *b = a;
ptrdiff_t copy_size = f->sizes[2] * howmany;
ptrdiff_t skip_size = copy_size + 2*howmany;
for (int i0 = 0; i0 < f->subsizes[0]; i0++)
......@@ -153,20 +156,46 @@ int fftwf_clip_zero_padding(
return EXIT_SUCCESS;
}
int fftwf_get_descriptors_3D(
template <class rnumber>
int get_descriptors_3D(
int n0, int n1, int n2,
field_descriptor<float> **fr,
field_descriptor<float> **fc)
field_descriptor<rnumber> **fr,
field_descriptor<rnumber> **fc)
{
int ntmp[3];
ntmp[0] = n0;
ntmp[1] = n1;
ntmp[2] = n2;
*fr = new field_descriptor<float>(3, ntmp, MPI_FLOAT, MPI_COMM_WORLD);
*fr = new field_descriptor<rnumber>(3, ntmp, MPI_FLOAT, MPI_COMM_WORLD);
ntmp[0] = n0;
ntmp[1] = n1;
ntmp[2] = n2/2+1;
*fc = new field_descriptor<float>(3, ntmp, MPI_COMPLEX8, MPI_COMM_WORLD);
*fc = new field_descriptor<rnumber>(3, ntmp, MPI_COMPLEX8, MPI_COMM_WORLD);
return EXIT_SUCCESS;
}
#define FORCE_IMPLEMENTATION(rnumber) \
template \
int copy_complex_array<rnumber>( \
field_descriptor<rnumber> *fi, \
rnumber (*ai)[2], \
field_descriptor<rnumber> *fo, \
rnumber (*ao)[2]); \
\
template \
int clip_zero_padding<rnumber>( \
field_descriptor<rnumber> *f, \
rnumber *a, \
int howmany); \
\
template \
int get_descriptors_3D<rnumber>( \
int n0, int n1, int n2, \
field_descriptor<rnumber> **fr, \
field_descriptor<rnumber> **fc);
FORCE_IMPLEMENTATION(float)
FORCE_IMPLEMENTATION(double)
......@@ -22,9 +22,9 @@
#include <fftw3-mpi.h>
#include "field_descriptor.hpp"
#ifndef FFTWF_TOOLS
#ifndef FFTW_TOOLS
#define FFTWF_TOOLS
#define FFTW_TOOLS
extern int myrank, nprocs;
......@@ -32,15 +32,17 @@ extern int myrank, nprocs;
* Fourier space: either chop off high modes, or pad with zeros.
* the arrays are assumed to use 3D mpi fftw layout.
* */
int fftwf_copy_complex_array(
field_descriptor<float> *fi,
fftwf_complex *ai,
field_descriptor<float> *fo,
fftwf_complex *ao);
int fftwf_clip_zero_padding(
field_descriptor<float> *f,
float *a,
template <class rnumber>
int copy_complex_array(
field_descriptor<rnumber> *fi,
rnumber (*ai)[2],
field_descriptor<rnumber> *fo,
rnumber (*ao)[2]);
template <class rnumber>
int clip_zero_padding(
field_descriptor<rnumber> *f,
rnumber *a,
int howmany=1);
/* function to get pair of descriptors for real and Fourier space
......@@ -51,10 +53,11 @@ int fftwf_clip_zero_padding(
* 2*fc->local_size, and then the zeros cleaned up before trying
* to write data.
* */
int fftwf_get_descriptors_3D(
template <class rnumber>
int get_descriptors_3D(
int n0, int n1, int n2,
field_descriptor<float> **fr,
field_descriptor<float> **fc);
field_descriptor<rnumber> **fr,
field_descriptor<rnumber> **fc);
#endif//FFTWF_TOOLS
#endif//FFTW_TOOLS
......@@ -86,9 +86,9 @@ field_descriptor<rnumber>::field_descriptor(
tsubsizes[i] = this->subsizes[i];
tstarts[i] = this->starts[i];
}
tsizes[ndims-1] *= sizeof(float);
tsubsizes[ndims-1] *= sizeof(float);
tstarts[ndims-1] *= sizeof(float);
tsizes[ndims-1] *= sizeof(rnumber);
tsubsizes[ndims-1] *= sizeof(rnumber);
tstarts[ndims-1] *= sizeof(rnumber);
if (this->mpi_dtype == MPI_COMPLEX8)
{
tsizes[ndims-1] *= 2;
......@@ -482,4 +482,5 @@ field_descriptor<float>* field_descriptor<float>::get_transpose()
}
template class field_descriptor<float>;
template class field_descriptor<double>;
......@@ -21,7 +21,7 @@
#include "fluid_solver.hpp"
#include "fftwf_tools.hpp"
#include "fftw_tools.hpp"
......@@ -36,7 +36,7 @@ fluid_solver<R>::fluid_solver( \
int ny, \
int nz) \
{ \
FFTW(get_descriptors_3D)(nz, ny, nx, &this->fr, &this->fc);\
get_descriptors_3D<R>(nz, ny, nx, &this->fr, &this->fc);\
this->cvorticity = FFTW(alloc_complex)(this->fc->local_size*3);\
this->cvelocity = FFTW(alloc_complex)(this->fc->local_size*3);\
this->rvorticity = FFTW(alloc_real)(this->fc->local_size*6);\
......
......@@ -18,6 +18,7 @@
*
************************************************************************/
#include "base.hpp"
#include "fluid_solver.hpp"
#include <iostream>
......@@ -29,10 +30,16 @@ int main(int argc, char *argv[])
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
fluid_solver<float> fs;
fluid_solver<float> *fs;
fs = new fluid_solver<float>(32, 32, 32);
DEBUG_MSG("fluid_solver object created\n");
delete fs;
DEBUG_MSG("fluid_solver object deleted\n");
// clean up
fftwf_mpi_cleanup();
fftw_mpi_cleanup();
MPI_Finalize();
return EXIT_SUCCESS;
}
......
......@@ -69,7 +69,7 @@ p3DFFT_to_iR::p3DFFT_to_iR(
// following 3 arguments are dimensions for real space grid dimensions
// f3r and f3c will be allocated in this call
fftwf_get_descriptors_3D(
get_descriptors_3D<float>(
N0, N1, N2,
&this->f3r, &this->f3c);
......@@ -141,7 +141,7 @@ int p3DFFT_to_iR::read(
this->f1c->transpose(this->c12);
DEBUG_MSG("p3DFFT_to_iR::read "
"fftwf_copy_complex_array(\n");
fftwf_copy_complex_array(
copy_complex_array<float>(
this->f2c, this->c12,
this->f3c, this->c3 + i*this->f3c->local_size);
}
......@@ -156,7 +156,7 @@ int p3DFFT_to_iR::read(
DEBUG_MSG("p3DFFT_to_iR::read "
"fftwf_clip_zero_padding(this->f3r, this->r3, 2);\n");
fftwf_clip_zero_padding(this->f3r, this->r3, this->howmany);
clip_zero_padding<float>(this->f3r, this->r3, this->howmany);
DEBUG_MSG("p3DFFT_to_iR::read return\n");
return EXIT_SUCCESS;
}
......
......@@ -22,7 +22,7 @@
#include <stdlib.h>
#include <iostream>
#include "field_descriptor.hpp"
#include "fftwf_tools.hpp"
#include "fftw_tools.hpp"
#ifndef P3DFFT_TO_IR
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment