-
Chichi Lalescu authoredChichi Lalescu authored
fftw_tools.cpp 6.44 KiB
/**********************************************************************
* *
* Copyright 2015 Max Planck Institute *
* for Dynamics and Self-Organization *
* *
* This file is part of bfps. *
* *
* bfps is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published *
* by the Free Software Foundation, either version 3 of the License, *
* or (at your option) any later version. *
* *
* bfps is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with bfps. If not, see <http://www.gnu.org/licenses/> *
* *
* Contact: Cristian.Lalescu@ds.mpg.de *
* *
**********************************************************************/
#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include "base.hpp"
#include "fftw_tools.hpp"
#define NDEBUG
template <class rnumber>
int clip_zero_padding(
field_descriptor<rnumber> *f,
rnumber *a,
int howmany)
{
if (f->ndims < 3)
return EXIT_FAILURE;
rnumber *b = a;
ptrdiff_t copy_size = f->sizes[2] * howmany;
ptrdiff_t skip_size = copy_size + 2*howmany;
for (int i0 = 0; i0 < f->subsizes[0]; i0++)
for (int i1 = 0; i1 < f->sizes[1]; i1++)
{
std::copy(a, a + copy_size, b);
a += skip_size;
b += copy_size;
}
return EXIT_SUCCESS;
}
#define TOOLS_IMPLEMENTATION(FFTW, R, MPI_RNUM, MPI_CNUM) \
template <> \
int copy_complex_array<R>( \
field_descriptor<R> *fi, \
R (*ai)[2], \
field_descriptor<R> *fo, \
R (*ao)[2], \
int howmany) \
{ \
DEBUG_MSG("entered copy_complex_array\n"); \
FFTW(complex) *buffer; \
buffer = FFTW(alloc_complex)(fi->slice_size*howmany); \
\
int min_fast_dim; \
min_fast_dim = \
(fi->sizes[2] > fo->sizes[2]) ? \
fo->sizes[2] : fi->sizes[2]; \
\
/* clean up destination, in case we're padding with zeros
(even if only for one dimension) */ \
std::fill_n((R*)ao, fo->local_size*2, 0.0); \
\
int64_t ii0, ii1; \
int64_t oi0, oi1; \
int64_t delta1, delta0; \
int irank, orank; \
delta0 = (fo->sizes[0] - fi->sizes[0]); \
delta1 = (fo->sizes[1] - fi->sizes[1]); \
for (ii0=0; ii0 < fi->sizes[0]; ii0++) \
{ \
if (ii0 <= fi->sizes[0]/2) \
{ \
oi0 = ii0; \
if (oi0 > fo->sizes[0]/2) \
continue; \
} \
else \
{ \
oi0 = ii0 + delta0; \
if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) \
continue; \
} \
irank = fi->rank[ii0]; \
orank = fo->rank[oi0]; \
if ((irank == orank) && \
(irank == fi->myrank)) \
{ \
std::copy( \
(R*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), \
(R*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), \
(R*)buffer); \
} \
else \
{ \
if (fi->myrank == irank) \
{ \
MPI_Send( \
(void*)(ai + (ii0-fi->starts[0])*fi->slice_size), \
fi->slice_size, \
MPI_CNUM, \
orank, \
ii0, \
fi->comm); \
} \
if (fi->myrank == orank) \
{ \
MPI_Recv( \
(void*)(buffer), \
fi->slice_size, \
MPI_CNUM, \
irank, \
ii0, \
fi->comm, \
MPI_STATUS_IGNORE); \
} \
} \
if (fi->myrank == orank) \
{ \
for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) \
{ \
if (ii1 <= fi->sizes[1]/2) \
{ \
oi1 = ii1; \
if (oi1 > fo->sizes[1]/2) \
continue; \
} \
else \
{ \
oi1 = ii1 + delta1; \
if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) \
continue; \
} \
std::copy( \
(R*)(buffer + (ii1*fi->sizes[2]*howmany)), \
(R*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), \
(R*)(ao + \
((oi0 - fo->starts[0])*fo->sizes[1] + \
oi1)*fo->sizes[2]*howmany)); \
} \
} \
} \
fftw_free(buffer); \
MPI_Barrier(fi->comm); \
\
DEBUG_MSG("exiting copy_complex_array\n"); \
return EXIT_SUCCESS; \
} \
\
template <> \
int get_descriptors_3D<R>( \
int n0, int n1, int n2, \
field_descriptor<R> **fr, \
field_descriptor<R> **fc) \
{ \
int ntmp[3]; \
ntmp[0] = n0; \
ntmp[1] = n1; \
ntmp[2] = n2; \
*fr = new field_descriptor<R>(3, ntmp, MPI_RNUM, MPI_COMM_WORLD); \
ntmp[0] = n0; \
ntmp[1] = n1; \
ntmp[2] = n2/2+1; \
*fc = new field_descriptor<R>(3, ntmp, MPI_CNUM, MPI_COMM_WORLD); \
return EXIT_SUCCESS; \
} \
\
template \
int clip_zero_padding<R>( \
field_descriptor<R> *f, \
R *a, \
int howmany); \
TOOLS_IMPLEMENTATION(
FFTW_MANGLE_FLOAT,
float,
MPI_FLOAT,
MPI_COMPLEX)
TOOLS_IMPLEMENTATION(
FFTW_MANGLE_DOUBLE,
double,
MPI_DOUBLE,
BFPS_MPICXX_DOUBLE_COMPLEX)