Skip to content
Snippets Groups Projects
fftw_tools.cpp 6.44 KiB
/**********************************************************************
*                                                                     *
*  Copyright 2015 Max Planck Institute                                *
*                 for Dynamics and Self-Organization                  *
*                                                                     *
*  This file is part of bfps.                                         *
*                                                                     *
*  bfps is free software: you can redistribute it and/or modify       *
*  it under the terms of the GNU General Public License as published  *
*  by the Free Software Foundation, either version 3 of the License,  *
*  or (at your option) any later version.                             *
*                                                                     *
*  bfps is distributed in the hope that it will be useful,            *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of     *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      *
*  GNU General Public License for more details.                       *
*                                                                     *
*  You should have received a copy of the GNU General Public License  *
*  along with bfps.  If not, see <http://www.gnu.org/licenses/>       *
*                                                                     *
* Contact: Cristian.Lalescu@ds.mpg.de                                 *
*                                                                     *
**********************************************************************/

#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include "base.hpp"
#include "fftw_tools.hpp"

#define NDEBUG

template <class rnumber>
int clip_zero_padding(
        field_descriptor<rnumber> *f,
        rnumber *a,
        int howmany)
{
    if (f->ndims < 3)
        return EXIT_FAILURE;
    rnumber *b = a;
    ptrdiff_t copy_size = f->sizes[2] * howmany;
    ptrdiff_t skip_size = copy_size + 2*howmany;
    for (int i0 = 0; i0 < f->subsizes[0]; i0++)
        for (int i1 = 0; i1 < f->sizes[1]; i1++)
        {
            std::copy(a, a + copy_size, b);
            a += skip_size;
            b += copy_size;
        }
    return EXIT_SUCCESS;
}



#define TOOLS_IMPLEMENTATION(FFTW, R, MPI_RNUM, MPI_CNUM) \
template <> \
int copy_complex_array<R>( \
        field_descriptor<R> *fi, \
        R (*ai)[2], \
        field_descriptor<R> *fo, \
        R (*ao)[2], \
        int howmany) \
{ \
    DEBUG_MSG("entered copy_complex_array\n"); \
    FFTW(complex) *buffer; \
    buffer = FFTW(alloc_complex)(fi->slice_size*howmany); \
 \
    int min_fast_dim; \
    min_fast_dim = \
        (fi->sizes[2] > fo->sizes[2]) ? \
         fo->sizes[2] : fi->sizes[2]; \
 \
    /* clean up destination, in case we're padding with zeros
       (even if only for one dimension) */ \
    std::fill_n((R*)ao, fo->local_size*2, 0.0); \
 \
    int64_t ii0, ii1; \
    int64_t oi0, oi1; \
    int64_t delta1, delta0; \
    int irank, orank; \
    delta0 = (fo->sizes[0] - fi->sizes[0]); \
    delta1 = (fo->sizes[1] - fi->sizes[1]); \
    for (ii0=0; ii0 < fi->sizes[0]; ii0++) \
    { \
        if (ii0 <= fi->sizes[0]/2) \
        { \
            oi0 = ii0; \
            if (oi0 > fo->sizes[0]/2) \
                continue; \
        } \
        else \
        { \
            oi0 = ii0 + delta0; \
            if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) \
                continue; \
        } \
        irank = fi->rank[ii0]; \
        orank = fo->rank[oi0]; \
        if ((irank == orank) && \
            (irank == fi->myrank)) \
        { \
            std::copy( \
                    (R*)(ai + (ii0 - fi->starts[0]    )*fi->slice_size), \
                    (R*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), \
                    (R*)buffer); \
        } \
        else \
        { \
            if (fi->myrank == irank) \
            { \
                MPI_Send( \
                        (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), \
                        fi->slice_size, \
                        MPI_CNUM, \
                        orank, \
                        ii0, \
                        fi->comm); \
            } \
            if (fi->myrank == orank) \
            { \
                MPI_Recv( \
                        (void*)(buffer), \
                        fi->slice_size, \
                        MPI_CNUM, \
                        irank, \
                        ii0, \
                        fi->comm, \
                        MPI_STATUS_IGNORE); \
            } \
        } \
        if (fi->myrank == orank) \
        { \
            for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) \
            { \
                if (ii1 <= fi->sizes[1]/2) \
                { \
                    oi1 = ii1; \
                    if (oi1 > fo->sizes[1]/2) \
                        continue; \
                } \
                else \
                { \
                    oi1 = ii1 + delta1; \
                    if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) \
                        continue; \
                } \
                std::copy( \
                        (R*)(buffer + (ii1*fi->sizes[2]*howmany)), \
                        (R*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), \
                        (R*)(ao + \
                                 ((oi0 - fo->starts[0])*fo->sizes[1] + \
                                  oi1)*fo->sizes[2]*howmany)); \
            } \
        } \
    } \
    fftw_free(buffer); \
    MPI_Barrier(fi->comm); \
 \
    DEBUG_MSG("exiting copy_complex_array\n"); \
    return EXIT_SUCCESS; \
} \
 \
template <> \
int get_descriptors_3D<R>( \
        int n0, int n1, int n2, \
        field_descriptor<R> **fr, \
        field_descriptor<R> **fc) \
{ \
    int ntmp[3]; \
    ntmp[0] = n0; \
    ntmp[1] = n1; \
    ntmp[2] = n2; \
    *fr = new field_descriptor<R>(3, ntmp, MPI_RNUM, MPI_COMM_WORLD); \
    ntmp[0] = n0; \
    ntmp[1] = n1; \
    ntmp[2] = n2/2+1; \
    *fc = new field_descriptor<R>(3, ntmp, MPI_CNUM, MPI_COMM_WORLD); \
    return EXIT_SUCCESS; \
} \
 \
template \
int clip_zero_padding<R>( \
        field_descriptor<R> *f, \
        R *a, \
        int howmany); \



TOOLS_IMPLEMENTATION(
        FFTW_MANGLE_FLOAT,
        float,
        MPI_FLOAT,
        MPI_COMPLEX)
TOOLS_IMPLEMENTATION(
        FFTW_MANGLE_DOUBLE,
        double,
        MPI_DOUBLE,
        BFPS_MPICXX_DOUBLE_COMPLEX)