Skip to content
Snippets Groups Projects
fftw_tools.cpp 5.65 KiB
/***********************************************************************
*
*  Copyright 2015 Max Planck Institute for Dynamics and SelfOrganization
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contact: Cristian.Lalescu@ds.mpg.de
*
************************************************************************/

#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include "base.hpp"
#include "fftw_tools.hpp"

//#ifdef NDEBUG
//#undef NDEBUG
//#endif//NDEBUG

template <class rnumber>
int clip_zero_padding(
        field_descriptor<rnumber> *f,
        rnumber *a,
        int howmany)
{
    if (f->ndims < 3)
        return EXIT_FAILURE;
    rnumber *b = a;
    ptrdiff_t copy_size = f->sizes[2] * howmany;
    ptrdiff_t skip_size = copy_size + 2*howmany;
    for (int i0 = 0; i0 < f->subsizes[0]; i0++)
        for (int i1 = 0; i1 < f->sizes[1]; i1++)
        {
            std::copy(a, a + copy_size, b);
            a += skip_size;
            b += copy_size;
        }
    return EXIT_SUCCESS;
}



#define TOOLS_IMPLEMENTATION(FFTW, R, MPI_RNUM, MPI_CNUM) \
template <> \
int copy_complex_array<R>( \
        field_descriptor<R> *fi, \
        R (*ai)[2], \
        field_descriptor<R> *fo, \
        R (*ao)[2], \
        int howmany) \
{ \
    DEBUG_MSG("entered copy_complex_array\n"); \
    FFTW(complex) *buffer; \
    buffer = FFTW(alloc_complex)(fi->slice_size*howmany); \
 \
    int min_fast_dim; \
    min_fast_dim = \
        (fi->sizes[2] > fo->sizes[2]) ? \
         fo->sizes[2] : fi->sizes[2]; \
 \
    /* clean up destination, in case we're padding with zeros
       (even if only for one dimension) */ \
    std::fill_n((R*)ao, fo->local_size*2, 0.0); \
 \
    int64_t ii0, ii1; \
    int64_t oi0, oi1; \
    int64_t delta1, delta0; \
    int irank, orank; \
    delta0 = (fo->sizes[0] - fi->sizes[0]); \
    delta1 = (fo->sizes[1] - fi->sizes[1]); \
    for (ii0=0; ii0 < fi->sizes[0]; ii0++) \
    { \
        if (ii0 <= fi->sizes[0]/2) \
        { \
            oi0 = ii0; \
            if (oi0 > fo->sizes[0]/2) \
                continue; \
        } \
        else \
        { \
            oi0 = ii0 + delta0; \
            if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) \
                continue; \
        } \
        irank = fi->rank[ii0]; \
        orank = fo->rank[oi0]; \
        if ((irank == orank) && \
            (irank == fi->myrank)) \
        { \
            std::copy( \
                    (R*)(ai + (ii0 - fi->starts[0]    )*fi->slice_size), \
                    (R*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), \
                    (R*)buffer); \
        } \
        else \
        { \
            if (fi->myrank == irank) \
            { \
                MPI_Send( \
                        (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), \
                        fi->slice_size, \
                        MPI_CNUM, \
                        orank, \
                        ii0, \
                        fi->comm); \
            } \
            if (fi->myrank == orank) \
            { \
                MPI_Recv( \
                        (void*)(buffer), \
                        fi->slice_size, \
                        MPI_CNUM, \
                        irank, \
                        ii0, \
                        fi->comm, \
                        MPI_STATUS_IGNORE); \
            } \
        } \
        if (fi->myrank == orank) \
        { \
            for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) \
            { \
                if (ii1 <= fi->sizes[1]/2) \
                { \
                    oi1 = ii1; \
                    if (oi1 > fo->sizes[1]/2) \
                        continue; \
                } \
                else \
                { \
                    oi1 = ii1 + delta1; \
                    if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) \
                        continue; \
                } \
                std::copy( \
                        (R*)(buffer + (ii1*fi->sizes[2]*howmany)), \
                        (R*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), \
                        (R*)(ao + \
                                 ((oi0 - fo->starts[0])*fo->sizes[1] + \
                                  oi1)*fo->sizes[2]*howmany)); \
            } \
        } \
    } \
    fftw_free(buffer); \
    MPI_Barrier(fi->comm); \
 \
    DEBUG_MSG("exiting copy_complex_array\n"); \
    return EXIT_SUCCESS; \
} \
 \
template <> \
int get_descriptors_3D<R>( \
        int n0, int n1, int n2, \
        field_descriptor<R> **fr, \
        field_descriptor<R> **fc) \
{ \
    int ntmp[3]; \
    ntmp[0] = n0; \
    ntmp[1] = n1; \
    ntmp[2] = n2; \
    *fr = new field_descriptor<R>(3, ntmp, MPI_RNUM, MPI_COMM_WORLD); \
    ntmp[0] = n0; \
    ntmp[1] = n1; \
    ntmp[2] = n2/2+1; \
    *fc = new field_descriptor<R>(3, ntmp, MPI_CNUM, MPI_COMM_WORLD); \
    return EXIT_SUCCESS; \
} \
 \
template \
int clip_zero_padding<R>( \
        field_descriptor<R> *f, \
        R *a, \
        int howmany); \



TOOLS_IMPLEMENTATION(
        FFTW_MANGLE_FLOAT,
        float,
        MPI_FLOAT,
        MPI_COMPLEX)
TOOLS_IMPLEMENTATION(
        FFTW_MANGLE_DOUBLE,
        double,
        MPI_DOUBLE,
        BFPS_MPICXX_DOUBLE_COMPLEX)