/********************************************************************** * * * Copyright 2015 Max Planck Institute * * for Dynamics and Self-Organization * * * * This file is part of bfps. * * * * bfps is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published * * by the Free Software Foundation, either version 3 of the License, * * or (at your option) any later version. * * * * bfps is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with bfps. If not, see <http://www.gnu.org/licenses/> * * * * Contact: Cristian.Lalescu@ds.mpg.de * * * **********************************************************************/ #include <stdlib.h> #include <algorithm> #include <iostream> #include "base.hpp" #include "fftw_tools.hpp" #define NDEBUG template <class rnumber> int clip_zero_padding( field_descriptor<rnumber> *f, rnumber *a, int howmany) { if (f->ndims < 3) return EXIT_FAILURE; rnumber *b = a; ptrdiff_t copy_size = f->sizes[2] * howmany; ptrdiff_t skip_size = copy_size + 2*howmany; for (int i0 = 0; i0 < f->subsizes[0]; i0++) for (int i1 = 0; i1 < f->sizes[1]; i1++) { std::copy(a, a + copy_size, b); a += skip_size; b += copy_size; } return EXIT_SUCCESS; } #define TOOLS_IMPLEMENTATION(FFTW, R, MPI_RNUM, MPI_CNUM) \ template <> \ int copy_complex_array<R>( \ field_descriptor<R> *fi, \ R (*ai)[2], \ field_descriptor<R> *fo, \ R (*ao)[2], \ int howmany) \ { \ DEBUG_MSG("entered copy_complex_array\n"); \ FFTW(complex) *buffer; \ buffer = FFTW(alloc_complex)(fi->slice_size*howmany); \ \ int min_fast_dim; \ min_fast_dim = \ (fi->sizes[2] > fo->sizes[2]) ? \ fo->sizes[2] : fi->sizes[2]; \ \ /* clean up destination, in case we're padding with zeros (even if only for one dimension) */ \ std::fill_n((R*)ao, fo->local_size*2, 0.0); \ \ int64_t ii0, ii1; \ int64_t oi0, oi1; \ int64_t delta1, delta0; \ int irank, orank; \ delta0 = (fo->sizes[0] - fi->sizes[0]); \ delta1 = (fo->sizes[1] - fi->sizes[1]); \ for (ii0=0; ii0 < fi->sizes[0]; ii0++) \ { \ if (ii0 <= fi->sizes[0]/2) \ { \ oi0 = ii0; \ if (oi0 > fo->sizes[0]/2) \ continue; \ } \ else \ { \ oi0 = ii0 + delta0; \ if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) \ continue; \ } \ irank = fi->rank[ii0]; \ orank = fo->rank[oi0]; \ if ((irank == orank) && \ (irank == fi->myrank)) \ { \ std::copy( \ (R*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), \ (R*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size), \ (R*)buffer); \ } \ else \ { \ if (fi->myrank == irank) \ { \ MPI_Send( \ (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), \ fi->slice_size, \ MPI_CNUM, \ orank, \ ii0, \ fi->comm); \ } \ if (fi->myrank == orank) \ { \ MPI_Recv( \ (void*)(buffer), \ fi->slice_size, \ MPI_CNUM, \ irank, \ ii0, \ fi->comm, \ MPI_STATUS_IGNORE); \ } \ } \ if (fi->myrank == orank) \ { \ for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) \ { \ if (ii1 <= fi->sizes[1]/2) \ { \ oi1 = ii1; \ if (oi1 > fo->sizes[1]/2) \ continue; \ } \ else \ { \ oi1 = ii1 + delta1; \ if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) \ continue; \ } \ std::copy( \ (R*)(buffer + (ii1*fi->sizes[2]*howmany)), \ (R*)(buffer + (ii1*fi->sizes[2] + min_fast_dim)*howmany), \ (R*)(ao + \ ((oi0 - fo->starts[0])*fo->sizes[1] + \ oi1)*fo->sizes[2]*howmany)); \ } \ } \ } \ fftw_free(buffer); \ MPI_Barrier(fi->comm); \ \ DEBUG_MSG("exiting copy_complex_array\n"); \ return EXIT_SUCCESS; \ } \ \ template <> \ int get_descriptors_3D<R>( \ int n0, int n1, int n2, \ field_descriptor<R> **fr, \ field_descriptor<R> **fc) \ { \ int ntmp[3]; \ ntmp[0] = n0; \ ntmp[1] = n1; \ ntmp[2] = n2; \ *fr = new field_descriptor<R>(3, ntmp, MPI_RNUM, MPI_COMM_WORLD); \ ntmp[0] = n0; \ ntmp[1] = n1; \ ntmp[2] = n2/2+1; \ *fc = new field_descriptor<R>(3, ntmp, MPI_CNUM, MPI_COMM_WORLD); \ return EXIT_SUCCESS; \ } \ \ template \ int clip_zero_padding<R>( \ field_descriptor<R> *f, \ R *a, \ int howmany); \ TOOLS_IMPLEMENTATION( FFTW_MANGLE_FLOAT, float, MPI_FLOAT, MPI_COMPLEX) TOOLS_IMPLEMENTATION( FFTW_MANGLE_DOUBLE, double, MPI_DOUBLE, BFPS_MPICXX_DOUBLE_COMPLEX)