diff --git a/src/field_descriptor.cpp b/src/field_descriptor.cpp index 198ab98c3230d6225a52143358cf1c52e824cca4..52be817edcc7e02ec3a3942b314b824864c8a6c8 100644 --- a/src/field_descriptor.cpp +++ b/src/field_descriptor.cpp @@ -1,4 +1,5 @@ #include <stdlib.h> +#include <algorithm> #include "field_descriptor.hpp" extern int myrank, nprocs; @@ -9,13 +10,14 @@ field_descriptor::field_descriptor( MPI_Datatype element_type) { this->ndims = ndims; - this->sizes = (int*)malloc(ndims*sizeof(int)); - this->subsizes = (int*)malloc(ndims*sizeof(int)); - this->starts = (int*)malloc(ndims*sizeof(int)); + this->sizes = new int[ndims]; + this->subsizes = new int[ndims]; + this->starts = new int[ndims]; this->sizes[0] = n[0]; this->subsizes[0] = n[0]/nprocs; this->starts[0] = myrank*(n[0]/nprocs); this->mpi_dtype = element_type; + this->slice_size = 1; this->local_size = this->subsizes[0]; this->full_size = this->sizes[0]; for (int i = 1; i < this->ndims; i++) @@ -23,6 +25,7 @@ field_descriptor::field_descriptor( this->sizes[i] = n[i]; this->subsizes[i] = n[i]; this->starts[i] = 0; + this->slice_size *= this->subsizes[i]; this->local_size *= this->subsizes[i]; this->full_size *= this->sizes[i]; } @@ -39,9 +42,9 @@ field_descriptor::field_descriptor( field_descriptor::~field_descriptor() { - free((void*)this->sizes); - free((void*)this->subsizes); - free((void*)this->starts); + delete[] this->sizes; + delete[] this->subsizes; + delete[] this->starts; MPI_Type_free(&this->mpi_array_dtype); } @@ -165,12 +168,113 @@ field_descriptor* field_descriptor::get_transpose() } -int resize( +// should I use a template here? +int fftwf_copy_complex_array( field_descriptor *fi, fftwf_complex *ai, field_descriptor *fo, fftwf_complex *ao) { + if ((fi->ndims != 3) || (fo->ndims != 3)) + return -1; + fftwf_complex *buffer; + buffer = fftwf_alloc_complex(fi->slice_size); + + int min_fast_dim; + min_fast_dim = + (fi->sizes[fi->ndims - 1] > fo->sizes[fi->ndims - 1]) ? + fo->sizes[fi->ndims - 1] : fi->sizes[fi->ndims - 1]; + + // clean up destination, in case we're padding with zeros + // (even if only for one dimension) + std::fill_n((float*)ao, fo->local_size, 0.0); + + int64_t ii0, ii1; + int64_t oi0, oi1; + int64_t delta1, delta0; + delta0 = (fo->sizes[0] - fi->sizes[0]); + delta1 = (fo->sizes[1] - fi->sizes[1]); + for (ii0=0; ii0 < fi->sizes[0]; ii0++) + { + if (ii0 <= fi->sizes[0]/2) + { + oi0 = ii0; + if (oi0 > fo->sizes[0]/2) + continue; + } + else + { + oi0 = ii0 + delta0; + if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) + continue; + } + if ((fi->rank(ii0) == fo->rank(oi0)) && + (myrank == fi->rank(ii0))) + { + std::copy( + ai + (ii0 - fi->starts[0] )*fi->slice_size, + ai + (ii0 - fi->starts[0] + 1)*fi->slice_size, + buffer); + } + else + { + if (myrank == fi->rank(ii0)) + { +// printf( +// "send %d ii0 = %ld oi0 = %ld fo->rank(oi0) = %d\n", +// myrank, +// ii0, +// oi0, +// fo->rank(oi0)); + MPI_Send( + (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), + fi->slice_size, + MPI_COMPLEX8, + fo->rank(oi0), + ii0, + MPI_COMM_WORLD); + } + if (myrank == fo->rank(oi0)) + { +// printf("receive %d ii0 = %ld oi0 = %ld\n", myrank, ii0, oi0); + MPI_Recv( + (void*)(buffer), + fi->slice_size, + MPI_COMPLEX8, + fi->rank(ii0), + ii0, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + if (myrank == fo->rank(oi0)) + { +// printf("%d ii0 = %ld\n", myrank, ii0); + for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) + { + if (ii1 <= fi->sizes[1]/2) + { + oi1 = ii1; + if (oi1 > fo->sizes[1]/2) + continue; + } + else + { + oi1 = ii1 + delta1; + if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2)) + continue; + } + std::copy( + (buffer + ii1*fi->sizes[2]), + (buffer + ii1*fi->sizes[2] + min_fast_dim), + (ao + ((oi0 - fo->starts[0])*fo->sizes[1] + oi1)*fo->sizes[2])); + } +// printf("%d ii0 = %ld, after copy\n", myrank, ii0); + } + } + fftw_free(buffer); + MPI_Barrier(MPI_COMM_WORLD); + return EXIT_SUCCESS; } diff --git a/src/field_descriptor.hpp b/src/field_descriptor.hpp index 1627669f91044151bc5d9afd869900a491b530bf..ebd89947a138d3fd92cacf590108d8f3a4eb497e 100644 --- a/src/field_descriptor.hpp +++ b/src/field_descriptor.hpp @@ -14,7 +14,7 @@ class field_descriptor int *subsizes; int *starts; int ndims; - int local_size, full_size; + int slice_size, local_size, full_size; MPI_Datatype mpi_array_dtype, mpi_dtype; @@ -47,6 +47,11 @@ class field_descriptor int transpose( float *input, float *output); + + inline int rank(int i0) + { + return i0 / this->subsizes[0]; + } }; @@ -54,7 +59,7 @@ class field_descriptor * Fourier space: either chop off high modes, or pad with zeros. * the arrays are assumed to use fftw layout. * */ -int resize( +int fftwf_copy_complex_array( field_descriptor *fi, fftwf_complex *ai, field_descriptor *fo,