diff --git a/src/base.hpp b/src/base.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d4a63a187072f4d6e425e5fba137691edf874b12 --- /dev/null +++ b/src/base.hpp @@ -0,0 +1,43 @@ +#include <mpi.h> +#include <stdarg.h> +#include <iostream> + +#ifndef BASE + +#define BASE + +static const int message_buffer_length = 1024; +static char debug_message_buffer[message_buffer_length]; +extern int myrank, nprocs; + +#ifndef NDEBUG + +inline void DEBUG_MSG(const char * format, ...) +{ + va_list argptr; + va_start(argptr, format); + sprintf( + debug_message_buffer, + "cpu%.4d ", + myrank); + vsnprintf( + debug_message_buffer + 8, + message_buffer_length - 8, + format, + argptr); + va_end(argptr); + std::cerr << debug_message_buffer; +} + +#define CHECK_POINT() DEBUG_MSG("%s %s", __FILE__, __LINE__) + +#else + + #define DEBUG_MSG(x) + +#define CHECK_POINT() + +#endif//NDEBUG + +#endif//BASE + diff --git a/src/field_descriptor.cpp b/src/field_descriptor.cpp index 759219b9a3c7b5a8e45b29ea31bd89451b13c543..77d7bc3b18ceb2cddc8310b237d8d75224a742e0 100644 --- a/src/field_descriptor.cpp +++ b/src/field_descriptor.cpp @@ -1,6 +1,7 @@ #include <stdlib.h> #include <algorithm> #include <iostream> +#include "base.hpp" #include "field_descriptor.hpp" field_descriptor::field_descriptor( @@ -9,6 +10,8 @@ field_descriptor::field_descriptor( MPI_Datatype element_type, MPI_Comm COMM_TO_USE) { + DEBUG_MSG("entered field_descriptor::field_descriptor\n"); + //CHECK_POINT(); this->comm = COMM_TO_USE; MPI_Comm_rank(this->comm, &this->myrank); MPI_Comm_size(this->comm, &this->nprocs); @@ -42,15 +45,61 @@ field_descriptor::field_descriptor( this->slice_size *= this->subsizes[i]; this->full_size *= this->sizes[i]; } - MPI_Type_create_subarray( - ndims, - this->sizes, - this->subsizes, - this->starts, - MPI_ORDER_C, - this->mpi_dtype, - &this->mpi_array_dtype); - MPI_Type_commit(&this->mpi_array_dtype); + DEBUG_MSG( + "inside field_descriptor constructor, about to call " + "MPI_Type_create_subarray\n" + "%d %d %d\n", + this->sizes[0], + this->subsizes[0], + this->starts[0]); + int local_zero_array[this->nprocs], zero_array[this->nprocs]; + for (int i=0; i<this->nprocs; i++) + local_zero_array[i] = 0; + local_zero_array[this->myrank] = (this->subsizes[0] == 0) ? 1 : 0; + MPI_Allreduce( + local_zero_array, + zero_array, + this->nprocs, + MPI_INT, + MPI_SUM, + this->comm); + int no_of_excluded_ranks = 0; + for (int i = 0; i<this->nprocs; i++) + no_of_excluded_ranks += zero_array[i]; + if (no_of_excluded_ranks == 0) + { + this->io_comm = this->comm; + this->io_nprocs = this->nprocs; + this->io_myrank = this->myrank; + } + else + { + int excluded_rank[no_of_excluded_ranks]; + for (int i=0, j=0; i<this->nprocs; i++) + if (zero_array[i]) + { + excluded_rank[j] = i; + j++; + } + MPI_Group tgroup0, tgroup; + MPI_Comm_group(this->comm, &tgroup0); + MPI_Group_excl(tgroup0, no_of_excluded_ranks, excluded_rank, &tgroup); + MPI_Comm_create(this->comm, tgroup, &this->io_comm); + MPI_Group_free(&tgroup0); + MPI_Group_free(&tgroup); + } + if (this->subsizes[0] > 0) + { + MPI_Type_create_subarray( + ndims, + this->sizes, + this->subsizes, + this->starts, + MPI_ORDER_C, + this->mpi_dtype, + &this->mpi_array_dtype); + MPI_Type_commit(&this->mpi_array_dtype); + } this->rank = new int[this->sizes[0]]; int *local_rank = new int[this->sizes[0]]; std::fill_n(local_rank, this->sizes[0], 0); @@ -65,48 +114,62 @@ field_descriptor::field_descriptor( MPI_SUM, this->comm); delete[] local_rank; + DEBUG_MSG("exiting field_descriptor::field_descriptor\n"); } field_descriptor::~field_descriptor() { + DEBUG_MSG("entered field_descriptor::~field_descriptor\n"); delete[] this->sizes; delete[] this->subsizes; delete[] this->starts; delete[] this->rank; - MPI_Type_free(&this->mpi_array_dtype); +// if (this->subsizes[0] > 0) + { + DEBUG_MSG("deallocating mpi_array_dtype\n"); + MPI_Type_free(&this->mpi_array_dtype); + if (this->nprocs != this->io_nprocs) + { + DEBUG_MSG("freeing io_comm\n"); + MPI_Comm_free(&this->io_comm); + } + } + DEBUG_MSG("exiting field_descriptor::~field_descriptor\n"); } int field_descriptor::read( const char *fname, void *buffer) { - MPI_Info info; - MPI_Info_create(&info); - MPI_File f; - char ffname[200]; - sprintf(ffname, "%s", fname); - - MPI_File_open( - this->comm, - ffname, - MPI_MODE_RDONLY, - info, - &f); - MPI_File_set_view( - f, - 0, - this->mpi_dtype, - this->mpi_array_dtype, - "native", //this needs to be made more general - info); - MPI_File_read_all( - f, - buffer, - this->local_size, - this->mpi_dtype, - MPI_STATUS_IGNORE); - MPI_File_close(&f); + if (this->subsizes[0] > 0) + { + MPI_Info info; + MPI_Info_create(&info); + MPI_File f; + char ffname[200]; + sprintf(ffname, "%s", fname); + MPI_File_open( + this->io_comm, + ffname, + MPI_MODE_RDONLY, + info, + &f); + MPI_File_set_view( + f, + 0, + this->mpi_dtype, + this->mpi_array_dtype, + "native", //this needs to be made more general + info); + MPI_File_read_all( + f, + buffer, + this->local_size, + this->mpi_dtype, + MPI_STATUS_IGNORE); + MPI_File_close(&f); + } return EXIT_SUCCESS; } @@ -114,32 +177,35 @@ int field_descriptor::write( const char *fname, void *buffer) { - MPI_Info info; - MPI_Info_create(&info); - MPI_File f; - char ffname[200]; - sprintf(ffname, "%s", fname); + if (this->subsizes[0] > 0) + { + MPI_Info info; + MPI_Info_create(&info); + MPI_File f; + char ffname[200]; + sprintf(ffname, "%s", fname); - MPI_File_open( - this->comm, - ffname, - MPI_MODE_CREATE | MPI_MODE_WRONLY, - info, - &f); - MPI_File_set_view( - f, - 0, - this->mpi_dtype, - this->mpi_array_dtype, - "native", //this needs to be made more general - info); - MPI_File_write_all( - f, - buffer, - this->local_size, - this->mpi_dtype, - MPI_STATUS_IGNORE); - MPI_File_close(&f); + MPI_File_open( + this->comm, + ffname, + MPI_MODE_CREATE | MPI_MODE_WRONLY, + info, + &f); + MPI_File_set_view( + f, + 0, + this->mpi_dtype, + this->mpi_array_dtype, + "native", //this needs to be made more general + info); + MPI_File_write_all( + f, + buffer, + this->local_size, + this->mpi_dtype, + MPI_STATUS_IGNORE); + MPI_File_close(&f); + } return EXIT_SUCCESS; } @@ -300,14 +366,3 @@ field_descriptor* field_descriptor::get_transpose() return new field_descriptor(this->ndims, n, this->mpi_dtype, this->comm); } -void proc_print_err_message(const char *message) -{ -#ifndef NDEBUG - for (int i = 0; i < nprocs; i++) - { - if (myrank == i) - std::cerr << i << " " << message << std::endl; - MPI_Barrier(MPI_COMM_WORLD); - } -#endif -} diff --git a/src/field_descriptor.hpp b/src/field_descriptor.hpp index c2fb4c5fec4336179cf13743db536319995d2414..9960e0efc660e7cb8f90ac397ea08dc53f9ea1b7 100644 --- a/src/field_descriptor.hpp +++ b/src/field_descriptor.hpp @@ -19,8 +19,8 @@ class field_descriptor int *rank; ptrdiff_t slice_size, local_size, full_size; MPI_Datatype mpi_array_dtype, mpi_dtype; - int myrank, nprocs; - MPI_Comm comm; + int myrank, nprocs, io_myrank, io_nprocs; + MPI_Comm comm, io_comm; /* methods */ @@ -80,7 +80,5 @@ int fftwf_clip_zero_padding( field_descriptor *f, float *a); -void proc_print_err_message(const char *message); - #endif//FIELD_DESCRIPTOR diff --git a/src/p3DFFT_to_iR.cpp b/src/p3DFFT_to_iR.cpp index 03fbeb3b723e35b2bf6270c23f33876cdf33e221..72c389903155c48f7b82570465e569d8ef9268c4 100644 --- a/src/p3DFFT_to_iR.cpp +++ b/src/p3DFFT_to_iR.cpp @@ -1,17 +1,18 @@ +#include "base.hpp" #include "p3DFFT_to_iR.hpp" #include <string> #include <iostream> - p3DFFT_to_iR::p3DFFT_to_iR( int n0, int n1, int n2, int N0, int N1, int N2, - int howmany) + int howmany, + bool allocate_fields) { this->howmany = howmany; int n[7]; - proc_print_err_message("entering constructor of p3DFFT_to_iR"); + DEBUG_MSG("entering constructor of p3DFFT_to_iR\n"); // first 3 arguments are dimensions for input array // i.e. actual dimensions for the Fourier representation. @@ -43,29 +44,34 @@ p3DFFT_to_iR::p3DFFT_to_iR( N0, N1, N2, &this->f3r, &this->f3c); - //allocate fields - this->c0 = fftwf_alloc_complex(this->f0c->local_size); - this->c12 = fftwf_alloc_complex(this->f1c->local_size); - // 4 instead of 2, because we have 2 fields to write - this->r3 = fftwf_alloc_real( 2*this->f3c->local_size*this->howmany); - // all FFTs are going to be inplace - this->c3 = (fftwf_complex*)this->r3; - - // allocate plans - ptrdiff_t blabla[] = {this->f3r->sizes[0], - this->f3r->sizes[1], - this->f3r->sizes[2]}; - this->complex2real = fftwf_mpi_plan_many_dft_c2r( - 3, - blabla, - this->howmany, - FFTW_MPI_DEFAULT_BLOCK, - FFTW_MPI_DEFAULT_BLOCK, - this->c3, this->r3, - MPI_COMM_WORLD, - FFTW_ESTIMATE); - - proc_print_err_message("exiting constructor of p3DFFT_to_iR"); + this->fields_allocated = false; + if (allocate_fields) + { + //allocate fields + this->c0 = fftwf_alloc_complex(this->f0c->local_size); + this->c12 = fftwf_alloc_complex(this->f1c->local_size); + // 4 instead of 2, because we have 2 fields to write + this->r3 = fftwf_alloc_real( 2*this->f3c->local_size*this->howmany); + // all FFTs are going to be inplace + this->c3 = (fftwf_complex*)this->r3; + + // allocate plans + ptrdiff_t blabla[] = {this->f3r->sizes[0], + this->f3r->sizes[1], + this->f3r->sizes[2]}; + this->complex2real = fftwf_mpi_plan_many_dft_c2r( + 3, + blabla, + this->howmany, + FFTW_MPI_DEFAULT_BLOCK, + FFTW_MPI_DEFAULT_BLOCK, + this->c3, this->r3, + MPI_COMM_WORLD, + FFTW_ESTIMATE); + this->fields_allocated = true; + } + + DEBUG_MSG("exiting constructor of p3DFFT_to_iR\n"); } p3DFFT_to_iR::~p3DFFT_to_iR() @@ -76,11 +82,13 @@ p3DFFT_to_iR::~p3DFFT_to_iR() delete this->f3c; delete this->f3r; - fftwf_free(this->c0); - fftwf_free(this->c12); - fftwf_free(this->r3); - - fftwf_destroy_plan(this->complex2real); + if (this->fields_allocated) + { + fftwf_free(this->c0); + fftwf_free(this->c12); + fftwf_free(this->r3); + fftwf_destroy_plan(this->complex2real); + } } int p3DFFT_to_iR::read( @@ -89,34 +97,34 @@ int p3DFFT_to_iR::read( //read fields for (int i = 0; i < this->howmany; i++) { - proc_print_err_message("p3DFFT_to_iR::read " - "this->f0c->read(ifile0, (void*)this->c0);"); + DEBUG_MSG("p3DFFT_to_iR::read " + "this->f0c->read(ifile0, (void*)this->c0);\n"); this->f0c->read(ifile[i], (void*)this->c0); - proc_print_err_message("p3DFFT_to_iR::read " - "this->f0c->transpose(this->c0, this->c12);"); + DEBUG_MSG("p3DFFT_to_iR::read " + "this->f0c->transpose(this->c0, this->c12);\n"); this->f0c->transpose(this->c0, this->c12); - proc_print_err_message("p3DFFT_to_iR::read " - "this->f1c->transpose(this->c12);"); + DEBUG_MSG("p3DFFT_to_iR::read " + "this->f1c->transpose(this->c12);\n"); this->f1c->transpose(this->c12); - proc_print_err_message("p3DFFT_to_iR::read " - "fftwf_copy_complex_array("); + DEBUG_MSG("p3DFFT_to_iR::read " + "fftwf_copy_complex_array(\n"); fftwf_copy_complex_array( this->f2c, this->c12, this->f3c, this->c3 + i*this->f3c->local_size); } - proc_print_err_message("p3DFFT_to_iR::read " - "this->f3c->interleave(this->c3, 2);"); + DEBUG_MSG("p3DFFT_to_iR::read " + "this->f3c->interleave(this->c3, 2);\n"); this->f3c->interleave(this->c3, this->howmany); - proc_print_err_message("p3DFFT_to_iR::read " - "fftwf_execute(this->complex2real);"); + DEBUG_MSG("p3DFFT_to_iR::read " + "fftwf_execute(this->complex2real);\n"); fftwf_execute(this->complex2real); - proc_print_err_message("p3DFFT_to_iR::read " - "fftwf_clip_zero_padding(this->f3r, this->r3, 2);"); + DEBUG_MSG("p3DFFT_to_iR::read " + "fftwf_clip_zero_padding(this->f3r, this->r3, 2);\n"); fftwf_clip_zero_padding(this->f3r, this->r3, this->howmany); - proc_print_err_message("p3DFFT_to_iR::read return"); + DEBUG_MSG("p3DFFT_to_iR::read return\n"); return EXIT_SUCCESS; } diff --git a/src/p3DFFT_to_iR.hpp b/src/p3DFFT_to_iR.hpp index 0d61c2612684ee3be24ca0fa6b5e43bae9b23a3c..a3adb827a58736c6ba8d944c2a7c93af7f7fef60 100644 --- a/src/p3DFFT_to_iR.hpp +++ b/src/p3DFFT_to_iR.hpp @@ -29,6 +29,7 @@ class p3DFFT_to_iR fftwf_complex *c12; // array to store transposed input fftwf_complex *c3 ; // array to store resized Fourier data float *r3 ; // array to store real space data + bool fields_allocated; fftwf_plan complex2real; @@ -36,7 +37,8 @@ class p3DFFT_to_iR p3DFFT_to_iR( int n0, int n1, int n2, int N0, int N1, int N2, - int howmany); + int howmany, + bool allocate_fields = true); ~p3DFFT_to_iR(); int read( diff --git a/src/test_field_descriptor.cpp b/src/test_field_descriptor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6dcb2621edbb78c99ec40630a3d953721f808378 --- /dev/null +++ b/src/test_field_descriptor.cpp @@ -0,0 +1,66 @@ +#include "p3DFFT_to_iR.hpp" +#include "Morton_shuffler.hpp" +#include <iostream> + +int myrank, nprocs; + +int main(int argc, char *argv[]) +{ + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + int n, N, nfiles, nfields; + int test_type = 0; + switch(argc) + { + case 6: + test_type = atoi(argv[5]); + case 5: + n = atoi(argv[1]); + N = atoi(argv[2]); + nfiles = atoi(argv[3]); + nfields = atoi(argv[4]); + break; + default: + std::cerr << + "not enough (or too many) parameters.\naborting." << + std::endl; + MPI_Finalize(); + return EXIT_SUCCESS; + } + p3DFFT_to_iR *r = new p3DFFT_to_iR( + (n/2+1), n, n, + N, N, N, + nfields, + test_type); + + Morton_shuffler *s = new Morton_shuffler( + N, N, N, nfields, nfiles); + + if (test_type) + { + // initialize file names + char* ifile[nfields]; + for (int i=0; i<nfields; i++) + { + ifile[i] = (char*)malloc(100*sizeof(char)); + sprintf(ifile[i], "Kdata%d", i); + } + + //read + r->read(ifile); + for (int i = 0; i<nfields; i++) + free(ifile[i]); + s->shuffle(r->r3, "Rdata"); + + delete s; + } + delete r; + + // clean up + fftwf_mpi_cleanup(); + MPI_Finalize(); + return EXIT_SUCCESS; +} +