diff --git a/src/Morton_shuffler.cpp b/src/Morton_shuffler.cpp index b771b69b07bef0daf739b99dc742b31855f27c70..7aa512f1e688814c7f804099d3b73f1524df5f3f 100644 --- a/src/Morton_shuffler.cpp +++ b/src/Morton_shuffler.cpp @@ -31,6 +31,12 @@ Morton_shuffler::Morton_shuffler( n[0] = (N0/8) * (N1/8) * (N2/8); n[1] = 8*8*8*this->d; this->dzcubbie = new field_descriptor(2, n, MPI_REAL4, MPI_COMM_WORLD); + char bla[200]; + for (int z = 0; z < this->dzcubbie->sizes[0]; z++) + { + sprintf(bla, "z = %d, zid = %d", z, this->dzcubbie->rank[z]); + proc_print_err_message(bla); + } //set up output file descriptor int out_rank, out_nprocs; @@ -65,15 +71,21 @@ int Morton_shuffler::shuffle( ptrdiff_t cubbie_size = 8*8*8*this->d; ptrdiff_t cc; float *rz = fftwf_alloc_real(cubbie_size); + char bla[200]; for (int k = 0; k < this->drcubbie->sizes[0]; k++) { - rid = this->drcubbie->rank(k); + rid = this->drcubbie->rank[k]; kk = k - this->drcubbie->starts[0]; for (int j = 0; j < this->drcubbie->sizes[1]; j++) for (int i = 0; i < this->drcubbie->sizes[2]; i++) { z = regular_to_zindex(k, j, i); - zid = this->dzcubbie->rank(z); + zid = this->dzcubbie->rank[z]; + sprintf( + bla, + "inside loops k = %d, j = %d, i = %d, z = %d, rid = %d, zid = %d", + k, j, i, z, rid, zid); + proc_print_err_message(bla); zz = z - this->dzcubbie->starts[0]; if (myrank == rid || myrank == zid) { diff --git a/src/RMHD_converter.cpp b/src/RMHD_converter.cpp index 3f8e1749e76c67a93630169d16e36fedca75ddad..d347009b7ebedbc4343c779f9c3fc79006776bbc 100644 --- a/src/RMHD_converter.cpp +++ b/src/RMHD_converter.cpp @@ -18,7 +18,6 @@ RMHD_converter::RMHD_converter( } int n[7]; - proc_print_err_message("aloha 00"); // first 3 arguments are dimensions for input array // i.e. actual dimensions for the Fourier representation. // NOT real space grid dimensions @@ -29,7 +28,6 @@ RMHD_converter::RMHD_converter( n[1] = n2; this->f0c = new field_descriptor(2, n, MPI_COMPLEX8, MPI_COMM_WORLD); - proc_print_err_message("aloha 01"); // f1c will be pointing at the input array after it has been // transposed in 2D, therefore we have this correspondence: // f0c->sizes[0] = f1c->sizes[1]*f1c->sizes[2] @@ -38,21 +36,18 @@ RMHD_converter::RMHD_converter( n[2] = n1; this->f1c = new field_descriptor(3, n, MPI_COMPLEX8, MPI_COMM_WORLD); - proc_print_err_message("aloha 02"); // the description for the fully transposed field n[0] = n2; n[1] = n1; n[2] = n0; this->f2c = new field_descriptor(3, n, MPI_COMPLEX8, MPI_COMM_WORLD); - proc_print_err_message("aloha 03"); // following 3 arguments are dimensions for real space grid dimensions // f3r and f3c will be allocated in this call fftwf_get_descriptors_3D( N0, N1, N2, &this->f3r, &this->f3c); - proc_print_err_message("aloha 04"); //allocate fields this->c0 = fftwf_alloc_complex(this->f0c->local_size); this->c12 = fftwf_alloc_complex(this->f1c->local_size); @@ -60,7 +55,6 @@ RMHD_converter::RMHD_converter( // 4 instead of 2, because we have 2 fields to write this->r3 = fftwf_alloc_real( 4*this->f3c->local_size); - proc_print_err_message("aloha 05"); // allocate plans this->complex2real0 = fftwf_mpi_plan_dft_c2r_3d( f3r->sizes[0], f3r->sizes[1], f3r->sizes[2], @@ -115,6 +109,7 @@ int RMHD_converter::convert( this->f2c, this->c12, this->f3c, this->c3); fftwf_execute(this->complex2real0); + proc_print_err_message("0 field read and transformed"); //read second field this->f0c->read(ifile1, (void*)this->c0); @@ -124,16 +119,20 @@ int RMHD_converter::convert( this->f2c, this->c12, this->f3c, this->c3); fftwf_execute(this->complex2real1); + proc_print_err_message("1 field read and transformed"); fftwf_clip_zero_padding(this->f4r, this->r3); + proc_print_err_message("clipped zero padding"); // new array where mixed components will be placed float *rtmp = fftwf_alloc_real( 2*this->f3r->local_size); // mix components this->f3r->interleave(this->r3, rtmp, 2); + proc_print_err_message("interleaved array"); this->s->shuffle(rtmp, this->r3, ofile); + proc_print_err_message("did zshuffle"); fftwf_free(rtmp); return EXIT_SUCCESS; diff --git a/src/fftwf_tools.cpp b/src/fftwf_tools.cpp index b8e0d20a23794738a203a086b915d0ee2b505da6..97a1d55d8bfb0de8428aa3dd705961344b287b60 100644 --- a/src/fftwf_tools.cpp +++ b/src/fftwf_tools.cpp @@ -13,7 +13,9 @@ int fftwf_copy_complex_array( field_descriptor *fo, fftwf_complex *ao) { - if ((fi->ndims != 3) || (fo->ndims != 3)) + if (((fi->ndims != 3) || + (fo->ndims != 3)) || + (fi->comm != fo->comm)) return EXIT_FAILURE; fftwf_complex *buffer; buffer = fftwf_alloc_complex(fi->slice_size); @@ -30,8 +32,26 @@ int fftwf_copy_complex_array( int64_t ii0, ii1; int64_t oi0, oi1; int64_t delta1, delta0; + int irank, orank; + char bla[100]; delta0 = (fo->sizes[0] - fi->sizes[0]); delta1 = (fo->sizes[1] - fi->sizes[1]); + //for (int64_t i = 0; i < fi->sizes[0]; i++) + //{ + // sprintf( + // bla, + // "i = %d, irank = %d", + // i, fi->rank[i]); + // proc_print_err_message(bla); + //} + //for (int64_t i = 0; i < fo->sizes[0]; i++) + //{ + // sprintf( + // bla, + // "i = %d, orank = %d", + // i, fo->rank[i]); + // proc_print_err_message(bla); + //} for (ii0=0; ii0 < fi->sizes[0]; ii0++) { if (ii0 <= fi->sizes[0]/2) @@ -46,8 +66,15 @@ int fftwf_copy_complex_array( if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2)) continue; } - if ((fi->rank(ii0) == fo->rank(oi0)) && - (myrank == fi->rank(ii0))) + irank = fi->rank[ii0]; + orank = fo->rank[oi0]; + //sprintf( + // bla, + // "inside ii0 loop, ii0 = %ld, iid = %d, oid = %d", + // ii0, irank, orank); + //proc_print_err_message(bla); + if ((irank == orank) && + (irank == fi->myrank)) { std::copy( (float*)(ai + (ii0 - fi->starts[0] )*fi->slice_size), @@ -56,29 +83,33 @@ int fftwf_copy_complex_array( } else { - if (myrank == fi->rank(ii0)) + if (fi->myrank == irank) { + // proc_print_err_message("sending"); MPI_Send( (void*)(ai + (ii0-fi->starts[0])*fi->slice_size), fi->slice_size, MPI_COMPLEX8, - fo->rank(oi0), + orank, ii0, - MPI_COMM_WORLD); + fi->comm); + // proc_print_err_message("sent"); } - if (myrank == fo->rank(oi0)) + if (fi->myrank == orank) { + // proc_print_err_message("receiving"); MPI_Recv( (void*)(buffer), fi->slice_size, MPI_COMPLEX8, - fi->rank(ii0), + irank, ii0, - MPI_COMM_WORLD, + fi->comm, MPI_STATUS_IGNORE); + // proc_print_err_message("received"); } } - if (myrank == fo->rank(oi0)) + if (fi->myrank == orank) { for (ii1 = 0; ii1 < fi->sizes[1]; ii1++) { @@ -97,12 +128,14 @@ int fftwf_copy_complex_array( std::copy( (float*)(buffer + ii1*fi->sizes[2]), (float*)(buffer + ii1*fi->sizes[2] + min_fast_dim), - (float*)(ao + ((oi0 - fo->starts[0])*fo->sizes[1] + oi1)*fo->sizes[2])); + (float*)(ao + + ((oi0 - fo->starts[0])*fo->sizes[1] + + oi1)*fo->sizes[2])); } } } fftw_free(buffer); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(fi->comm); return EXIT_SUCCESS; } @@ -129,18 +162,15 @@ int fftwf_get_descriptors_3D( field_descriptor **fr, field_descriptor **fc) { - proc_print_err_message("fftwf_get_descriptors 00"); int ntmp[3]; ntmp[0] = n0; ntmp[1] = n1; ntmp[2] = n2; *fr = new field_descriptor(3, ntmp, MPI_REAL4, MPI_COMM_WORLD); - proc_print_err_message("fftwf_get_descriptors 01"); ntmp[0] = n0; ntmp[1] = n1; ntmp[2] = n2/2+1; *fc = new field_descriptor(3, ntmp, MPI_COMPLEX8, MPI_COMM_WORLD); - proc_print_err_message("fftwf_get_descriptors 02"); return EXIT_SUCCESS; } diff --git a/src/field_descriptor.cpp b/src/field_descriptor.cpp index df6fe39d583fa3024a93159a41882b5895d30d90..5ec79ac2bcd2b712259fdc52790dfab8407786f7 100644 --- a/src/field_descriptor.cpp +++ b/src/field_descriptor.cpp @@ -18,12 +18,23 @@ field_descriptor::field_descriptor( this->sizes = new int[ndims]; this->subsizes = new int[ndims]; this->starts = new int[ndims]; + ptrdiff_t *nfftw = new ptrdiff_t[ndims]; + ptrdiff_t local_n0, local_0_start; + for (int i = 0; i < this->ndims; i++) + nfftw[i] = n[i]; + this->local_size = fftwf_mpi_local_size_many( + this->ndims, + nfftw, + 1, + FFTW_MPI_DEFAULT_BLOCK, + this->comm, + &local_n0, + &local_0_start); this->sizes[0] = n[0]; - this->subsizes[0] = n[0]/this->nprocs; - this->starts[0] = this->myrank*(n[0]/this->nprocs); + this->subsizes[0] = local_n0; + this->starts[0] = local_0_start; this->mpi_dtype = element_type; this->slice_size = 1; - this->local_size = this->subsizes[0]; this->full_size = this->sizes[0]; for (int i = 1; i < this->ndims; i++) { @@ -31,7 +42,6 @@ field_descriptor::field_descriptor( this->subsizes[i] = n[i]; this->starts[i] = 0; this->slice_size *= this->subsizes[i]; - this->local_size *= this->subsizes[i]; this->full_size *= this->sizes[i]; } MPI_Type_create_subarray( @@ -43,6 +53,29 @@ field_descriptor::field_descriptor( this->mpi_dtype, &this->mpi_array_dtype); MPI_Type_commit(&this->mpi_array_dtype); + this->rank = new int[this->sizes[0]]; + int *local_rank = new int[this->sizes[0]]; + std::fill_n(local_rank, this->sizes[0], 0); + for (int i = 0; i < this->sizes[0]; i++) + if (i >= this->starts[0] && i < this->starts[0] + this->subsizes[0]) + local_rank[i] = this->myrank; + MPI_Allreduce( + local_rank, + this->rank, + this->sizes[0], + MPI_INT, + MPI_SUM, + this->comm); + char bla[200]; + for (int i = 0; i < this->sizes[0]; i++) + { + sprintf( + bla, + "i = %d, irank = %d", + i, this->rank[i]); + proc_print_err_message(bla); + } + delete[] local_rank; } field_descriptor::~field_descriptor() @@ -50,6 +83,7 @@ field_descriptor::~field_descriptor() delete[] this->sizes; delete[] this->subsizes; delete[] this->starts; + delete[] this->rank; MPI_Type_free(&this->mpi_array_dtype); } diff --git a/src/field_descriptor.hpp b/src/field_descriptor.hpp index 920920997019024f713a9262d061dfcb33c065a1..e60fe02ebac2b19ae3439bb66cdb3eb61c9d2ad1 100644 --- a/src/field_descriptor.hpp +++ b/src/field_descriptor.hpp @@ -14,6 +14,7 @@ class field_descriptor int *subsizes; int *starts; int ndims; + int *rank; ptrdiff_t slice_size, local_size, full_size; MPI_Datatype mpi_array_dtype, mpi_dtype; int myrank, nprocs; @@ -58,11 +59,6 @@ class field_descriptor float *input, float *output, int dim); - - inline int rank(int i0) - { - return i0 / this->subsizes[0]; - } }; diff --git a/test.ipynb b/test.ipynb index 8a3ffb294494925123fd726303d1f8e1e9094b54..72059225dcd2589bb333d66fc271be2d15a8ff27 100644 --- a/test.ipynb +++ b/test.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:b66beda6c64119cb0725443436f940cc3d6a7269d12b4d537c8d44881b89be2c" + "signature": "sha256:69740a4c678f49d132215631329a0db6b8e997cbd8727af1a2295fa164ceadc6" }, "nbformat": 3, "nbformat_minor": 0, @@ -190,12 +190,12 @@ " print ('compilation error')\n", " return None\n", "\n", - "Rdata = get_cpp_data(branch = 'develop')" + "Rdata = get_cpp_data(branch = 'feature-arbitrary_dimensions')" ], "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 23 + "prompt_number": 4 }, { "cell_type": "code", @@ -226,7 +226,7 @@ ] } ], - "prompt_number": 24 + "prompt_number": 5 }, { "cell_type": "code",