diff --git a/src/Morton_shuffler.cpp b/src/Morton_shuffler.cpp
index b771b69b07bef0daf739b99dc742b31855f27c70..7aa512f1e688814c7f804099d3b73f1524df5f3f 100644
--- a/src/Morton_shuffler.cpp
+++ b/src/Morton_shuffler.cpp
@@ -31,6 +31,12 @@ Morton_shuffler::Morton_shuffler(
     n[0] = (N0/8) * (N1/8) * (N2/8);
     n[1] = 8*8*8*this->d;
     this->dzcubbie = new field_descriptor(2, n, MPI_REAL4, MPI_COMM_WORLD);
+    char bla[200];
+    for (int z = 0; z < this->dzcubbie->sizes[0]; z++)
+    {
+        sprintf(bla, "z = %d, zid = %d", z, this->dzcubbie->rank[z]);
+        proc_print_err_message(bla);
+    }
 
     //set up output file descriptor
     int out_rank, out_nprocs;
@@ -65,15 +71,21 @@ int Morton_shuffler::shuffle(
     ptrdiff_t cubbie_size = 8*8*8*this->d;
     ptrdiff_t cc;
     float *rz = fftwf_alloc_real(cubbie_size);
+    char bla[200];
     for (int k = 0; k < this->drcubbie->sizes[0]; k++)
     {
-        rid = this->drcubbie->rank(k);
+        rid = this->drcubbie->rank[k];
         kk = k - this->drcubbie->starts[0];
         for (int j = 0; j < this->drcubbie->sizes[1]; j++)
         for (int i = 0; i < this->drcubbie->sizes[2]; i++)
         {
             z = regular_to_zindex(k, j, i);
-            zid = this->dzcubbie->rank(z);
+            zid = this->dzcubbie->rank[z];
+        sprintf(
+                bla,
+                "inside loops k = %d, j = %d, i = %d, z = %d, rid = %d, zid = %d",
+                k, j, i, z, rid, zid);
+        proc_print_err_message(bla);
             zz = z - this->dzcubbie->starts[0];
             if (myrank == rid || myrank == zid)
             {
diff --git a/src/RMHD_converter.cpp b/src/RMHD_converter.cpp
index 3f8e1749e76c67a93630169d16e36fedca75ddad..d347009b7ebedbc4343c779f9c3fc79006776bbc 100644
--- a/src/RMHD_converter.cpp
+++ b/src/RMHD_converter.cpp
@@ -18,7 +18,6 @@ RMHD_converter::RMHD_converter(
     }
     int n[7];
 
-    proc_print_err_message("aloha 00");
     // first 3 arguments are dimensions for input array
     // i.e. actual dimensions for the Fourier representation.
     // NOT real space grid dimensions
@@ -29,7 +28,6 @@ RMHD_converter::RMHD_converter(
     n[1] = n2;
     this->f0c = new field_descriptor(2, n, MPI_COMPLEX8, MPI_COMM_WORLD);
 
-    proc_print_err_message("aloha 01");
     // f1c will be pointing at the input array after it has been
     // transposed in 2D, therefore we have this correspondence:
     // f0c->sizes[0] = f1c->sizes[1]*f1c->sizes[2]
@@ -38,21 +36,18 @@ RMHD_converter::RMHD_converter(
     n[2] = n1;
     this->f1c = new field_descriptor(3, n, MPI_COMPLEX8, MPI_COMM_WORLD);
 
-    proc_print_err_message("aloha 02");
     // the description for the fully transposed field
     n[0] = n2;
     n[1] = n1;
     n[2] = n0;
     this->f2c = new field_descriptor(3, n, MPI_COMPLEX8, MPI_COMM_WORLD);
 
-    proc_print_err_message("aloha 03");
     // following 3 arguments are dimensions for real space grid dimensions
     // f3r and f3c will be allocated in this call
     fftwf_get_descriptors_3D(
             N0, N1, N2,
             &this->f3r, &this->f3c);
 
-    proc_print_err_message("aloha 04");
     //allocate fields
     this->c0  = fftwf_alloc_complex(this->f0c->local_size);
     this->c12 = fftwf_alloc_complex(this->f1c->local_size);
@@ -60,7 +55,6 @@ RMHD_converter::RMHD_converter(
     // 4 instead of 2, because we have 2 fields to write
     this->r3  = fftwf_alloc_real( 4*this->f3c->local_size);
 
-    proc_print_err_message("aloha 05");
     // allocate plans
     this->complex2real0 = fftwf_mpi_plan_dft_c2r_3d(
             f3r->sizes[0], f3r->sizes[1], f3r->sizes[2],
@@ -115,6 +109,7 @@ int RMHD_converter::convert(
             this->f2c, this->c12,
             this->f3c, this->c3);
     fftwf_execute(this->complex2real0);
+    proc_print_err_message("0 field read and transformed");
 
     //read second field
     this->f0c->read(ifile1, (void*)this->c0);
@@ -124,16 +119,20 @@ int RMHD_converter::convert(
             this->f2c, this->c12,
             this->f3c, this->c3);
     fftwf_execute(this->complex2real1);
+    proc_print_err_message("1 field read and transformed");
 
     fftwf_clip_zero_padding(this->f4r, this->r3);
+    proc_print_err_message("clipped zero padding");
 
     // new array where mixed components will be placed
     float *rtmp = fftwf_alloc_real( 2*this->f3r->local_size);
 
     // mix components
     this->f3r->interleave(this->r3, rtmp, 2);
+    proc_print_err_message("interleaved array");
 
     this->s->shuffle(rtmp, this->r3, ofile);
+    proc_print_err_message("did zshuffle");
 
     fftwf_free(rtmp);
     return EXIT_SUCCESS;
diff --git a/src/fftwf_tools.cpp b/src/fftwf_tools.cpp
index b8e0d20a23794738a203a086b915d0ee2b505da6..97a1d55d8bfb0de8428aa3dd705961344b287b60 100644
--- a/src/fftwf_tools.cpp
+++ b/src/fftwf_tools.cpp
@@ -13,7 +13,9 @@ int fftwf_copy_complex_array(
         field_descriptor *fo,
         fftwf_complex *ao)
 {
-    if ((fi->ndims != 3) || (fo->ndims != 3))
+    if (((fi->ndims != 3) ||
+         (fo->ndims != 3)) ||
+        (fi->comm != fo->comm))
         return EXIT_FAILURE;
     fftwf_complex *buffer;
     buffer = fftwf_alloc_complex(fi->slice_size);
@@ -30,8 +32,26 @@ int fftwf_copy_complex_array(
     int64_t ii0, ii1;
     int64_t oi0, oi1;
     int64_t delta1, delta0;
+    int irank, orank;
+    char bla[100];
     delta0 = (fo->sizes[0] - fi->sizes[0]);
     delta1 = (fo->sizes[1] - fi->sizes[1]);
+    //for (int64_t i = 0; i < fi->sizes[0]; i++)
+    //{
+    //    sprintf(
+    //            bla,
+    //            "i = %d, irank = %d",
+    //            i, fi->rank[i]);
+    //    proc_print_err_message(bla);
+    //}
+    //for (int64_t i = 0; i < fo->sizes[0]; i++)
+    //{
+    //    sprintf(
+    //            bla,
+    //            "i = %d, orank = %d",
+    //            i, fo->rank[i]);
+    //    proc_print_err_message(bla);
+    //}
     for (ii0=0; ii0 < fi->sizes[0]; ii0++)
     {
         if (ii0 <= fi->sizes[0]/2)
@@ -46,8 +66,15 @@ int fftwf_copy_complex_array(
             if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2))
                 continue;
         }
-        if ((fi->rank(ii0) == fo->rank(oi0)) &&
-            (myrank == fi->rank(ii0)))
+        irank = fi->rank[ii0];
+        orank = fo->rank[oi0];
+        //sprintf(
+        //        bla,
+        //        "inside ii0 loop, ii0 = %ld, iid = %d, oid = %d",
+        //        ii0, irank, orank);
+        //proc_print_err_message(bla);
+        if ((irank == orank) &&
+            (irank == fi->myrank))
         {
             std::copy(
                     (float*)(ai + (ii0 - fi->starts[0]    )*fi->slice_size),
@@ -56,29 +83,33 @@ int fftwf_copy_complex_array(
         }
         else
         {
-            if (myrank == fi->rank(ii0))
+            if (fi->myrank == irank)
             {
+    //            proc_print_err_message("sending");
                 MPI_Send(
                         (void*)(ai + (ii0-fi->starts[0])*fi->slice_size),
                         fi->slice_size,
                         MPI_COMPLEX8,
-                        fo->rank(oi0),
+                        orank,
                         ii0,
-                        MPI_COMM_WORLD);
+                        fi->comm);
+     //           proc_print_err_message("sent");
             }
-            if (myrank == fo->rank(oi0))
+            if (fi->myrank == orank)
             {
+      //          proc_print_err_message("receiving");
                 MPI_Recv(
                         (void*)(buffer),
                         fi->slice_size,
                         MPI_COMPLEX8,
-                        fi->rank(ii0),
+                        irank,
                         ii0,
-                        MPI_COMM_WORLD,
+                        fi->comm,
                         MPI_STATUS_IGNORE);
+      //          proc_print_err_message("received");
             }
         }
-        if (myrank == fo->rank(oi0))
+        if (fi->myrank == orank)
         {
             for (ii1 = 0; ii1 < fi->sizes[1]; ii1++)
             {
@@ -97,12 +128,14 @@ int fftwf_copy_complex_array(
                 std::copy(
                         (float*)(buffer + ii1*fi->sizes[2]),
                         (float*)(buffer + ii1*fi->sizes[2] + min_fast_dim),
-                        (float*)(ao + ((oi0 - fo->starts[0])*fo->sizes[1] + oi1)*fo->sizes[2]));
+                        (float*)(ao +
+                                 ((oi0 - fo->starts[0])*fo->sizes[1] +
+                                  oi1)*fo->sizes[2]));
             }
         }
     }
     fftw_free(buffer);
-    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Barrier(fi->comm);
 
     return EXIT_SUCCESS;
 }
@@ -129,18 +162,15 @@ int fftwf_get_descriptors_3D(
         field_descriptor **fr,
         field_descriptor **fc)
 {
-    proc_print_err_message("fftwf_get_descriptors 00");
     int ntmp[3];
     ntmp[0] = n0;
     ntmp[1] = n1;
     ntmp[2] = n2;
     *fr = new field_descriptor(3, ntmp, MPI_REAL4, MPI_COMM_WORLD);
-    proc_print_err_message("fftwf_get_descriptors 01");
     ntmp[0] = n0;
     ntmp[1] = n1;
     ntmp[2] = n2/2+1;
     *fc = new field_descriptor(3, ntmp, MPI_COMPLEX8, MPI_COMM_WORLD);
-    proc_print_err_message("fftwf_get_descriptors 02");
     return EXIT_SUCCESS;
 }
 
diff --git a/src/field_descriptor.cpp b/src/field_descriptor.cpp
index df6fe39d583fa3024a93159a41882b5895d30d90..5ec79ac2bcd2b712259fdc52790dfab8407786f7 100644
--- a/src/field_descriptor.cpp
+++ b/src/field_descriptor.cpp
@@ -18,12 +18,23 @@ field_descriptor::field_descriptor(
     this->sizes    = new int[ndims];
     this->subsizes = new int[ndims];
     this->starts   = new int[ndims];
+    ptrdiff_t *nfftw = new ptrdiff_t[ndims];
+    ptrdiff_t local_n0, local_0_start;
+    for (int i = 0; i < this->ndims; i++)
+        nfftw[i] = n[i];
+    this->local_size = fftwf_mpi_local_size_many(
+            this->ndims,
+            nfftw,
+            1,
+            FFTW_MPI_DEFAULT_BLOCK,
+            this->comm,
+            &local_n0,
+            &local_0_start);
     this->sizes[0] = n[0];
-    this->subsizes[0] = n[0]/this->nprocs;
-    this->starts[0] = this->myrank*(n[0]/this->nprocs);
+    this->subsizes[0] = local_n0;
+    this->starts[0] = local_0_start;
     this->mpi_dtype = element_type;
     this->slice_size = 1;
-    this->local_size = this->subsizes[0];
     this->full_size = this->sizes[0];
     for (int i = 1; i < this->ndims; i++)
     {
@@ -31,7 +42,6 @@ field_descriptor::field_descriptor(
         this->subsizes[i] = n[i];
         this->starts[i] = 0;
         this->slice_size *= this->subsizes[i];
-        this->local_size *= this->subsizes[i];
         this->full_size *= this->sizes[i];
     }
     MPI_Type_create_subarray(
@@ -43,6 +53,29 @@ field_descriptor::field_descriptor(
             this->mpi_dtype,
             &this->mpi_array_dtype);
     MPI_Type_commit(&this->mpi_array_dtype);
+    this->rank = new int[this->sizes[0]];
+    int *local_rank = new int[this->sizes[0]];
+    std::fill_n(local_rank, this->sizes[0], 0);
+    for (int i = 0; i < this->sizes[0]; i++)
+        if (i >= this->starts[0] && i < this->starts[0] + this->subsizes[0])
+            local_rank[i] = this->myrank;
+    MPI_Allreduce(
+            local_rank,
+            this->rank,
+            this->sizes[0],
+            MPI_INT,
+            MPI_SUM,
+            this->comm);
+    char bla[200];
+    for (int i = 0; i < this->sizes[0]; i++)
+    {
+        sprintf(
+                bla,
+                "i = %d, irank = %d",
+                i, this->rank[i]);
+        proc_print_err_message(bla);
+    }
+    delete[] local_rank;
 }
 
 field_descriptor::~field_descriptor()
@@ -50,6 +83,7 @@ field_descriptor::~field_descriptor()
     delete[] this->sizes;
     delete[] this->subsizes;
     delete[] this->starts;
+    delete[] this->rank;
     MPI_Type_free(&this->mpi_array_dtype);
 }
 
diff --git a/src/field_descriptor.hpp b/src/field_descriptor.hpp
index 920920997019024f713a9262d061dfcb33c065a1..e60fe02ebac2b19ae3439bb66cdb3eb61c9d2ad1 100644
--- a/src/field_descriptor.hpp
+++ b/src/field_descriptor.hpp
@@ -14,6 +14,7 @@ class field_descriptor
         int *subsizes;
         int *starts;
         int ndims;
+        int *rank;
         ptrdiff_t slice_size, local_size, full_size;
         MPI_Datatype mpi_array_dtype, mpi_dtype;
         int myrank, nprocs;
@@ -58,11 +59,6 @@ class field_descriptor
                 float *input,
                 float *output,
                 int dim);
-
-        inline int rank(int i0)
-        {
-            return i0 / this->subsizes[0];
-        }
 };
 
 
diff --git a/test.ipynb b/test.ipynb
index 8a3ffb294494925123fd726303d1f8e1e9094b54..72059225dcd2589bb333d66fc271be2d15a8ff27 100644
--- a/test.ipynb
+++ b/test.ipynb
@@ -1,7 +1,7 @@
 {
  "metadata": {
   "name": "",
-  "signature": "sha256:b66beda6c64119cb0725443436f940cc3d6a7269d12b4d537c8d44881b89be2c"
+  "signature": "sha256:69740a4c678f49d132215631329a0db6b8e997cbd8727af1a2295fa164ceadc6"
  },
  "nbformat": 3,
  "nbformat_minor": 0,
@@ -190,12 +190,12 @@
       "        print ('compilation error')\n",
       "        return None\n",
       "\n",
-      "Rdata = get_cpp_data(branch = 'develop')"
+      "Rdata = get_cpp_data(branch = 'feature-arbitrary_dimensions')"
      ],
      "language": "python",
      "metadata": {},
      "outputs": [],
-     "prompt_number": 23
+     "prompt_number": 4
     },
     {
      "cell_type": "code",
@@ -226,7 +226,7 @@
        ]
       }
      ],
-     "prompt_number": 24
+     "prompt_number": 5
     },
     {
      "cell_type": "code",