diff --git a/src/base.hpp b/src/base.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d4a63a187072f4d6e425e5fba137691edf874b12
--- /dev/null
+++ b/src/base.hpp
@@ -0,0 +1,43 @@
+#include <mpi.h>
+#include <stdarg.h>
+#include <iostream>
+
+#ifndef BASE
+
+#define BASE
+
+static const int message_buffer_length = 1024;
+static char debug_message_buffer[message_buffer_length];
+extern int myrank, nprocs;
+
+#ifndef NDEBUG
+
+inline void DEBUG_MSG(const char * format, ...)
+{
+    va_list argptr;
+    va_start(argptr, format);
+    sprintf(
+            debug_message_buffer,
+            "cpu%.4d ",
+            myrank);
+    vsnprintf(
+            debug_message_buffer + 8,
+            message_buffer_length - 8,
+            format,
+            argptr);
+    va_end(argptr);
+    std::cerr << debug_message_buffer;
+}
+
+#define CHECK_POINT() DEBUG_MSG("%s %s", __FILE__, __LINE__)
+
+#else
+
+    #define DEBUG_MSG(x)
+
+#define CHECK_POINT()
+
+#endif//NDEBUG
+
+#endif//BASE
+
diff --git a/src/field_descriptor.cpp b/src/field_descriptor.cpp
index 759219b9a3c7b5a8e45b29ea31bd89451b13c543..77d7bc3b18ceb2cddc8310b237d8d75224a742e0 100644
--- a/src/field_descriptor.cpp
+++ b/src/field_descriptor.cpp
@@ -1,6 +1,7 @@
 #include <stdlib.h>
 #include <algorithm>
 #include <iostream>
+#include "base.hpp"
 #include "field_descriptor.hpp"
 
 field_descriptor::field_descriptor(
@@ -9,6 +10,8 @@ field_descriptor::field_descriptor(
         MPI_Datatype element_type,
         MPI_Comm COMM_TO_USE)
 {
+    DEBUG_MSG("entered field_descriptor::field_descriptor\n");
+    //CHECK_POINT();
     this->comm = COMM_TO_USE;
     MPI_Comm_rank(this->comm, &this->myrank);
     MPI_Comm_size(this->comm, &this->nprocs);
@@ -42,15 +45,61 @@ field_descriptor::field_descriptor(
         this->slice_size *= this->subsizes[i];
         this->full_size *= this->sizes[i];
     }
-    MPI_Type_create_subarray(
-            ndims,
-            this->sizes,
-            this->subsizes,
-            this->starts,
-            MPI_ORDER_C,
-            this->mpi_dtype,
-            &this->mpi_array_dtype);
-    MPI_Type_commit(&this->mpi_array_dtype);
+    DEBUG_MSG(
+            "inside field_descriptor constructor, about to call "
+            "MPI_Type_create_subarray\n"
+            "%d %d %d\n",
+            this->sizes[0],
+            this->subsizes[0],
+            this->starts[0]);
+    int local_zero_array[this->nprocs], zero_array[this->nprocs];
+    for (int i=0; i<this->nprocs; i++)
+        local_zero_array[i] = 0;
+    local_zero_array[this->myrank] = (this->subsizes[0] == 0) ? 1 : 0;
+    MPI_Allreduce(
+            local_zero_array,
+            zero_array,
+            this->nprocs,
+            MPI_INT,
+            MPI_SUM,
+            this->comm);
+    int no_of_excluded_ranks = 0;
+    for (int i = 0; i<this->nprocs; i++)
+        no_of_excluded_ranks += zero_array[i];
+    if (no_of_excluded_ranks == 0)
+    {
+        this->io_comm = this->comm;
+        this->io_nprocs = this->nprocs;
+        this->io_myrank = this->myrank;
+    }
+    else
+    {
+        int excluded_rank[no_of_excluded_ranks];
+        for (int i=0, j=0; i<this->nprocs; i++)
+            if (zero_array[i])
+            {
+                excluded_rank[j] = i;
+                j++;
+            }
+        MPI_Group tgroup0, tgroup;
+        MPI_Comm_group(this->comm, &tgroup0);
+        MPI_Group_excl(tgroup0, no_of_excluded_ranks, excluded_rank, &tgroup);
+        MPI_Comm_create(this->comm, tgroup, &this->io_comm);
+        MPI_Group_free(&tgroup0);
+        MPI_Group_free(&tgroup);
+    }
+    if (this->subsizes[0] > 0)
+    {
+        MPI_Type_create_subarray(
+                ndims,
+                this->sizes,
+                this->subsizes,
+                this->starts,
+                MPI_ORDER_C,
+                this->mpi_dtype,
+                &this->mpi_array_dtype);
+        MPI_Type_commit(&this->mpi_array_dtype);
+    }
     this->rank = new int[this->sizes[0]];
     int *local_rank = new int[this->sizes[0]];
     std::fill_n(local_rank, this->sizes[0], 0);
@@ -65,48 +114,62 @@ field_descriptor::field_descriptor(
             MPI_SUM,
             this->comm);
     delete[] local_rank;
+    DEBUG_MSG("exiting field_descriptor::field_descriptor\n");
 }
 
 field_descriptor::~field_descriptor()
 {
+    DEBUG_MSG("entered field_descriptor::~field_descriptor\n");
     delete[] this->sizes;
     delete[] this->subsizes;
     delete[] this->starts;
     delete[] this->rank;
-    MPI_Type_free(&this->mpi_array_dtype);
+//    if (this->subsizes[0] > 0)
+    {
+        DEBUG_MSG("deallocating mpi_array_dtype\n");
+        MPI_Type_free(&this->mpi_array_dtype);
+        if (this->nprocs != this->io_nprocs)
+        {
+            DEBUG_MSG("freeing io_comm\n");
+            MPI_Comm_free(&this->io_comm);
+        }
+    }
+    DEBUG_MSG("exiting field_descriptor::~field_descriptor\n");
 }
 
 int field_descriptor::read(
         const char *fname,
         void *buffer)
 {
-    MPI_Info info;
-    MPI_Info_create(&info);
-    MPI_File f;
-    char ffname[200];
-    sprintf(ffname, "%s", fname);
-
-    MPI_File_open(
-            this->comm,
-            ffname,
-            MPI_MODE_RDONLY,
-            info,
-            &f);
-    MPI_File_set_view(
-            f,
-            0,
-            this->mpi_dtype,
-            this->mpi_array_dtype,
-            "native", //this needs to be made more general
-            info);
-    MPI_File_read_all(
-            f,
-            buffer,
-            this->local_size,
-            this->mpi_dtype,
-            MPI_STATUS_IGNORE);
-    MPI_File_close(&f);
+    if (this->subsizes[0] > 0)
+    {
+        MPI_Info info;
+        MPI_Info_create(&info);
+        MPI_File f;
+        char ffname[200];
+        sprintf(ffname, "%s", fname);
 
+        MPI_File_open(
+                this->io_comm,
+                ffname,
+                MPI_MODE_RDONLY,
+                info,
+                &f);
+        MPI_File_set_view(
+                f,
+                0,
+                this->mpi_dtype,
+                this->mpi_array_dtype,
+                "native", //this needs to be made more general
+                info);
+        MPI_File_read_all(
+                f,
+                buffer,
+                this->local_size,
+                this->mpi_dtype,
+                MPI_STATUS_IGNORE);
+        MPI_File_close(&f);
+    }
     return EXIT_SUCCESS;
 }
 
@@ -114,32 +177,35 @@ int field_descriptor::write(
         const char *fname,
         void *buffer)
 {
-    MPI_Info info;
-    MPI_Info_create(&info);
-    MPI_File f;
-    char ffname[200];
-    sprintf(ffname, "%s", fname);
+    if (this->subsizes[0] > 0)
+    {
+        MPI_Info info;
+        MPI_Info_create(&info);
+        MPI_File f;
+        char ffname[200];
+        sprintf(ffname, "%s", fname);
 
-    MPI_File_open(
-            this->comm,
-            ffname,
-            MPI_MODE_CREATE | MPI_MODE_WRONLY,
-            info,
-            &f);
-    MPI_File_set_view(
-            f,
-            0,
-            this->mpi_dtype,
-            this->mpi_array_dtype,
-            "native", //this needs to be made more general
-            info);
-    MPI_File_write_all(
-            f,
-            buffer,
-            this->local_size,
-            this->mpi_dtype,
-            MPI_STATUS_IGNORE);
-    MPI_File_close(&f);
+        MPI_File_open(
+                this->comm,
+                ffname,
+                MPI_MODE_CREATE | MPI_MODE_WRONLY,
+                info,
+                &f);
+        MPI_File_set_view(
+                f,
+                0,
+                this->mpi_dtype,
+                this->mpi_array_dtype,
+                "native", //this needs to be made more general
+                info);
+        MPI_File_write_all(
+                f,
+                buffer,
+                this->local_size,
+                this->mpi_dtype,
+                MPI_STATUS_IGNORE);
+        MPI_File_close(&f);
+    }
 
     return EXIT_SUCCESS;
 }
@@ -300,14 +366,3 @@ field_descriptor* field_descriptor::get_transpose()
     return new field_descriptor(this->ndims, n, this->mpi_dtype, this->comm);
 }
 
-void proc_print_err_message(const char *message)
-{
-#ifndef NDEBUG
-    for (int i = 0; i < nprocs; i++)
-    {
-        if (myrank == i)
-            std::cerr << i << " " << message << std::endl;
-        MPI_Barrier(MPI_COMM_WORLD);
-    }
-#endif
-}
diff --git a/src/field_descriptor.hpp b/src/field_descriptor.hpp
index c2fb4c5fec4336179cf13743db536319995d2414..9960e0efc660e7cb8f90ac397ea08dc53f9ea1b7 100644
--- a/src/field_descriptor.hpp
+++ b/src/field_descriptor.hpp
@@ -19,8 +19,8 @@ class field_descriptor
         int *rank;
         ptrdiff_t slice_size, local_size, full_size;
         MPI_Datatype mpi_array_dtype, mpi_dtype;
-        int myrank, nprocs;
-        MPI_Comm comm;
+        int myrank, nprocs, io_myrank, io_nprocs;
+        MPI_Comm comm, io_comm;
 
 
         /* methods */
@@ -80,7 +80,5 @@ int fftwf_clip_zero_padding(
         field_descriptor *f,
         float *a);
 
-void proc_print_err_message(const char *message);
-
 #endif//FIELD_DESCRIPTOR
 
diff --git a/src/p3DFFT_to_iR.cpp b/src/p3DFFT_to_iR.cpp
index 03fbeb3b723e35b2bf6270c23f33876cdf33e221..72c389903155c48f7b82570465e569d8ef9268c4 100644
--- a/src/p3DFFT_to_iR.cpp
+++ b/src/p3DFFT_to_iR.cpp
@@ -1,17 +1,18 @@
+#include "base.hpp"
 #include "p3DFFT_to_iR.hpp"
 #include <string>
 #include <iostream>
 
 
-
 p3DFFT_to_iR::p3DFFT_to_iR(
         int n0, int n1, int n2,
         int N0, int N1, int N2,
-        int howmany)
+        int howmany,
+        bool allocate_fields)
 {
     this->howmany = howmany;
     int n[7];
-    proc_print_err_message("entering constructor of p3DFFT_to_iR");
+    DEBUG_MSG("entering constructor of p3DFFT_to_iR\n");
 
     // first 3 arguments are dimensions for input array
     // i.e. actual dimensions for the Fourier representation.
@@ -43,29 +44,34 @@ p3DFFT_to_iR::p3DFFT_to_iR(
             N0, N1, N2,
             &this->f3r, &this->f3c);
 
-    //allocate fields
-    this->c0  = fftwf_alloc_complex(this->f0c->local_size);
-    this->c12 = fftwf_alloc_complex(this->f1c->local_size);
-    // 4 instead of 2, because we have 2 fields to write
-    this->r3  = fftwf_alloc_real( 2*this->f3c->local_size*this->howmany);
-    // all FFTs are going to be inplace
-    this->c3  = (fftwf_complex*)this->r3;
-
-    // allocate plans
-    ptrdiff_t blabla[] = {this->f3r->sizes[0],
-                          this->f3r->sizes[1],
-                          this->f3r->sizes[2]};
-    this->complex2real = fftwf_mpi_plan_many_dft_c2r(
-            3,
-            blabla,
-            this->howmany,
-            FFTW_MPI_DEFAULT_BLOCK,
-            FFTW_MPI_DEFAULT_BLOCK,
-            this->c3, this->r3,
-            MPI_COMM_WORLD,
-            FFTW_ESTIMATE);
-
-    proc_print_err_message("exiting constructor of p3DFFT_to_iR");
+    this->fields_allocated = false;
+    if (allocate_fields)
+    {
+        //allocate fields
+        this->c0  = fftwf_alloc_complex(this->f0c->local_size);
+        this->c12 = fftwf_alloc_complex(this->f1c->local_size);
+        // 4 instead of 2, because we have 2 fields to write
+        this->r3  = fftwf_alloc_real( 2*this->f3c->local_size*this->howmany);
+        // all FFTs are going to be inplace
+        this->c3  = (fftwf_complex*)this->r3;
+
+        // allocate plans
+        ptrdiff_t blabla[] = {this->f3r->sizes[0],
+                              this->f3r->sizes[1],
+                              this->f3r->sizes[2]};
+        this->complex2real = fftwf_mpi_plan_many_dft_c2r(
+                3,
+                blabla,
+                this->howmany,
+                FFTW_MPI_DEFAULT_BLOCK,
+                FFTW_MPI_DEFAULT_BLOCK,
+                this->c3, this->r3,
+                MPI_COMM_WORLD,
+                FFTW_ESTIMATE);
+        this->fields_allocated = true;
+    }
+
+    DEBUG_MSG("exiting constructor of p3DFFT_to_iR\n");
 }
 
 p3DFFT_to_iR::~p3DFFT_to_iR()
@@ -76,11 +82,13 @@ p3DFFT_to_iR::~p3DFFT_to_iR()
     delete this->f3c;
     delete this->f3r;
 
-    fftwf_free(this->c0);
-    fftwf_free(this->c12);
-    fftwf_free(this->r3);
-
-    fftwf_destroy_plan(this->complex2real);
+    if (this->fields_allocated)
+    {
+        fftwf_free(this->c0);
+        fftwf_free(this->c12);
+        fftwf_free(this->r3);
+        fftwf_destroy_plan(this->complex2real);
+    }
 }
 
 int p3DFFT_to_iR::read(
@@ -89,34 +97,34 @@ int p3DFFT_to_iR::read(
     //read fields
     for (int i = 0; i < this->howmany; i++)
     {
-        proc_print_err_message("p3DFFT_to_iR::read "
-                               "this->f0c->read(ifile0, (void*)this->c0);");
+        DEBUG_MSG("p3DFFT_to_iR::read "
+                  "this->f0c->read(ifile0, (void*)this->c0);\n");
         this->f0c->read(ifile[i], (void*)this->c0);
-        proc_print_err_message("p3DFFT_to_iR::read "
-                               "this->f0c->transpose(this->c0, this->c12);");
+        DEBUG_MSG("p3DFFT_to_iR::read "
+                  "this->f0c->transpose(this->c0, this->c12);\n");
         this->f0c->transpose(this->c0, this->c12);
-        proc_print_err_message("p3DFFT_to_iR::read "
-                               "this->f1c->transpose(this->c12);");
+        DEBUG_MSG("p3DFFT_to_iR::read "
+                  "this->f1c->transpose(this->c12);\n");
         this->f1c->transpose(this->c12);
-        proc_print_err_message("p3DFFT_to_iR::read "
-                               "fftwf_copy_complex_array(");
+        DEBUG_MSG("p3DFFT_to_iR::read "
+                  "fftwf_copy_complex_array(\n");
         fftwf_copy_complex_array(
                 this->f2c, this->c12,
                 this->f3c, this->c3 + i*this->f3c->local_size);
     }
 
-    proc_print_err_message("p3DFFT_to_iR::read "
-                           "this->f3c->interleave(this->c3, 2);");
+    DEBUG_MSG("p3DFFT_to_iR::read "
+              "this->f3c->interleave(this->c3, 2);\n");
     this->f3c->interleave(this->c3, this->howmany);
 
-    proc_print_err_message("p3DFFT_to_iR::read "
-                           "fftwf_execute(this->complex2real);");
+    DEBUG_MSG("p3DFFT_to_iR::read "
+              "fftwf_execute(this->complex2real);\n");
     fftwf_execute(this->complex2real);
 
-    proc_print_err_message("p3DFFT_to_iR::read "
-                           "fftwf_clip_zero_padding(this->f3r, this->r3, 2);");
+    DEBUG_MSG("p3DFFT_to_iR::read "
+              "fftwf_clip_zero_padding(this->f3r, this->r3, 2);\n");
     fftwf_clip_zero_padding(this->f3r, this->r3, this->howmany);
-    proc_print_err_message("p3DFFT_to_iR::read return");
+    DEBUG_MSG("p3DFFT_to_iR::read return\n");
     return EXIT_SUCCESS;
 }
 
diff --git a/src/p3DFFT_to_iR.hpp b/src/p3DFFT_to_iR.hpp
index 0d61c2612684ee3be24ca0fa6b5e43bae9b23a3c..a3adb827a58736c6ba8d944c2a7c93af7f7fef60 100644
--- a/src/p3DFFT_to_iR.hpp
+++ b/src/p3DFFT_to_iR.hpp
@@ -29,6 +29,7 @@ class p3DFFT_to_iR
         fftwf_complex *c12; // array to store transposed input
         fftwf_complex *c3 ; // array to store resized Fourier data
         float *r3         ; // array to store real space data
+        bool fields_allocated;
 
         fftwf_plan complex2real;
 
@@ -36,7 +37,8 @@ class p3DFFT_to_iR
         p3DFFT_to_iR(
                 int n0, int n1, int n2,
                 int N0, int N1, int N2,
-                int howmany);
+                int howmany,
+                bool allocate_fields = true);
         ~p3DFFT_to_iR();
 
         int read(
diff --git a/src/test_field_descriptor.cpp b/src/test_field_descriptor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6dcb2621edbb78c99ec40630a3d953721f808378
--- /dev/null
+++ b/src/test_field_descriptor.cpp
@@ -0,0 +1,66 @@
+#include "p3DFFT_to_iR.hpp"
+#include "Morton_shuffler.hpp"
+#include <iostream>
+
+int myrank, nprocs;
+
+int main(int argc, char *argv[])
+{
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+
+    int n, N, nfiles, nfields;
+    int test_type = 0;
+    switch(argc)
+    {
+        case 6:
+            test_type = atoi(argv[5]);
+        case 5:
+            n = atoi(argv[1]);
+            N = atoi(argv[2]);
+            nfiles = atoi(argv[3]);
+            nfields = atoi(argv[4]);
+            break;
+        default:
+            std::cerr <<
+                "not enough (or too many) parameters.\naborting." <<
+                std::endl;
+            MPI_Finalize();
+            return EXIT_SUCCESS;
+    }
+    p3DFFT_to_iR *r = new p3DFFT_to_iR(
+            (n/2+1), n, n,
+            N, N, N,
+            nfields,
+            test_type);
+
+    Morton_shuffler *s = new Morton_shuffler(
+            N, N, N, nfields, nfiles);
+
+    if (test_type)
+    {
+        // initialize file names
+        char* ifile[nfields];
+        for (int i=0; i<nfields; i++)
+        {
+            ifile[i] = (char*)malloc(100*sizeof(char));
+            sprintf(ifile[i], "Kdata%d", i);
+        }
+
+        //read
+        r->read(ifile);
+        for (int i = 0; i<nfields; i++)
+            free(ifile[i]);
+        s->shuffle(r->r3, "Rdata");
+
+        delete s;
+    }
+    delete r;
+
+    // clean up
+    fftwf_mpi_cleanup();
+    MPI_Finalize();
+    return EXIT_SUCCESS;
+}
+