diff --git a/makefile b/makefile
index f05972414ec066186a03669c6907470799f0389a..9eb470ce27518da157a908ec4e0e5455977e8bf2 100644
--- a/makefile
+++ b/makefile
@@ -58,7 +58,7 @@ endif
 
 base_files := \
 	field_descriptor \
-	fftwf_tools \
+	fftw_tools \
 	Morton_shuffler \
 	p3DFFT_to_iR \
 	fluid_solver
diff --git a/src/Morton_shuffler.hpp b/src/Morton_shuffler.hpp
index a1e83b86c69417575e1dab3b02ef0e48aab7495a..afb03596a49199f6393ff17d0d858502cdc1ad08 100644
--- a/src/Morton_shuffler.hpp
+++ b/src/Morton_shuffler.hpp
@@ -22,7 +22,7 @@
 #include <stdlib.h>
 #include <iostream>
 #include "field_descriptor.hpp"
-#include "fftwf_tools.hpp"
+#include "fftw_tools.hpp"
 
 #ifndef MORTON_SHUFFLER
 
diff --git a/src/fftwf_tools.cpp b/src/fftw_tools.cpp
similarity index 71%
rename from src/fftwf_tools.cpp
rename to src/fftw_tools.cpp
index 20de89002a540eceab323c006669cb2cb6dd643c..795982531bfde0e7a908b6547d943f35c5c7b9b5 100644
--- a/src/fftwf_tools.cpp
+++ b/src/fftw_tools.cpp
@@ -21,16 +21,16 @@
 #include <stdlib.h>
 #include <algorithm>
 #include <iostream>
-#include "fftwf_tools.hpp"
+#include "fftw_tools.hpp"
 
 
 
-// should I use a template here?
-int fftwf_copy_complex_array(
-        field_descriptor<float> *fi,
-        fftwf_complex *ai,
-        field_descriptor<float> *fo,
-        fftwf_complex *ao)
+template <class rnumber>
+int copy_complex_array(
+        field_descriptor<rnumber> *fi,
+        rnumber (*ai)[2],
+        field_descriptor<rnumber> *fo,
+        rnumber (*ao)[2])
 {
     if (((fi->ndims != 3) ||
          (fo->ndims != 3)) ||
@@ -46,7 +46,7 @@ int fftwf_copy_complex_array(
 
     // clean up destination, in case we're padding with zeros
     // (even if only for one dimension)
-    std::fill_n((float*)ao, fo->local_size, 0.0);
+    std::fill_n((rnumber*)ao, fo->local_size, 0.0);
 
     int64_t ii0, ii1;
     int64_t oi0, oi1;
@@ -74,9 +74,9 @@ int fftwf_copy_complex_array(
             (irank == fi->myrank))
         {
             std::copy(
-                    (float*)(ai + (ii0 - fi->starts[0]    )*fi->slice_size),
-                    (float*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size),
-                    (float*)buffer);
+                    (rnumber*)(ai + (ii0 - fi->starts[0]    )*fi->slice_size),
+                    (rnumber*)(ai + (ii0 - fi->starts[0] + 1)*fi->slice_size),
+                    (rnumber*)buffer);
         }
         else
         {
@@ -119,9 +119,9 @@ int fftwf_copy_complex_array(
                         continue;
                 }
                 std::copy(
-                        (float*)(buffer + ii1*fi->sizes[2]),
-                        (float*)(buffer + ii1*fi->sizes[2] + min_fast_dim),
-                        (float*)(ao +
+                        (rnumber*)(buffer + ii1*fi->sizes[2]),
+                        (rnumber*)(buffer + ii1*fi->sizes[2] + min_fast_dim),
+                        (rnumber*)(ao +
                                  ((oi0 - fo->starts[0])*fo->sizes[1] +
                                   oi1)*fo->sizes[2]));
             }
@@ -133,14 +133,17 @@ int fftwf_copy_complex_array(
     return EXIT_SUCCESS;
 }
 
-int fftwf_clip_zero_padding(
-        field_descriptor<float> *f,
-        float *a,
+
+
+template <class rnumber>
+int clip_zero_padding(
+        field_descriptor<rnumber> *f,
+        rnumber *a,
         int howmany)
 {
     if (f->ndims != 3)
         return EXIT_FAILURE;
-    float *b = a;
+    rnumber *b = a;
     ptrdiff_t copy_size = f->sizes[2] * howmany;
     ptrdiff_t skip_size = copy_size + 2*howmany;
     for (int i0 = 0; i0 < f->subsizes[0]; i0++)
@@ -153,20 +156,46 @@ int fftwf_clip_zero_padding(
     return EXIT_SUCCESS;
 }
 
-int fftwf_get_descriptors_3D(
+
+
+template <class rnumber>
+int get_descriptors_3D(
         int n0, int n1, int n2,
-        field_descriptor<float> **fr,
-        field_descriptor<float> **fc)
+        field_descriptor<rnumber> **fr,
+        field_descriptor<rnumber> **fc)
 {
     int ntmp[3];
     ntmp[0] = n0;
     ntmp[1] = n1;
     ntmp[2] = n2;
-   *fr = new field_descriptor<float>(3, ntmp, MPI_FLOAT, MPI_COMM_WORLD);
+   *fr = new field_descriptor<rnumber>(3, ntmp, MPI_FLOAT, MPI_COMM_WORLD);
     ntmp[0] = n0;
     ntmp[1] = n1;
     ntmp[2] = n2/2+1;
-    *fc = new field_descriptor<float>(3, ntmp, MPI_COMPLEX8, MPI_COMM_WORLD);
+    *fc = new field_descriptor<rnumber>(3, ntmp, MPI_COMPLEX8, MPI_COMM_WORLD);
     return EXIT_SUCCESS;
 }
 
+#define FORCE_IMPLEMENTATION(rnumber) \
+template \
+int copy_complex_array<rnumber>( \
+        field_descriptor<rnumber> *fi, \
+        rnumber (*ai)[2], \
+        field_descriptor<rnumber> *fo, \
+        rnumber (*ao)[2]); \
+ \
+template \
+int clip_zero_padding<rnumber>( \
+        field_descriptor<rnumber> *f, \
+        rnumber *a, \
+        int howmany); \
+ \
+template \
+int get_descriptors_3D<rnumber>( \
+        int n0, int n1, int n2, \
+        field_descriptor<rnumber> **fr, \
+        field_descriptor<rnumber> **fc);
+
+FORCE_IMPLEMENTATION(float)
+FORCE_IMPLEMENTATION(double)
+
diff --git a/src/fftwf_tools.hpp b/src/fftw_tools.hpp
similarity index 76%
rename from src/fftwf_tools.hpp
rename to src/fftw_tools.hpp
index 12cbb300911f5dde102d90828ad1bd1453b1d47f..78fc88c0e912064672af186e299ff732347336fa 100644
--- a/src/fftwf_tools.hpp
+++ b/src/fftw_tools.hpp
@@ -22,9 +22,9 @@
 #include <fftw3-mpi.h>
 #include "field_descriptor.hpp"
 
-#ifndef FFTWF_TOOLS
+#ifndef FFTW_TOOLS
 
-#define FFTWF_TOOLS
+#define FFTW_TOOLS
 
 extern int myrank, nprocs;
 
@@ -32,15 +32,17 @@ extern int myrank, nprocs;
  * Fourier space: either chop off high modes, or pad with zeros.
  * the arrays are assumed to use 3D mpi fftw layout.
  * */
-int fftwf_copy_complex_array(
-        field_descriptor<float> *fi,
-        fftwf_complex *ai,
-        field_descriptor<float> *fo,
-        fftwf_complex *ao);
-
-int fftwf_clip_zero_padding(
-        field_descriptor<float> *f,
-        float *a,
+template <class rnumber>
+int copy_complex_array(
+        field_descriptor<rnumber> *fi,
+        rnumber (*ai)[2],
+        field_descriptor<rnumber> *fo,
+        rnumber (*ao)[2]);
+
+template <class rnumber>
+int clip_zero_padding(
+        field_descriptor<rnumber> *f,
+        rnumber *a,
         int howmany=1);
 
 /* function to get pair of descriptors for real and Fourier space
@@ -51,10 +53,11 @@ int fftwf_clip_zero_padding(
  * 2*fc->local_size, and then the zeros cleaned up before trying
  * to write data.
  * */
-int fftwf_get_descriptors_3D(
+template <class rnumber>
+int get_descriptors_3D(
         int n0, int n1, int n2,
-        field_descriptor<float> **fr,
-        field_descriptor<float> **fc);
+        field_descriptor<rnumber> **fr,
+        field_descriptor<rnumber> **fc);
 
-#endif//FFTWF_TOOLS
+#endif//FFTW_TOOLS
 
diff --git a/src/field_descriptor.cpp b/src/field_descriptor.cpp
index b8a127b54646f3918716cd02305c1f850404ba51..23eaf05986e6df26ff11cfd6336dd0a12c86fa2e 100644
--- a/src/field_descriptor.cpp
+++ b/src/field_descriptor.cpp
@@ -86,9 +86,9 @@ field_descriptor<rnumber>::field_descriptor(
         tsubsizes[i] = this->subsizes[i];
         tstarts[i] = this->starts[i];
     }
-    tsizes[ndims-1] *= sizeof(float);
-    tsubsizes[ndims-1] *= sizeof(float);
-    tstarts[ndims-1] *= sizeof(float);
+    tsizes[ndims-1] *= sizeof(rnumber);
+    tsubsizes[ndims-1] *= sizeof(rnumber);
+    tstarts[ndims-1] *= sizeof(rnumber);
     if (this->mpi_dtype == MPI_COMPLEX8)
     {
         tsizes[ndims-1] *= 2;
@@ -482,4 +482,5 @@ field_descriptor<float>* field_descriptor<float>::get_transpose()
 }
 
 template class field_descriptor<float>;
+template class field_descriptor<double>;
 
diff --git a/src/fluid_solver.cpp b/src/fluid_solver.cpp
index 754ecfa172f9a3125a71b666d091d0fa2b0d03be..e24827858a9a4dbb878967886afe32f5b56686d4 100644
--- a/src/fluid_solver.cpp
+++ b/src/fluid_solver.cpp
@@ -21,7 +21,7 @@
 
 
 #include "fluid_solver.hpp"
-#include "fftwf_tools.hpp"
+#include "fftw_tools.hpp"
 
 
 
@@ -36,7 +36,7 @@ fluid_solver<R>::fluid_solver( \
         int ny, \
         int nz) \
 { \
-    FFTW(get_descriptors_3D)(nz, ny, nx, &this->fr, &this->fc);\
+    get_descriptors_3D<R>(nz, ny, nx, &this->fr, &this->fc);\
     this->cvorticity = FFTW(alloc_complex)(this->fc->local_size*3);\
     this->cvelocity  = FFTW(alloc_complex)(this->fc->local_size*3);\
     this->rvorticity = FFTW(alloc_real)(this->fc->local_size*6);\
diff --git a/src/main_fluid_solver.cpp b/src/main_fluid_solver.cpp
index f673e4a6ffcb7c57812404d80372e4b099a5057a..4c69e86d11597d100e9ccdc9978e82b1c42fd7c6 100644
--- a/src/main_fluid_solver.cpp
+++ b/src/main_fluid_solver.cpp
@@ -18,6 +18,7 @@
 *
 ************************************************************************/
 
+#include "base.hpp"
 #include "fluid_solver.hpp"
 #include <iostream>
 
@@ -29,10 +30,16 @@ int main(int argc, char *argv[])
     MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
     MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
 
-    fluid_solver<float> fs;
+    fluid_solver<float> *fs;
+    fs = new fluid_solver<float>(32, 32, 32);
+    DEBUG_MSG("fluid_solver object created\n");
+
+    delete fs;
+    DEBUG_MSG("fluid_solver object deleted\n");
 
     // clean up
     fftwf_mpi_cleanup();
+    fftw_mpi_cleanup();
     MPI_Finalize();
     return EXIT_SUCCESS;
 }
diff --git a/src/p3DFFT_to_iR.cpp b/src/p3DFFT_to_iR.cpp
index cb4ecba07ee9a040c1e54d3c009881b99c7699ae..60ea120ea7d7adfd7ff049cfd781c1517e2d1cc3 100644
--- a/src/p3DFFT_to_iR.cpp
+++ b/src/p3DFFT_to_iR.cpp
@@ -69,7 +69,7 @@ p3DFFT_to_iR::p3DFFT_to_iR(
 
     // following 3 arguments are dimensions for real space grid dimensions
     // f3r and f3c will be allocated in this call
-    fftwf_get_descriptors_3D(
+    get_descriptors_3D<float>(
             N0, N1, N2,
             &this->f3r, &this->f3c);
 
@@ -141,7 +141,7 @@ int p3DFFT_to_iR::read(
         this->f1c->transpose(this->c12);
         DEBUG_MSG("p3DFFT_to_iR::read "
                   "fftwf_copy_complex_array(\n");
-        fftwf_copy_complex_array(
+        copy_complex_array<float>(
                 this->f2c, this->c12,
                 this->f3c, this->c3 + i*this->f3c->local_size);
     }
@@ -156,7 +156,7 @@ int p3DFFT_to_iR::read(
 
     DEBUG_MSG("p3DFFT_to_iR::read "
               "fftwf_clip_zero_padding(this->f3r, this->r3, 2);\n");
-    fftwf_clip_zero_padding(this->f3r, this->r3, this->howmany);
+    clip_zero_padding<float>(this->f3r, this->r3, this->howmany);
     DEBUG_MSG("p3DFFT_to_iR::read return\n");
     return EXIT_SUCCESS;
 }
diff --git a/src/p3DFFT_to_iR.hpp b/src/p3DFFT_to_iR.hpp
index db52e7968cca7da347eb03647cf6a97e868df4ca..63a25c914656b1b2ac64e01e1bf0a5da10b65793 100644
--- a/src/p3DFFT_to_iR.hpp
+++ b/src/p3DFFT_to_iR.hpp
@@ -22,7 +22,7 @@
 #include <stdlib.h>
 #include <iostream>
 #include "field_descriptor.hpp"
-#include "fftwf_tools.hpp"
+#include "fftw_tools.hpp"
 
 #ifndef P3DFFT_TO_IR