add preliminary full transformer

dd596ee4 · Chichi Lalescu · a71f1f91 · dd596ee4 · dd596ee4 · dd596ee4
Commit dd596ee4 authored 10 years ago by Chichi Lalescu
--- a/Untitled0.ipynb
+++ b/Untitled0.ipynb
--- a/makefile
+++ b/makefile
@@ -13,7 +13,8 @@ vpath %.cpp ./src/

 src := \
 	field_descriptor.cpp \
-	fftwf_tools.cpp
+	fftwf_tools.cpp \
+	RMHD_converter.cpp

 obj := $(patsubst %.cpp, ./obj/%.o, ${src})

@@ -22,11 +23,17 @@ obj := $(patsubst %.cpp, ./obj/%.o, ${src})
 		${CFLAGS} \
 		-c $^ -o $@

+exec = \
+	   transpose \
+	   resize \
+	   resize_and_transpose \
+	   full
+
 transpose: ${obj} ./obj/transpose.o
 	${LINKER} \
 		./obj/transpose.o \
 		${obj} \
-		-o transpose \
+		-o $@ \
 		${LIBS} \
 		${NULL}

@@ -34,7 +41,7 @@ resize: ${obj} ./obj/resize.o
 	${LINKER} \
 		./obj/resize.o \
 		${obj} \
-		-o resize \
+		-o $@ \
 		${LIBS} \
 		${NULL}

@@ -42,10 +49,19 @@ resize_and_transpose: ${obj} ./obj/resize_and_transpose.o
 	${LINKER} \
 		./obj/resize_and_transpose.o \
 		${obj} \
-		-o resize_and_transpose \
+		-o $@ \
+		${LIBS} \
+		${NULL}
+
+full: ${obj} ./obj/full.o
+	${LINKER} \
+		./obj/full.o \
+		${obj} \
+		-o $@ \
 		${LIBS} \
 		${NULL}

 clean:
 	rm ./obj/*.o
-	rm t2D
+	rm -f ${exec}
+
--- a/src/RMHD_converter.cpp
+++ b/src/RMHD_converter.cpp
+#include "RMHD_converter.hpp"
+
+RMHD_converter::RMHD_converter(
+        int n0, int n1, int n2,
+        int N0, int N1, int N2)
+{
+    int n[3];
+
+    // first 3 arguments are dimensions for input array
+    // i.e. actual dimensions for the Fourier representation.
+    // NOT real space grid dimensions
+    // the input array is read in as a 2D array,
+    // since the first dimension must be a multiple of nprocs
+    // (which is generally an even number)
+    n[0] = n0*n1;
+    n[1] = n2;
+    this->f0c = new field_descriptor(2, n, MPI_COMPLEX8);
+
+    // f1c will be pointing at the input array after it has been
+    // transposed in 2D, therefore we have this correspondence:
+    // f0c->sizes[0] = f1c->sizes[1]*f1c->sizes[2]
+    n[0] = n2;
+    n[1] = n0;
+    n[2] = n1;
+    this->f1c = new field_descriptor(3, n, MPI_COMPLEX8);
+
+    // the description for the fully transposed field
+    n[0] = n2;
+    n[1] = n1;
+    n[2] = n0;
+    this->f2c = new field_descriptor(3, n, MPI_COMPLEX8);
+
+    // following 3 arguments are dimensions for output array
+    // i.e. real space grid dimensions
+    // f3r and f3c will be allocated in this call
+    fftwf_get_descriptors_3D(
+            N0, N1, N2,
+            &this->f3r, &this->f3c);
+
+    //allocate fields
+    this->c0  = fftwf_alloc_complex(this->f0c->local_size);
+    this->c12 = fftwf_alloc_complex(this->f1c->local_size);
+    this->c3  = fftwf_alloc_complex(this->f3c->local_size);
+    // 4 instead of 2, because we have 2 fields to write
+    this->r3  = fftwf_alloc_real( 4*this->f3c->local_size);
+
+    // allocate plans
+    this->complex2real0 = fftwf_mpi_plan_dft_c2r_3d(
+            f3r->sizes[0], f3r->sizes[1], f3r->sizes[2],
+            this->c3, this->r3,
+            MPI_COMM_WORLD,
+            FFTW_ESTIMATE);
+    this->complex2real1 = fftwf_mpi_plan_dft_c2r_3d(
+            f3r->sizes[0], f3r->sizes[1], f3r->sizes[2],
+            this->c3, this->r3 + 2*this->f3c->local_size,
+            MPI_COMM_WORLD,
+            FFTW_PATIENT);
+
+    // the description for the output
+    n[0] = N0*2;
+    n[1] = N1;
+    n[2] = N2;
+    this->f4r = new field_descriptor(3, n, MPI_REAL4);
+}
+
+int RMHD_converter::convert(
+        const char *ifile0,
+        const char *ifile1,
+        const char *ofile)
+{
+    //read first field
+    this->f0c->read(ifile0, (void*)this->c0);
+    this->f0c->transpose(this->c0, this->c12);
+    this->f1c->transpose(this->c12);
+    fftwf_copy_complex_array(
+            this->f2c, this->c12,
+            this->f3c, this->c3);
+    fftwf_execute(this->complex2real0);
+
+    //read second field
+    this->f0c->read(ifile1, (void*)this->c0);
+    this->f0c->transpose(this->c0, this->c12);
+    this->f1c->transpose(this->c12);
+    fftwf_copy_complex_array(
+            this->f2c, this->c12,
+            this->f3c, this->c3);
+    fftwf_execute(this->complex2real1);
+
+    fftwf_clip_zero_padding(this->f4r, this->r3);
+
+    //now mix the two components
+    float *atmp = fftwf_alloc_real(this->f4r->slice_size);
+
+    for (int k = 0; k < this->f4r->subsizes[0]; k++)
+    {
+        // put transposed slice in atmp
+        for (int j = 0; j < this->f3r->slice_size; j++)
+            for (int i = 0; i < 2; i++)
+                atmp[i*2 + j] = this->r3[j*2 + i];
+        // copy back transposed slice
+        for (int j = 0; j < 2; j++)
+            for (int i = 0; i < this->f3r->slice_size; i++)
+                this->r3[i*2 + j] = atmp[j*2 + i];
+    }
+    fftwf_free(atmp);
+
+    f4r->write(ofile, (void*)this->r3);
+    return EXIT_SUCCESS;
+}
+
--- a/src/RMHD_converter.hpp
+++ b/src/RMHD_converter.hpp
+#include <stdio.h>
+#include <stdlib.h>
+#include <iostream>
+#include "field_descriptor.hpp"
+#include "fftwf_tools.hpp"
+
+#ifndef RMHD_CONVERTER
+
+#define RMHD_CONVERTER
+
+class RMHD_converter
+{
+    public:
+        /* members */
+        field_descriptor *f0c = NULL; // descriptor for 2D input
+        field_descriptor *f1c = NULL; // descriptor for 2D transposed input
+        field_descriptor *f2c = NULL; // descriptor for 3D fully transposed input
+        field_descriptor *f3c = NULL, *f3r=NULL; // descriptors for FFT
+        field_descriptor *f4r = NULL; //descriptor for output
+
+        fftwf_complex *c0  = NULL; // array to store 2D input
+        fftwf_complex *c12 = NULL; // array to store transposed input
+        fftwf_complex *c3  = NULL; // array to store resized Fourier data
+        float *r3          = NULL; // array to store real space data
+
+        fftwf_plan complex2real0;
+        fftwf_plan complex2real1;
+
+        /* methods */
+        RMHD_converter(
+                int n0, int n1, int n2,
+                int N0, int N1, int N2);
+        ~RMHD_converter()
+        {
+            if (this->f0c != NULL) delete this->f0c;
+            if (this->f1c != NULL) delete this->f1c;
+            if (this->f2c != NULL) delete this->f2c;
+            if (this->f3c != NULL) delete this->f3c;
+            if (this->f3r != NULL) delete this->f3r;
+            if (this->f4r != NULL) delete this->f4r;
+
+            if (this->c0  != NULL) fftwf_free(this->c0);
+            if (this->c12 != NULL) fftwf_free(this->c12);
+            if (this->c3  != NULL) fftwf_free(this->c3);
+            if (this->r3  != NULL) fftwf_free(this->r3);
+
+            fftwf_destroy_plan(this->complex2real0);
+            fftwf_destroy_plan(this->complex2real1);
+        }
+
+        int convert(
+                const char *ifile0,
+                const char *ifile1,
+                const char *ofile);
+};
+
+#endif//RMHD_CONVERTER
+
--- a/src/fftwf_tools.cpp
+++ b/src/fftwf_tools.cpp
+#include <stdlib.h>
+#include <algorithm>
+#include <iostream>
+#include "field_descriptor.hpp"
+
+extern int myrank, nprocs;
+
+// should I use a template here?
+int fftwf_copy_complex_array(
+        field_descriptor *fi,
+        fftwf_complex *ai,
+        field_descriptor *fo,
+        fftwf_complex *ao)
+{
+    if ((fi->ndims != 3) || (fo->ndims != 3))
+        return EXIT_FAILURE;
+    fftwf_complex *buffer;
+    buffer = fftwf_alloc_complex(fi->slice_size);
+
+    int min_fast_dim;
+    min_fast_dim =
+        (fi->sizes[fi->ndims - 1] > fo->sizes[fi->ndims - 1]) ?
+         fo->sizes[fi->ndims - 1] : fi->sizes[fi->ndims - 1];
+
+    // clean up destination, in case we're padding with zeros
+    // (even if only for one dimension)
+    std::fill_n((float*)ao, fo->local_size, 0.0);
+
+    int64_t ii0, ii1;
+    int64_t oi0, oi1;
+    int64_t delta1, delta0;
+    delta0 = (fo->sizes[0] - fi->sizes[0]);
+    delta1 = (fo->sizes[1] - fi->sizes[1]);
+    for (ii0=0; ii0 < fi->sizes[0]; ii0++)
+    {
+        if (ii0 <= fi->sizes[0]/2)
+        {
+            oi0 = ii0;
+            if (oi0 > fo->sizes[0]/2)
+                continue;
+        }
+        else
+        {
+            oi0 = ii0 + delta0;
+            if ((oi0 < 0) || ((fo->sizes[0] - oi0) >= fo->sizes[0]/2))
+                continue;
+        }
+        if ((fi->rank(ii0) == fo->rank(oi0)) &&
+            (myrank == fi->rank(ii0)))
+        {
+            std::copy(
+                    ai + (ii0 - fi->starts[0]    )*fi->slice_size,
+                    ai + (ii0 - fi->starts[0] + 1)*fi->slice_size,
+                    buffer);
+        }
+        else
+        {
+            if (myrank == fi->rank(ii0))
+            {
+                MPI_Send(
+                        (void*)(ai + (ii0-fi->starts[0])*fi->slice_size),
+                        fi->slice_size,
+                        MPI_COMPLEX8,
+                        fo->rank(oi0),
+                        ii0,
+                        MPI_COMM_WORLD);
+            }
+            if (myrank == fo->rank(oi0))
+            {
+                MPI_Recv(
+                        (void*)(buffer),
+                        fi->slice_size,
+                        MPI_COMPLEX8,
+                        fi->rank(ii0),
+                        ii0,
+                        MPI_COMM_WORLD,
+                        MPI_STATUS_IGNORE);
+            }
+        }
+        if (myrank == fo->rank(oi0))
+        {
+            for (ii1 = 0; ii1 < fi->sizes[1]; ii1++)
+            {
+                if (ii1 <= fi->sizes[1]/2)
+                {
+                    oi1 = ii1;
+                    if (oi1 > fo->sizes[1]/2)
+                        continue;
+                }
+                else
+                {
+                    oi1 = ii1 + delta1;
+                    if ((oi1 < 0) || ((fo->sizes[1] - oi1) >= fo->sizes[1]/2))
+                        continue;
+                }
+                std::copy(
+                        (buffer + ii1*fi->sizes[2]),
+                        (buffer + ii1*fi->sizes[2] + min_fast_dim),
+                        (ao + ((oi0 - fo->starts[0])*fo->sizes[1] + oi1)*fo->sizes[2]));
+            }
+        }
+    }
+    fftw_free(buffer);
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    return EXIT_SUCCESS;
+}
+
+int fftwf_clip_zero_padding(
+        field_descriptor *f,
+        float *a)
+{
+    if (f->ndims != 3)
+        return EXIT_FAILURE;
+    float *b = a;
+    for (int i0 = 0; i0 < f->subsizes[0]; i0++)
+        for (int i1 = 0; i1 < f->sizes[1]; i1++)
+        {
+            std::copy(a, a + f->sizes[2], b);
+            a += f->sizes[2] + 2;
+            b += f->sizes[2];
+        }
+    return EXIT_SUCCESS;
+}
+
+int fftwf_get_descriptors_3D(
+        int n0, int n1, int n2,
+        field_descriptor **fr,
+        field_descriptor **fc)
+{
+    // I need to check whether there's already something there...
+    if (*fr != NULL) delete *fr;
+    if (*fc != NULL) delete *fc;
+    int ntmp[3];
+    ntmp[0] = n0;
+    ntmp[1] = n1;
+    ntmp[2] = n2;
+    *fr = new field_descriptor(3, ntmp, MPI_REAL4);
+    ntmp[0] = n0;
+    ntmp[1] = n1;
+    ntmp[2] = n2/2+1;
+    *fc = new field_descriptor(3, ntmp, MPI_COMPLEX8);
+    return EXIT_SUCCESS;
+}
+
--- a/src/fftwf_tools.hpp
+++ b/src/fftwf_tools.hpp
+#include <mpi.h>
+#include <fftw3-mpi.h>
+
+#ifndef FFTWF_TOOLS
+
+#define FFTWF_TOOLS
+
+/* given two arrays of the same dimension, we do a simple resize in
+ * Fourier space: either chop off high modes, or pad with zeros.
+ * the arrays are assumed to use 3D mpi fftw layout.
+ * */
+int fftwf_copy_complex_array(
+        field_descriptor *fi,
+        fftwf_complex *ai,
+        field_descriptor *fo,
+        fftwf_complex *ao);
+
+int fftwf_clip_zero_padding(
+        field_descriptor *f,
+        float *a);
+
+/* function to get pair of descriptors for real and Fourier space
+ * arrays used with fftw.
+ * the n0, n1, n2 correspond to the real space data WITHOUT the zero
+ * padding that FFTW needs.
+ * IMPORTANT: the real space array must be allocated with
+ * 2*fc->local_size, and then the zeros cleaned up before trying
+ * to write data.
+ * */
+int fftwf_get_descriptors_3D(
+        int n0, int n1, int n2,
+        field_descriptor **fr,
+        field_descriptor **fc);
+
+#endif//FFTWF_TOOLS
+
--- a/src/field_descriptor.hpp
+++ b/src/field_descriptor.hpp
 #include <mpi.h>
 #include <fftw3-mpi.h>

-#ifndef __FIELD_DESCRIPTOR__
+#ifndef FIELD_DESCRIPTOR

-#define __FIELD_DESCRIPTOR__
+#define FIELD_DESCRIPTOR

 class field_descriptor
 {
@@ -14,7 +14,7 @@ class field_descriptor
        int *subsizes = NULL;
        int *starts   = NULL;
        int ndims;
-        int slice_size, local_size, full_size;
+        ptrdiff_t slice_size, local_size, full_size;
        MPI_Datatype mpi_array_dtype, mpi_dtype;


@@ -72,5 +72,5 @@ int fftwf_clip_zero_padding(
        field_descriptor *f,
        float *a);

-#endif//__FIELD_DESCRIPTOR__
+#endif//FIELD_DESCRIPTOR

--- a/src/full.cpp
+++ b/src/full.cpp
+#include "RMHD_converter.hpp"
+
+int myrank, nprocs;
+
+int main(int argc, char *argv[])
+{
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+
+    RMHD_converter *bla = new RMHD_converter(
+            atoi(argv[1]), atoi(argv[2]), atoi(argv[3]),
+            atoi(argv[4]), atoi(argv[5]), atoi(argv[6]));
+
+    bla->convert("Kdata0", "Kdata1", "Rdata");
+    delete bla;
+
+    // clean up
+    fftwf_mpi_cleanup();
+    MPI_Finalize();
+    return EXIT_SUCCESS;
+}
+
--- a/src/resize.cpp
+++ b/src/resize.cpp
@@ -2,37 +2,10 @@
 #include <stdlib.h>
 #include <iostream>
 #include "field_descriptor.hpp"
+#include "fftwf_tools.hpp"

 int myrank, nprocs;

-/* function to get pair of descriptors for real and Fourier space
- * arrays used with fftw.
- * the n0, n1, n2 correspond to the real space data WITHOUT the zero
- * padding that FFTW needs.
- * IMPORTANT: the real space array must be allocated with
- * 2*fc->local_size, and then the zeros cleaned up before trying
- * to write data.
- * */
-int fftwf_get_descriptors_3D(
-        int n0, int n1, int n2,
-        field_descriptor **fr,
-        field_descriptor **fc)
-{
-    // I need to check whether there's already something there...
-    if (*fr != NULL) delete *fr;
-    if (*fc != NULL) delete *fc;
-    int ntmp[3];
-    ntmp[0] = n0;
-    ntmp[1] = n1;
-    ntmp[2] = n2;
-    *fr = new field_descriptor(3, ntmp, MPI_REAL4);
-    ntmp[0] = n0;
-    ntmp[1] = n1;
-    ntmp[2] = n2/2+1;
-    *fc = new field_descriptor(3, ntmp, MPI_COMPLEX8);
-    return EXIT_SUCCESS;
-}
-
 int main(int argc, char *argv[])
 {
    MPI_Init(&argc, &argv);

--- a/src/resize_and_transpose.cpp
+++ b/src/resize_and_transpose.cpp
+#include <stdio.h>
+#include <stdlib.h>
+#include <iostream>
+#include "field_descriptor.hpp"
+#include "fftwf_tools.hpp"
+
+int myrank, nprocs;
+
+int main(int argc, char *argv[])
+{
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+    fftwf_mpi_init();
+
+    field_descriptor *f0c = NULL; // 2D input descriptor, we need to transpose it
+    field_descriptor *f1c = NULL; // descriptor for 3D fully transposed input
+    field_descriptor *f2c = NULL, *f2r=NULL; // descriptors for output
+
+    if (argc != 7)
+    {
+        MPI_Finalize();
+        printf("wrong number of parameters.\n");
+        return EXIT_SUCCESS;
+    }
+
+    // first 3 arguments are dimensions for input array
+    // i.e. actual dimensions for the Fourier representation.
+    // NOT real space grid dimensions
+    int ni[3];
+
+    // the input array is read in as a 2D array,
+    // since the first dimension must be a multiple of nprocs
+    // (which is generally an even number)
+    ni[0] = atoi(argv[1])*atoi(argv[2]);
+    ni[1] = atoi(argv[3]);
+    f0c = new field_descriptor(2, ni, MPI_COMPLEX8);
+    // f1c will be pointing at the input array after it has been
+    // transposed, therefore we have this correspondence:
+    // f0c->sizes[0] = f1c->sizes[1]*f1c->sizes[2]
+    ni[0] = atoi(argv[3]);
+    ni[1] = atoi(argv[1]);
+    ni[2] = atoi(argv[2]);
+    f1c = new field_descriptor(3, ni, MPI_COMPLEX8);
+
+    fftwf_complex *c0, *c1, *c2;
+    c0 = fftwf_alloc_complex(f0c->local_size);
+    f0c->read("data0c", (void*)c0);
+    c1 = fftwf_alloc_complex(f1c->local_size);
+    f0c->transpose(c0, c1);
+    // we don't need c0 anymore
+    fftwf_free(c0);
+
+    // transpose last two dimensions here
+    f1c->transpose(c1);
+
+    // clear f1c, and put in the description for the fully
+    // transposed field
+    delete f1c;
+    ni[0] = atoi(argv[3]);
+    ni[1] = atoi(argv[2]);
+    ni[2] = atoi(argv[1]);
+    f1c = new field_descriptor(3, ni, MPI_COMPLEX8);
+
+    // following 3 arguments are dimensions for output array
+    // i.e. real space grid dimensions
+    // f2r and f2c will be allocated in this call
+    fftwf_get_descriptors_3D(
+            atoi(argv[4]), atoi(argv[5]), atoi(argv[6]),
+            &f2r, &f2c);
+
+    c2 = fftwf_alloc_complex(f2c->local_size);
+
+    // pad input array with zeros
+    // or call it Fourier interpolation if you like
+    fftwf_copy_complex_array(
+            f1c, c1,
+            f2c, c2);
+
+    // we don't need c1 anymore
+    fftwf_free(c1);
+
+    float *r2;
+    r2 = fftwf_alloc_real(2*f2c->local_size);
+
+    fftwf_plan c2r = fftwf_mpi_plan_dft_c2r_3d(
+            f2r->sizes[0], f2r->sizes[1], f2r->sizes[2],
+            c2, r2,
+            MPI_COMM_WORLD,
+            FFTW_ESTIMATE);
+    fftwf_execute(c2r);
+    fftwf_destroy_plan(c2r);
+
+    // we don't need c2 anymore
+    fftw_free(c2);
+
+    fftwf_clip_zero_padding(f2r, r2);
+    f2r->write("data2r", (void*)r2);
+
+    // clean up
+    fftw_free(r2);
+    fftwf_mpi_cleanup();
+    delete f0c;
+    delete f1c;
+    delete f2r;
+    delete f2c;
+    MPI_Finalize();
+    return EXIT_SUCCESS;
+}
+