Commit ea7af0ef authored by Rainer Weinberger's avatar Rainer Weinberger

clang-format applied also to src/*/*/*.c and src/*/*/*.h files

parent 1b54e20b
......@@ -37,8 +37,8 @@
#include "../main/allvars.h"
#include "../main/proto.h"
#include "add_bggrid.h"
#include "../domain/domain.h"
#include "add_bggrid.h"
#ifdef ADDBACKGROUNDGRID
......
......@@ -351,7 +351,7 @@ void fof_prepare_output_order(void)
aux_sort[i].DM_BindingEnergy = PS[i].BindingEnergy;
#endif /* #ifdef SUBFIND */
aux_sort[i].Type = P[i].Type;
aux_sort[i].ID = P[i].ID;
aux_sort[i].ID = P[i].ID;
#if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT)
aux_sort[i].FileOrder = P[i].FileOrder;
#endif /* #if defined(RECOMPUTE_POTENTIAL_IN_SNAPSHOT) */
......
......@@ -82,21 +82,17 @@
* - 26.05.2018 Prepared file for public release -- Rainer Weinberger
*/
#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "../../main/allvars.h"
#include "../../main/proto.h"
#if defined(PMGRID)
#ifndef FFT_COLUMN_BASED
/*! \brief Initializes slab based FFT.
*
......@@ -107,12 +103,12 @@
*
* \return void
*/
void my_slab_based_fft_init(fft_plan * plan, int NgridX, int NgridY, int NgridZ)
void my_slab_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ)
{
subdivide_evenly(NgridX, NTask, ThisTask, &plan->slabstart_x, &plan->nslab_x);
subdivide_evenly(NgridY, NTask, ThisTask, &plan->slabstart_y, &plan->nslab_y);
plan->slab_to_task = (int *) mymalloc("slab_to_task", NgridX * sizeof(int));
plan->slab_to_task = (int *)mymalloc("slab_to_task", NgridX * sizeof(int));
for(int task = 0; task < NTask; task++)
{
......@@ -127,29 +123,28 @@ void my_slab_based_fft_init(fft_plan * plan, int NgridX, int NgridY, int NgridZ)
MPI_Allreduce(&plan->nslab_x, &plan->largest_x_slab, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
MPI_Allreduce(&plan->nslab_y, &plan->largest_y_slab, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
plan->slabs_x_per_task = (int *) mymalloc("slabs_x_per_task", NTask * sizeof(int));
plan->slabs_x_per_task = (int *)mymalloc("slabs_x_per_task", NTask * sizeof(int));
MPI_Allgather(&plan->nslab_x, 1, MPI_INT, plan->slabs_x_per_task, 1, MPI_INT, MPI_COMM_WORLD);
plan->first_slab_x_of_task = (int *) mymalloc("first_slab_x_of_task", NTask * sizeof(int));
plan->first_slab_x_of_task = (int *)mymalloc("first_slab_x_of_task", NTask * sizeof(int));
MPI_Allgather(&plan->slabstart_x, 1, MPI_INT, plan->first_slab_x_of_task, 1, MPI_INT, MPI_COMM_WORLD);
plan->slabs_y_per_task = (int *) mymalloc("slabs_y_per_task", NTask * sizeof(int));
plan->slabs_y_per_task = (int *)mymalloc("slabs_y_per_task", NTask * sizeof(int));
MPI_Allgather(&plan->nslab_y, 1, MPI_INT, plan->slabs_y_per_task, 1, MPI_INT, MPI_COMM_WORLD);
plan->first_slab_y_of_task = (int *) mymalloc("first_slab_y_of_task", NTask * sizeof(int));
plan->first_slab_y_of_task = (int *)mymalloc("first_slab_y_of_task", NTask * sizeof(int));
MPI_Allgather(&plan->slabstart_y, 1, MPI_INT, plan->first_slab_y_of_task, 1, MPI_INT, MPI_COMM_WORLD);
plan->NgridX = NgridX;
plan->NgridY = NgridY;
plan->NgridZ = NgridZ;
int Ngridz = NgridZ / 2 + 1; /* dimension needed in complex space */
int Ngridz = NgridZ / 2 + 1; /* dimension needed in complex space */
plan->Ngridz = Ngridz;
plan->Ngrid2 = 2 * Ngridz;
}
/*! \brief Transposes the array field.
*
* The array field is transposed such that the data in x direction is local
......@@ -164,7 +159,7 @@ void my_slab_based_fft_init(fft_plan * plan, int NgridX, int NgridY, int NgridZ)
*
* \return void
*/
void my_slab_transposeA(fft_plan * plan, fft_real * field, fft_real * scratch)
void my_slab_transposeA(fft_plan *plan, fft_real *field, fft_real *scratch)
{
int n, prod, task, flag_big = 0, flag_big_all = 0;
......@@ -172,21 +167,21 @@ void my_slab_transposeA(fft_plan * plan, fft_real * field, fft_real * scratch)
for(n = 0; n < prod; n++)
{
int x = n / NTask;
int x = n / NTask;
int task = n % NTask;
int y;
for(y = plan->first_slab_y_of_task[task]; y < plan->first_slab_y_of_task[task] + plan->slabs_y_per_task[task]; y++)
memcpy(scratch + ((size_t) plan->NgridZ) * (plan->first_slab_y_of_task[task] * plan->nslab_x
+ x * plan->slabs_y_per_task[task] + (y - plan->first_slab_y_of_task[task])),
field + ((size_t) plan->Ngrid2) * (plan->NgridY * x + y), plan->NgridZ * sizeof(fft_real));
memcpy(scratch + ((size_t)plan->NgridZ) * (plan->first_slab_y_of_task[task] * plan->nslab_x +
x * plan->slabs_y_per_task[task] + (y - plan->first_slab_y_of_task[task])),
field + ((size_t)plan->Ngrid2) * (plan->NgridY * x + y), plan->NgridZ * sizeof(fft_real));
}
size_t *scount = (size_t *) mymalloc("scount", NTask * sizeof(size_t));
size_t *rcount = (size_t *) mymalloc("rcount", NTask * sizeof(size_t));
size_t *soff = (size_t *) mymalloc("soff", NTask * sizeof(size_t));
size_t *roff = (size_t *) mymalloc("roff", NTask * sizeof(size_t));
size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t));
size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t));
size_t *soff = (size_t *)mymalloc("soff", NTask * sizeof(size_t));
size_t *roff = (size_t *)mymalloc("roff", NTask * sizeof(size_t));
for(task = 0; task < NTask; task++)
{
......@@ -210,7 +205,6 @@ void my_slab_transposeA(fft_plan * plan, fft_real * field, fft_real * scratch)
myfree(scount);
}
/*! \brief Undo the transposition of the array field.
*
* The transposition of the array field is undone such that the data in
......@@ -224,14 +218,14 @@ void my_slab_transposeA(fft_plan * plan, fft_real * field, fft_real * scratch)
*
* \return void
*/
void my_slab_transposeB(fft_plan * plan, fft_real * field, fft_real * scratch)
void my_slab_transposeB(fft_plan *plan, fft_real *field, fft_real *scratch)
{
int n, prod, task, flag_big = 0, flag_big_all = 0;
size_t *scount = (size_t *) mymalloc("scount", NTask * sizeof(size_t));
size_t *rcount = (size_t *) mymalloc("rcount", NTask * sizeof(size_t));
size_t *soff = (size_t *) mymalloc("soff", NTask * sizeof(size_t));
size_t *roff = (size_t *) mymalloc("roff", NTask * sizeof(size_t));
size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t));
size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t));
size_t *soff = (size_t *)mymalloc("soff", NTask * sizeof(size_t));
size_t *roff = (size_t *)mymalloc("roff", NTask * sizeof(size_t));
for(task = 0; task < NTask; task++)
{
......@@ -258,18 +252,18 @@ void my_slab_transposeB(fft_plan * plan, fft_real * field, fft_real * scratch)
for(n = 0; n < prod; n++)
{
int x = n / NTask;
int x = n / NTask;
int task = n % NTask;
int y;
for(y = plan->first_slab_y_of_task[task]; y < plan->first_slab_y_of_task[task] + plan->slabs_y_per_task[task]; y++)
memcpy(field + ((size_t) plan->Ngrid2) * (plan->NgridY * x + y),
scratch + ((size_t) plan->NgridZ) * (plan->first_slab_y_of_task[task] * plan->nslab_x + x * plan->slabs_y_per_task[task] + (y - plan->first_slab_y_of_task[task])),
memcpy(field + ((size_t)plan->Ngrid2) * (plan->NgridY * x + y),
scratch + ((size_t)plan->NgridZ) * (plan->first_slab_y_of_task[task] * plan->nslab_x +
x * plan->slabs_y_per_task[task] + (y - plan->first_slab_y_of_task[task])),
plan->NgridZ * sizeof(fft_real));
}
}
/* \brief Transpose a slab decomposed 3D field.
*
* Given a slab-decomposed 3D field a[...] with total dimension
......@@ -299,21 +293,21 @@ void my_slab_transposeB(fft_plan * plan, fft_real * field, fft_real * scratch)
*/
static void my_slab_transpose(void *av, void *bv, int *sx, int *firstx, int *sy, int *firsty, int nx, int ny, int nz, int mode)
{
char *a = (char *) av;
char *b = (char *) bv;
char *a = (char *)av;
char *b = (char *)bv;
size_t *scount = (size_t *) mymalloc("scount", NTask * sizeof(size_t));
size_t *rcount = (size_t *) mymalloc("rcount", NTask * sizeof(size_t));
size_t *soff = (size_t *) mymalloc("soff", NTask * sizeof(size_t));
size_t *roff = (size_t *) mymalloc("roff", NTask * sizeof(size_t));
size_t *scount = (size_t *)mymalloc("scount", NTask * sizeof(size_t));
size_t *rcount = (size_t *)mymalloc("rcount", NTask * sizeof(size_t));
size_t *soff = (size_t *)mymalloc("soff", NTask * sizeof(size_t));
size_t *roff = (size_t *)mymalloc("roff", NTask * sizeof(size_t));
int i, n, prod, flag_big = 0, flag_big_all = 0;
for(i = 0; i < NTask; i++)
{
scount[i] = sy[i] * sx[ThisTask] * ((size_t) nz);
rcount[i] = sy[ThisTask] * sx[i] * ((size_t) nz);
soff[i] = firsty[i] * sx[ThisTask] * ((size_t) nz);
roff[i] = sy[ThisTask] * firstx[i] * ((size_t) nz);
scount[i] = sy[i] * sx[ThisTask] * ((size_t)nz);
rcount[i] = sy[ThisTask] * sx[i] * ((size_t)nz);
soff[i] = firsty[i] * sx[ThisTask] * ((size_t)nz);
roff[i] = sy[ThisTask] * firstx[i] * ((size_t)nz);
if(scount[i] * sizeof(fft_complex) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
flag_big = 1;
......@@ -335,7 +329,8 @@ static void my_slab_transpose(void *av, void *bv, int *sx, int *firstx, int *sy,
int j;
for(j = 0; j < sy[i]; j++)
memcpy(b + (k * sy[i] + j + firsty[i] * sx[ThisTask]) * (nz * sizeof(fft_complex)), a + (k * ny + (firsty[i] + j)) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
memcpy(b + (k * sy[i] + j + firsty[i] * sx[ThisTask]) * (nz * sizeof(fft_complex)),
a + (k * ny + (firsty[i] + j)) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
}
/* tranfer the data */
......@@ -350,7 +345,8 @@ static void my_slab_transpose(void *av, void *bv, int *sx, int *firstx, int *sy,
int k;
for(k = 0; k < sx[i]; k++)
memcpy(b + (j * nx + k + firstx[i]) * (nz * sizeof(fft_complex)), a + ((k + firstx[i]) * sy[ThisTask] + j) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
memcpy(b + (j * nx + k + firstx[i]) * (nz * sizeof(fft_complex)),
a + ((k + firstx[i]) * sy[ThisTask] + j) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
}
}
else
......@@ -364,7 +360,8 @@ static void my_slab_transpose(void *av, void *bv, int *sx, int *firstx, int *sy,
int k;
for(k = 0; k < sx[i]; k++)
memcpy(b + ((k + firstx[i]) * sy[ThisTask] + j) * (nz * sizeof(fft_complex)), a + (j * nx + k + firstx[i]) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
memcpy(b + ((k + firstx[i]) * sy[ThisTask] + j) * (nz * sizeof(fft_complex)),
a + (j * nx + k + firstx[i]) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
}
/* tranfer the data */
......@@ -379,7 +376,8 @@ static void my_slab_transpose(void *av, void *bv, int *sx, int *firstx, int *sy,
int j;
for(j = 0; j < sy[i]; j++)
memcpy(b + (k * ny + (firsty[i] + j)) * (nz * sizeof(fft_complex)), a + (k * sy[i] + j + firsty[i] * sx[ThisTask]) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
memcpy(b + (k * ny + (firsty[i] + j)) * (nz * sizeof(fft_complex)),
a + (k * sy[i] + j + firsty[i] * sx[ThisTask]) * (nz * sizeof(fft_complex)), nz * sizeof(fft_complex));
}
}
/* now the result is in b[] */
......@@ -390,7 +388,6 @@ static void my_slab_transpose(void *av, void *bv, int *sx, int *firstx, int *sy,
myfree(scount);
}
/*! \brief Performs a slab-based Fast Fourier transformation.
*
* \param[in] plan FFT plan.
......@@ -400,24 +397,24 @@ static void my_slab_transpose(void *av, void *bv, int *sx, int *firstx, int *sy,
*
* \return void
*/
void my_slab_based_fft(fft_plan * plan, void *data, void *workspace, int forward)
void my_slab_based_fft(fft_plan *plan, void *data, void *workspace, int forward)
{
int n, prod;
int slabsx = plan->slabs_x_per_task[ThisTask];
int slabsy = plan->slabs_y_per_task[ThisTask];
int ngridx = plan->NgridX;
int ngridy = plan->NgridY;
int ngridz = plan->Ngridz;
int ngridx = plan->NgridX;
int ngridy = plan->NgridY;
int ngridz = plan->Ngridz;
int ngridz2 = 2 * ngridz;
size_t ngridx_long = ngridx;
size_t ngridy_long = ngridy;
size_t ngridz_long = ngridz;
size_t ngridx_long = ngridx;
size_t ngridy_long = ngridy;
size_t ngridz_long = ngridz;
size_t ngridz2_long = ngridz2;
fft_real *data_real = (fft_real *) data;
fft_complex *data_complex = (fft_complex *) data, *workspace_complex = (fft_complex *) workspace;
fft_real *data_real = (fft_real *)data;
fft_complex *data_complex = (fft_complex *)data, *workspace_complex = (fft_complex *)workspace;
if(forward == 1)
{
......@@ -425,10 +422,9 @@ void my_slab_based_fft(fft_plan * plan, void *data, void *workspace, int forward
prod = slabsx * ngridy;
for(n = 0; n < prod; n++)
{
FFTW(execute_dft_r2c) (plan->forward_plan_zdir, data_real + n * ngridz2_long, workspace_complex + n * ngridz_long);
FFTW(execute_dft_r2c)(plan->forward_plan_zdir, data_real + n * ngridz2_long, workspace_complex + n * ngridz_long);
}
/* do the y-direction FFT, complex to complex */
prod = slabsx * ngridz;
for(n = 0; n < prod; n++)
......@@ -436,13 +432,15 @@ void my_slab_based_fft(fft_plan * plan, void *data, void *workspace, int forward
int i = n / ngridz;
int j = n % ngridz;
FFTW(execute_dft) (plan->forward_plan_ydir, workspace_complex + i * ngridz * ngridy_long + j, data_complex + i * ngridz * ngridy_long + j);
FFTW(execute_dft)
(plan->forward_plan_ydir, workspace_complex + i * ngridz * ngridy_long + j, data_complex + i * ngridz * ngridy_long + j);
}
/* now our data resides in data_complex[] */
/* do the transpose */
my_slab_transpose(data_complex, workspace_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task, plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 0);
my_slab_transpose(data_complex, workspace_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task,
plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 0);
/* now the data is in workspace_complex[] */
......@@ -453,7 +451,8 @@ void my_slab_based_fft(fft_plan * plan, void *data, void *workspace, int forward
int i = n / ngridz;
int j = n % ngridz;
FFTW(execute_dft) (plan->forward_plan_xdir, workspace_complex + i * ngridz * ngridx_long + j, data_complex + i * ngridz * ngridx_long + j);
FFTW(execute_dft)
(plan->forward_plan_xdir, workspace_complex + i * ngridz * ngridx_long + j, data_complex + i * ngridz * ngridx_long + j);
}
/* now the result is in data_complex[] */
......@@ -467,10 +466,12 @@ void my_slab_based_fft(fft_plan * plan, void *data, void *workspace, int forward
int i = n / ngridz;
int j = n % ngridz;
FFTW(execute_dft) (plan->backward_plan_xdir, data_complex + i * ngridz * ngridx_long + j, workspace_complex + i * ngridz * ngridx_long + j);
FFTW(execute_dft)
(plan->backward_plan_xdir, data_complex + i * ngridz * ngridx_long + j, workspace_complex + i * ngridz * ngridx_long + j);
}
my_slab_transpose(workspace_complex, data_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task, plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 1);
my_slab_transpose(workspace_complex, data_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task,
plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 1);
prod = slabsx * ngridz;
......@@ -479,21 +480,21 @@ void my_slab_based_fft(fft_plan * plan, void *data, void *workspace, int forward
int i = n / ngridz;
int j = n % ngridz;
FFTW(execute_dft) (plan->backward_plan_ydir, data_complex + i * ngridz * ngridy_long + j, workspace_complex + i * ngridz * ngridy_long + j);
FFTW(execute_dft)
(plan->backward_plan_ydir, data_complex + i * ngridz * ngridy_long + j, workspace_complex + i * ngridz * ngridy_long + j);
}
prod = slabsx * ngridy;
for(n = 0; n < prod; n++)
{
FFTW(execute_dft_c2r) (plan->backward_plan_zdir, workspace_complex + n * ngridz_long, data_real + n * ngridz2_long);
FFTW(execute_dft_c2r)(plan->backward_plan_zdir, workspace_complex + n * ngridz_long, data_real + n * ngridz2_long);
}
/* now the result is in data[] */
}
}
/*! \brief Performs a slab-based complex to complex Fast Fourier
* transformation.
*
......@@ -504,7 +505,7 @@ void my_slab_based_fft(fft_plan * plan, void *data, void *workspace, int forward
*
* \return void
*/
void my_slab_based_fft_c2c(fft_plan * plan, void *data, void *workspace, int forward)
void my_slab_based_fft_c2c(fft_plan *plan, void *data, void *workspace, int forward)
{
int n, prod;
int slabsx = plan->slabs_x_per_task[ThisTask];
......@@ -518,8 +519,8 @@ void my_slab_based_fft_c2c(fft_plan * plan, void *data, void *workspace, int for
size_t ngridy_long = ngridy;
size_t ngridz_long = ngridz;
fft_complex *data_start = (fft_complex *) data;
fft_complex *data_complex = (fft_complex *) data, *workspace_complex = (fft_complex *) workspace;
fft_complex *data_start = (fft_complex *)data;
fft_complex *data_complex = (fft_complex *)data, *workspace_complex = (fft_complex *)workspace;
if(forward == 1)
{
......@@ -527,7 +528,7 @@ void my_slab_based_fft_c2c(fft_plan * plan, void *data, void *workspace, int for
prod = slabsx * ngridy;
for(n = 0; n < prod; n++)
{
FFTW(execute_dft) (plan->forward_plan_zdir, data_start + n * ngridz, workspace_complex + n * ngridz);
FFTW(execute_dft)(plan->forward_plan_zdir, data_start + n * ngridz, workspace_complex + n * ngridz);
}
/* do the y-direction FFT, complex to complex */
......@@ -537,13 +538,15 @@ void my_slab_based_fft_c2c(fft_plan * plan, void *data, void *workspace, int for
int i = n / ngridz;
int j = n % ngridz;
FFTW(execute_dft) (plan->forward_plan_ydir, workspace_complex + i * ngridz * ngridy_long + j, data_complex + i * ngridz * ngridy_long + j);
FFTW(execute_dft)
(plan->forward_plan_ydir, workspace_complex + i * ngridz * ngridy_long + j, data_complex + i * ngridz * ngridy_long + j);
}
/* now our data resides in data_complex[] */
/* do the transpose */
my_slab_transpose(data_complex, workspace_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task, plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 0);
my_slab_transpose(data_complex, workspace_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task,
plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 0);
/* now the data is in workspace_complex[] */
......@@ -554,7 +557,8 @@ void my_slab_based_fft_c2c(fft_plan * plan, void *data, void *workspace, int for
int i = n / ngridz;
int j = n % ngridz;
FFTW(execute_dft) (plan->forward_plan_xdir, workspace_complex + i * ngridz * ngridx_long + j, data_complex + i * ngridz * ngridx_long + j);
FFTW(execute_dft)
(plan->forward_plan_xdir, workspace_complex + i * ngridz * ngridx_long + j, data_complex + i * ngridz * ngridx_long + j);
}
/* now the result is in data_complex[] */
......@@ -568,10 +572,12 @@ void my_slab_based_fft_c2c(fft_plan * plan, void *data, void *workspace, int for
int i = n / ngridz;
int j = n % ngridz;
FFTW(execute_dft) (plan->backward_plan_xdir, data_complex + i * ngridz * ngridx_long + j, workspace_complex + i * ngridz * ngridx_long + j);
FFTW(execute_dft)
(plan->backward_plan_xdir, data_complex + i * ngridz * ngridx_long + j, workspace_complex + i * ngridz * ngridx_long + j);
}
my_slab_transpose(workspace_complex, data_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task, plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 1);
my_slab_transpose(workspace_complex, data_complex, plan->slabs_x_per_task, plan->first_slab_x_of_task, plan->slabs_y_per_task,
plan->first_slab_y_of_task, ngridx, ngridy, ngridz, 1);
prod = slabsx * ngridz;
......@@ -580,38 +586,36 @@ void my_slab_based_fft_c2c(fft_plan * plan, void *data, void *workspace, int for
int i = n / ngridz;
int j = n % ngridz;
FFTW(execute_dft) (plan->backward_plan_ydir, data_complex + i * ngridz * ngridy_long + j, workspace_complex + i * ngridz * ngridy_long + j);
FFTW(execute_dft)
(plan->backward_plan_ydir, data_complex + i * ngridz * ngridy_long + j, workspace_complex + i * ngridz * ngridy_long + j);
}
prod = slabsx * ngridy;
for(n = 0; n < prod; n++)
{
FFTW(execute_dft) (plan->backward_plan_zdir, workspace_complex + n * ngridz, data_start + n * ngridz);
FFTW(execute_dft)(plan->backward_plan_zdir, workspace_complex + n * ngridz, data_start + n * ngridz);
}
/* now the result is in data[] */
}
}
#else /* #ifndef FFT_COLUMN_BASED */
static void my_fft_column_remap(fft_complex *data, int Ndims[3], int in_firstcol, int in_ncol, fft_complex *out, int perm[3],
int out_firstcol, int out_ncol, size_t *offset_send, size_t *offset_recv, size_t *count_send,
size_t *count_recv, size_t just_count_flag);
static void my_fft_column_remap(fft_complex * data, int Ndims[3], int in_firstcol, int in_ncol,
fft_complex * out, int perm[3], int out_firstcol, int out_ncol,
size_t * offset_send, size_t * offset_recv, size_t * count_send, size_t * count_recv, size_t just_count_flag);
static void my_fft_column_transpose(fft_real * data, int Ndims[3], /* global dimensions of data cube */
int in_firstcol, int in_ncol, /* first column and number of columns */
fft_real * out, int perm[3],
int out_firstcol, int out_ncol, size_t * offset_send, size_t * offset_recv, size_t * count_send, size_t * count_recv, size_t just_count_flag);
static void my_fft_column_transpose_c(fft_complex * data, int Ndims[3], /* global dimensions of data cube */
int in_firstcol, int in_ncol, /* first column and number of columns */
fft_complex * out, int perm[3],
int out_firstcol, int out_ncol, size_t * offset_send, size_t * offset_recv, size_t * count_send, size_t * count_recv, size_t just_count_flag);
static void my_fft_column_transpose(fft_real *data, int Ndims[3], /* global dimensions of data cube */
int in_firstcol, int in_ncol, /* first column and number of columns */
fft_real *out, int perm[3], int out_firstcol, int out_ncol, size_t *offset_send,
size_t *offset_recv, size_t *count_send, size_t *count_recv, size_t just_count_flag);
static void my_fft_column_transpose_c(fft_complex *data, int Ndims[3], /* global dimensions of data cube */
int in_firstcol, int in_ncol, /* first column and number of columns */
fft_complex *out, int perm[3], int out_firstcol, int out_ncol, size_t *offset_send,
size_t *offset_recv, size_t *count_send, size_t *count_recv, size_t just_count_flag);
/*! \brief Initializes column based FFT.
*
......@@ -622,7 +626,7 @@ static void my_fft_column_transpose_c(fft_complex * data, int Ndims[3], /* globa
*
* \return void
*/
void my_column_based_fft_init(fft_plan * plan, int NgridX, int NgridY, int NgridZ)
void my_column_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ)
{
plan->NgridX = NgridX;
plan->NgridY = NgridY;
......@@ -635,25 +639,25 @@ void my_column_based_fft_init(fft_plan * plan, int NgridX, int NgridY, int Ngrid
int columns, avg, exc, tasklastsection, pivotcol;
columns = NgridX * NgridY;
avg = (columns - 1) / NTask + 1;
exc = NTask * avg - columns;
columns = NgridX * NgridY;
avg = (columns - 1) / NTask + 1;
exc = NTask * avg - columns;
tasklastsection = NTask - exc;
pivotcol = tasklastsection * avg;
pivotcol = tasklastsection * avg;
plan->pivotcol = pivotcol;
plan->avg = avg;
plan->pivotcol = pivotcol;
plan->avg = avg;
plan->tasklastsection = tasklastsection;
if(ThisTask < tasklastsection)
{
plan->base_firstcol = ThisTask * avg;
plan->base_ncol = avg;
plan->base_ncol = avg;
}
else
{
plan->base_firstcol = ThisTask * avg - (ThisTask - tasklastsection);
plan->base_ncol = avg - 1;
plan->base_ncol = avg - 1;
}
plan->base_lastcol = plan->base_firstcol + plan->base_ncol - 1;
......@@ -666,101 +670,101 @@ void my_column_based_fft_init(fft_plan * plan, int NgridX, int NgridY, int Ngrid
subdivide_evenly(plan->NgridY * plan->Ngrid2, NTask, ThisTask, &plan->firstcol_YZ, &plan->ncol_YZ);
plan->second_transposed_ncells = ((size_t) plan->NgridX) * plan->second_transposed_ncol;
plan->second_transposed_ncells = ((size_t)plan->NgridX) * plan->second_transposed_ncol;
plan->max_datasize = ((size_t) plan->Ngrid2) * plan->base_ncol;
plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t) plan->NgridY) * plan->transposed_ncol);
plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t) plan->NgridX) * plan->second_transposed_ncol);
plan->max_datasize = smax(plan->max_datasize, ((size_t) plan->ncol_XZ) * plan->NgridY);
plan->max_datasize = smax(plan->max_datasize, ((size_t) plan->ncol_YZ) * plan->NgridX);
plan->max_datasize = ((size_t)plan->Ngrid2) * plan->base_ncol;
plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridY) * plan->transposed_ncol);
plan->max_datasize = smax(plan->max_datasize, 2 * ((size_t)plan->NgridX) * plan->second_transposed_ncol);
plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_XZ) * plan->NgridY);
plan->max_datasize = smax(plan->max_datasize, ((size_t)plan->ncol_YZ) * plan->NgridX);
plan->fftsize = plan->max_datasize;
plan->offsets_send_A = mymalloc_clear("offsets_send_A", NTask * sizeof(size_t));
plan->offsets_recv_A = mymalloc_clear("offsets_recv_A", NTask * sizeof(size_t));
plan->offsets_send_B = mymalloc_clear("offsets_send_B", NTask * sizeof(size_t));
plan->offsets_recv_B = mymalloc_clear("offsets_recv_B", NTask * sizeof(size_t));
plan->offsets_send_C = mymalloc_clear("offsets_send_C", NTask * sizeof(size_t));
plan->offsets_recv_C = mymalloc_clear("offsets_recv_C", NTask * sizeof(size_t));
plan->offsets_send_D = mymalloc_clear("offsets_send_D", NTask * sizeof(size_t));
plan->offsets_recv_D = mymalloc_clear("offsets_recv_D", NTask * sizeof(size_t));
plan->offsets_send_13 = mymalloc_clear("offsets_send_13", NTask * sizeof(size_t));
plan->offsets_recv_13 = mymalloc_clear("offsets_recv_13", NTask * sizeof(size_t));
plan->offsets_send_23 = mymalloc_clear("offsets_send_23", NTask * sizeof(size_t));
plan->offsets_recv_23 = mymalloc_clear("offsets_recv_23", NTask * sizeof(size_t));
plan->offsets_send_A = mymalloc_clear("offsets_send_A", NTask * sizeof(size_t));
plan->offsets_recv_A = mymalloc_clear("offsets_recv_A", NTask * sizeof(size_t));
plan->offsets_send_B = mymalloc_clear("offsets_send_B", NTask * sizeof(size_t));
plan->offsets_recv_B = mymalloc_clear("offsets_recv_B", NTask * sizeof(size_t));
plan->offsets_send_C = mymalloc_clear("offsets_send_C", NTask * sizeof(size_t));
plan->offsets_recv_C = mymalloc_clear("offsets_recv_C", NTask * sizeof(size_t));
plan->offsets_send_D = mymalloc_clear("offsets_send_D", NTask * sizeof(size_t));
plan->offsets_recv_D = mymalloc_clear("offsets_recv_D", NTask * sizeof(size_t));
plan->offsets_send_13 = mymalloc_clear("offsets_send_13", NTask * sizeof(size_t));
plan->offsets_recv_13 = mymalloc_clear("offsets_recv_13", NTask * sizeof(size_t));
plan->offsets_send_23 = mymalloc_clear("offsets_send_23", NTask * sizeof(size_t));
plan->offsets_recv_23 = mymalloc_clear("offsets_recv_23", NTask * sizeof(size_t));
plan->offsets_send_13back = mymalloc_clear("offsets_send_13back", NTask * sizeof(size_t));
plan->offsets_recv_13back = mymalloc_clear("offsets_recv_13back", NTask * sizeof(size_t));
plan->offsets_send_23back = mymalloc_clear("offsets_send_23back", NTask * sizeof(size_t));
plan->offsets_recv_23back = mymalloc_clear("offsets_recv_23back", NTask * sizeof(size_t));
plan->count_send_A = mymalloc_clear("count_send_A", NTask * sizeof(size_t));
plan->count_recv_A = mymalloc_clear("count_recv_A", NTask * sizeof(size_t));
plan->count_send_B = mymalloc_clear("count_send_B", NTask * sizeof(size_t));
plan->count_recv_B = mymalloc_clear("count_recv_B", NTask * sizeof(size_t));
plan->count_send_C = mymalloc_clear("count_send_C", NTask * sizeof(size_t));
plan->count_recv_C = mymalloc_clear("count_recv_C", NTask * sizeof(size_t));
plan->count_send_D = mymalloc_clear("count_send_D", NTask * sizeof(size_t));
plan->count_recv_D = mymalloc_clear("count_recv_D", NTask * sizeof(size_t));
plan->count_send_13 = mymalloc_clear("count_send_13", NTask * sizeof(size_t));
plan->count_recv_13 = mymalloc_clear("count_recv_13", NTask * sizeof(size_t));
plan->count_send_23 = mymalloc_clear("count_send_23", NTask * sizeof(size_t));
plan->count_recv_23 = mymalloc_clear("count_recv_23", NTask * sizeof(size_t));
plan->count_send_A = mymalloc_clear("count_send_A", NTask * sizeof(size_t));
plan->count_recv_A = mymalloc_clear("count_recv_A", NTask * sizeof(size_t));
plan->count_send_B = mymalloc_clear("count_send_B", NTask * sizeof(size_t));
plan->count_recv_B = mymalloc_clear("count_recv_B", NTask * sizeof(size_t));