Commit 2259b359 authored by Cristian Constantin Lalescu
Browse files

Merge branch 'feature/basic_fftw_test' into develop

parents dfce44ff b760492a
Pipeline #94987 failed with stages
in 28 minutes and 44 seconds
......@@ -39,7 +39,7 @@
/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
-static const int message_buffer_length = 2048;
+static const int message_buffer_length = 32768;
extern int myrank, nprocs;
inline int MOD(int a, int n)
......
# Build the stand-alone FFTW benchmark.
#
# The executable is called `test_fft`, so that is the real file target; it
# lists its sources as prerequisites so make only relinks when they change.
# `test_fftw` is kept as a phony alias so `make test_fftw` keeps working.
.PHONY: test_fftw
test_fftw: test_fft

test_fft: test.cpp main_code.hpp
	${MPICXX} \
		-DPINCHECK_FOUND \
		${TURTLE_COMPILATION_FLAGS} \
		-I${FFTW_ROOT}/include \
		-I${PINCHECK_ROOT}/include \
		-Wall \
		-g \
		-Wfatal-errors \
		-fopenmp \
		-std=gnu++11 \
		test.cpp \
		-o $@ \
		${FFTW_OPENMP_LIB} \
		${FFTW_LIB}
/**********************************************************************
* *
* Copyright 2021 Max Planck Institute *
* for Dynamics and Self-Organization *
* *
* This file is part of TurTLE. *
* *
* TurTLE is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published *
* by the Free Software Foundation, either version 3 of the License, *
* or (at your option) any later version. *
* *
* TurTLE is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with TurTLE. If not, see <http://www.gnu.org/licenses/> *
* *
* Contact: Cristian.Lalescu@ds.mpg.de *
* *
**********************************************************************/
#ifndef MAIN_CODE_HPP
#define MAIN_CODE_HPP
#include <mpi.h>
#include <omp.h>
#include <cfenv>
#include <string>
#include <iostream>
#include <fftw3-mpi.h>
#include <string>
#include <cassert>
#include <stdarg.h>
#include <cstdio>
#include <cstdlib>
int myrank, nprocs;
#ifdef PINCHECK_FOUND
#include <pincheck.hpp>
/**
 * Report the pinning information gathered by pincheck.
 *
 * pincheck::pincheck() is called on every rank; only rank 0 prints the
 * resulting string, first to std::cerr and then to std::cout, each time
 * wrapped between a "begin" and an "end" marker line.
 */
void print_pinning_info(void)
{
    // every rank takes part in the query (per the original note, ranks >0
    // receive an empty string)
    const std::string report = pincheck::pincheck();
    if (myrank != 0)
        return;

    const auto emit = [&report](std::ostream &destination)
    {
        destination << "### pinning info begin" << std::endl;
        destination << report;
        destination << "### pinning info end" << std::endl;
    };
    emit(std::cerr);
    emit(std::cout);
}
#else
// without pincheck the call compiles away to nothing
#define print_pinning_info(...)
#endif
#ifndef NDEBUG
// Capacity in bytes (terminating NUL included) of the debug message buffer.
const int message_buffer_length = 32768;
// Single scratch buffer reused by every debug message; no locking is done here.
static char debug_message_buffer[message_buffer_length];

/**
 * Format "MPIrank<rank> " followed by the printf-style message into
 * debug_message_buffer and write the result to std::cerr.
 *
 * The message body is placed right after the prefix using the length that
 * snprintf reports, so the output stays intact even when the rank needs
 * more than four digits. Over-long messages are truncated to the buffer.
 *
 * format: printf-style format string.
 * arguments: variadic arguments matching format; the caller owns
 *     va_start/va_end.
 */
inline void write_debug_message(const char * format, va_list arguments)
{
    int prefix_length = snprintf(
            debug_message_buffer,
            message_buffer_length,
            "MPIrank%.4d ",
            myrank);
    if (prefix_length < 0)
        prefix_length = 0;                          // encoding error: drop the prefix
    else if (prefix_length >= message_buffer_length)
        prefix_length = message_buffer_length - 1;  // prefix alone filled the buffer
    vsnprintf(
            debug_message_buffer + prefix_length,
            message_buffer_length - prefix_length,
            format,
            arguments);
    std::cerr << debug_message_buffer;
}

/**
 * Print a rank-prefixed, printf-style debug message to std::cerr.
 */
inline void DEBUG_MSG(const char * format, ...)
{
    va_list argptr;
    va_start(argptr, format);
    write_debug_message(format, argptr);
    va_end(argptr);
}

/**
 * Print a rank-prefixed, printf-style debug message to std::cerr, then
 * wait on MPI_Barrier(communicator).
 */
inline void DEBUG_MSG_WAIT(MPI_Comm communicator, const char * format, ...)
{
    va_list argptr;
    va_start(argptr, format);
    write_debug_message(format, argptr);
    va_end(argptr);
    MPI_Barrier(communicator);
}
#else
// with NDEBUG defined both helpers compile away to nothing
#define DEBUG_MSG(...)
#define DEBUG_MSG_WAIT(...)
#endif//NDEBUG
// Signature of the code that main_wrapper runs once MPI and FFTW are set up.
using main_function_call = int (int argc, char *argv[], bool FPE);
int main_wrapper(
int argc,
char *argv[],
bool floating_point_exceptions,
main_function_call *code_to_execute)
{
/* floating point exception switch */
if (floating_point_exceptions)
feenableexcept(FE_INVALID | FE_OVERFLOW);
else
std::cerr << "FPE have been turned OFF" << std::endl;
/* initialize mpi with threads */
int mpiprovided;
MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &mpiprovided);
MPI_Pcontrol(0);
assert(mpiprovided >= MPI_THREAD_FUNNELED);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
print_pinning_info();
/* initialize fftw with mpi and threads */
const int nThreads = omp_get_max_threads();
DEBUG_MSG("Number of threads for the FFTW = %d\n",
nThreads);
if (nThreads > 1){
fftw_init_threads();
fftwf_init_threads();
}
fftw_mpi_init();
fftwf_mpi_init();
DEBUG_MSG("There are %d processes and %d threads\n",
nprocs,
nThreads);
if (nThreads > 1){
fftw_plan_with_nthreads(nThreads);
fftwf_plan_with_nthreads(nThreads);
}
fftwf_set_timelimit(300);
fftw_set_timelimit(300);
int code_result = code_to_execute(argc, argv, floating_point_exceptions);
std::cerr << "main code returned " << code_result << std::endl;
/* clean up */
fftwf_mpi_cleanup();
fftw_mpi_cleanup();
if (nThreads > 1){
fftw_cleanup_threads();
fftwf_cleanup_threads();
}
MPI_Finalize();
return EXIT_SUCCESS;
}
#endif//MAIN_CODE_HPP
#include "main_code.hpp"
#include <random>
/****************************/
// parameters
// Grid extents; test_fft hands them to FFTW in the order {nz, ny, nx}.
constexpr int nx{4096};
constexpr int ny{8};
constexpr int nz{4096};
// Number of iterations of the transform loop in test_fft.
constexpr int nsteps{100};
/****************************/
/**
 * Print the textual description of an FFTW plan through DEBUG_MSG.
 *
 * The description is generated on every rank, but only rank 0 prints it.
 *
 * pl: plan to describe.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when FFTW did not return a
 * description.
 */
int print_plan(fftw_plan &pl)
{
    char *plan_information = fftw_sprint_plan(pl);
    // never hand a null pointer to a "%s" conversion
    if (plan_information == NULL)
        return EXIT_FAILURE;
    // braces keep the statement well-formed when DEBUG_MSG expands to nothing
    if (myrank == 0)
    {
        DEBUG_MSG("\n\n%s\n\n", plan_information);
    }
    // the string is owned by the caller and released with free()
    free(plan_information);
    return EXIT_SUCCESS;
}
int test_fft(
int argc,
char *argv[],
bool FPE)
{
ptrdiff_t nfftw[3] = {nz, ny, nx};
ptrdiff_t local_n0, local_0_start;
ptrdiff_t local_n1, local_1_start;
double *data = NULL;
fftw_plan c2r_plan;
fftw_plan r2c_plan;
unsigned fftw_plan_rigor = FFTW_MEASURE;
ptrdiff_t local_size = fftw_mpi_local_size_many_transposed(
3, nfftw, 1,
FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
MPI_COMM_WORLD,
&local_n0, &local_0_start,
&local_n1, &local_1_start);
/************/
/* ALLOCATE */
/************/
data = fftw_alloc_real(local_size*2);
c2r_plan = fftw_mpi_plan_many_dft_c2r(
3, nfftw, 1,
FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
(fftw_complex*)(data),
data,
MPI_COMM_WORLD,
fftw_plan_rigor | FFTW_MPI_TRANSPOSED_IN);
assert(c2r_plan != NULL);
r2c_plan = fftw_mpi_plan_many_dft_r2c(
3, nfftw, 1,
FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
data,
(fftw_complex*)(data),
MPI_COMM_WORLD,
fftw_plan_rigor | FFTW_MPI_TRANSPOSED_OUT);
assert(r2c_plan != NULL);
DEBUG_MSG("r2c plan representation\n");
print_plan(r2c_plan);
DEBUG_MSG("c2r plan representation\n");
print_plan(c2r_plan);
// fill up data
std::random_device rd{};
std::mt19937 gen{rd()};
std::normal_distribution<> gaussian;
for (ptrdiff_t ii = 0; ii < local_size; ii++)
data[ii] = gaussian(gen);
// start mpi profiling
MPI_Pcontrol(5);
for (ptrdiff_t tt = 0; tt < nsteps; tt++)
{
fftw_execute(r2c_plan);
fftw_execute(c2r_plan);
#pragma omp parallel for schedule(static)
for(ptrdiff_t ii = 0; ii < local_size; ii++)
data[ii] /= nx*ny*nz;
}
// stop mpi profiling
MPI_Pcontrol(-5);
/************/
/* FREE */
/************/
fftw_free(data);
fftw_destroy_plan(c2r_plan);
fftw_destroy_plan(r2c_plan);
return EXIT_SUCCESS;
}
/**
 * Program entry point: runs test_fft through main_wrapper with floating
 * point exceptions requested, and returns whatever main_wrapper returns.
 */
int main(int argc,
         char *argv[])
{
    const bool enable_floating_point_exceptions = true;
    const int exit_status = main_wrapper(
            argc,
            argv,
            enable_floating_point_exceptions,
            test_fft);
    return exit_status;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment