diff --git a/README.rst b/README.rst index 5418d711b7846e2ebd2ca68270a95baf1f9e35b3..efafb817ca921ef3999b7d7d0c84bd3dee56ab58 100644 --- a/README.rst +++ b/README.rst @@ -130,7 +130,7 @@ enough). ./configure --prefix=PREFIX --enable-single --enable-sse --enable-mpi --enable-openmp --enable-threads make make install - ./configure --prefix=PREFIX --enable-sse --enable-sse2 --enable-mpi --enable-openmp --enable-threads + ./configure --prefix=PREFIX --enable-sse2 --enable-mpi --enable-openmp --enable-threads make make install diff --git a/bfps/DNS.py b/bfps/DNS.py index f360db8b8cad20578ffd5669770b92301b40a33c..fdaa0f63d37716d66b7a18a10568db45f7d7cf37 100644 --- a/bfps/DNS.py +++ b/bfps/DNS.py @@ -430,9 +430,7 @@ class DNS(_code): return None def write_par( self, - iter0 = 0, - particle_ic = None, - particles_off = False): + iter0 = 0): assert (self.parameters['niter_todo'] % self.parameters['niter_stat'] == 0) assert (self.parameters['niter_todo'] % self.parameters['niter_out'] == 0) assert (self.parameters['niter_out'] % self.parameters['niter_stat'] == 0) @@ -479,40 +477,8 @@ class DNS(_code): 4), dtype = np.int64) ofile['checkpoint'] = int(0) - if (self.dns_type in ['NSVE', 'NSVE_no_output']) or particles_off: + if (self.dns_type in ['NSVE', 'NSVE_no_output']): return None - - if type(particle_ic) == type(None): - pbase_shape = (self.parameters['nparticles'],) - number_of_particles = self.parameters['nparticles'] - else: - pbase_shape = particle_ic.shape[:-1] - assert(particle_ic.shape[-1] == 3) - number_of_particles = 1 - for val in pbase_shape[1:]: - number_of_particles *= val - ncomponents = 3 - if self.dns_type in ['NSVEcomplex_particles']: - ncomponents = 6 - with h5py.File(self.get_checkpoint_0_fname(), 'a') as ofile: - s = 0 - if not 'tracers{0}'.format(s) in ofile.keys(): - ofile.create_group('tracers{0}'.format(s)) - ofile.create_group('tracers{0}/rhs'.format(s)) - ofile.create_group('tracers{0}/state'.format(s)) - ofile['tracers{0}/rhs'.format(s)].create_dataset( - '0', - shape = ( - (self.parameters['tracers{0}_integration_steps'.format(s)],) + - pbase_shape + - (ncomponents,)), - dtype = np.float) - ofile['tracers{0}/state'.format(s)].create_dataset( - '0', - shape = ( - pbase_shape + - (ncomponents,)), - dtype = np.float) return None def job_parser_arguments( self, @@ -818,35 +784,56 @@ class DNS(_code): def generate_tracer_state( self, rseed = None, - species = 0): - with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file: - dset = data_file[ - 'tracers{0}/state/0'.format(species)] - if not type(rseed) == type(None): - np.random.seed(rseed) - nn = self.parameters['nparticles'] - cc = int(0) - batch_size = int(1e6) - def get_random_phases(npoints): - return np.random.random( - (npoints, 3))*2*np.pi - def get_random_versors(npoints): - bla = np.random.normal( - size = (npoints, 3)) - bla /= np.sum(bla**2, axis = 1)[:, None]**.5 - return bla - while nn > 0: - if nn > batch_size: - dset[cc*batch_size:(cc+1)*batch_size, :3] = get_random_phases(batch_size) - if dset.shape[1] == 6: - dset[cc*batch_size:(cc+1)*batch_size, 3:] = get_random_versors(batch_size) - nn -= batch_size - else: - dset[cc*batch_size:cc*batch_size+nn, :3] = get_random_phases(nn) - if dset.shape[1] == 6: - dset[cc*batch_size:cc*batch_size+nn, 3:] = get_random_versors(nn) - nn = 0 - cc += 1 + species = 0, + integration_steps = None, + ncomponents = 3): + try: + if type(integration_steps) == type(None): + integration_steps = self.NSVEp_extra_parameters['tracers0_integration_steps'] + if 'tracers{0}_integration_steps'.format(species) in self.parameters.keys(): + integration_steps = self.parameters['tracers{0}_integration_steps'.format(species)] + if self.dns_type == 'NSVEcomplex_particles' and species == 0: + ncomponents = 6 + with h5py.File(self.get_checkpoint_0_fname(), 'a') as data_file: + nn = self.parameters['nparticles'] + if not 'tracers{0}'.format(species) in data_file.keys(): + data_file.create_group('tracers{0}'.format(species)) + data_file.create_group('tracers{0}/rhs'.format(species)) + data_file.create_group('tracers{0}/state'.format(species)) + data_file['tracers{0}/rhs'.format(species)].create_dataset( + '0', + shape = (integration_steps, nn, ncomponents,), + dtype = np.float) + dset = data_file['tracers{0}/state'.format(species)].create_dataset( + '0', + shape = (nn, ncomponents,), + dtype = np.float) + if not type(rseed) == type(None): + np.random.seed(rseed) + cc = int(0) + batch_size = int(1e6) + def get_random_phases(npoints): + return np.random.random( + (npoints, 3))*2*np.pi + def get_random_versors(npoints): + bla = np.random.normal( + size = (npoints, 3)) + bla /= np.sum(bla**2, axis = 1)[:, None]**.5 + return bla + while nn > 0: + if nn > batch_size: + dset[cc*batch_size:(cc+1)*batch_size, :3] = get_random_phases(batch_size) + if dset.shape[1] == 6: + dset[cc*batch_size:(cc+1)*batch_size, 3:] = get_random_versors(batch_size) + nn -= batch_size + else: + dset[cc*batch_size:cc*batch_size+nn, :3] = get_random_phases(nn) + if dset.shape[1] == 6: + dset[cc*batch_size:cc*batch_size+nn, 3:] = get_random_versors(nn) + nn = 0 + cc += 1 + except Exception as e: + print(e) return None def generate_vector_field( self, @@ -995,66 +982,61 @@ class DNS(_code): particle_file.create_group('tracers0/pressure_gradient') particle_file.create_group('tracers0/pressure_Hessian') return None + def generate_initial_condition( + self, + opt = None): + # take care of fields' initial condition + # first, check if initial field exists + need_field = False + if not os.path.exists(self.get_checkpoint_0_fname()): + need_field = True + else: + f = h5py.File(self.get_checkpoint_0_fname(), 'r') + try: + dset = f['vorticity/complex/0'] + need_field = (dset.shape == (self.parameters['ny'], + self.parameters['nz'], + self.parameters['nx']//2+1, + 3)) + except: + need_field = True + f.close() + if need_field: + f = h5py.File(self.get_checkpoint_0_fname(), 'a') + if len(opt.src_simname) > 0: + source_cp = 0 + src_file = 'not_a_file' + while True: + src_file = os.path.join( + os.path.realpath(opt.src_work_dir), + opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp)) + f0 = h5py.File(src_file, 'r') + if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys(): + f0.close() + break + source_cp += 1 + self.copy_complex_field( + src_file, + 'vorticity/complex/{0}'.format(opt.src_iteration), + f, + 'vorticity/complex/{0}'.format(0)) + else: + data = self.generate_vector_field( + write_to_file = False, + spectra_slope = 2.0, + amplitude = 0.05) + f['vorticity/complex/{0}'.format(0)] = data + f.close() + # now take care of particles' initial condition + if self.dns_type in ['NSVEparticles', 'NSVEcomplex_particles', 'NSVEparticles_no_output', 'NSVEp_extra_sampling']: + self.generate_particle_data(opt = opt) + return None def launch_jobs( self, - opt = None, - particle_initial_condition = None): - if not os.path.exists(os.path.join(self.work_dir, self.simname + '.h5')): - # take care of fields' initial condition - if not os.path.exists(self.get_checkpoint_0_fname()): - f = h5py.File(self.get_checkpoint_0_fname(), 'w') - if len(opt.src_simname) > 0: - source_cp = 0 - src_file = 'not_a_file' - while True: - src_file = os.path.join( - os.path.realpath(opt.src_work_dir), - opt.src_simname + '_checkpoint_{0}.h5'.format(source_cp)) - f0 = h5py.File(src_file, 'r') - if '{0}'.format(opt.src_iteration) in f0['vorticity/complex'].keys(): - f0.close() - break - source_cp += 1 - self.copy_complex_field( - src_file, - 'vorticity/complex/{0}'.format(opt.src_iteration), - f, - 'vorticity/complex/{0}'.format(0)) - else: - data = self.generate_vector_field( - write_to_file = False, - spectra_slope = 2.0, - amplitude = 0.05) - f['vorticity/complex/{0}'.format(0)] = data - f.close() - ## take care of particles' initial condition - #if self.dns_type in ['NSVEparticles', 'NSVEparticles_no_output']: - # if opt.pclouds > 1: - # np.random.seed(opt.particle_rand_seed) - # if opt.pcloud_type == 'random-cube': - # particle_initial_condition = ( - # np.random.random((opt.pclouds, 1, 3))*2*np.pi + - # np.random.random((1, self.parameters['nparticles'], 3))*opt.particle_cloud_size) - # elif opt.pcloud_type == 'regular-cube': - # onedarray = np.linspace( - # -opt.particle_cloud_size/2, - # opt.particle_cloud_size/2, - # self.parameters['nparticles']) - # particle_initial_condition = np.zeros( - # (opt.pclouds, - # self.parameters['nparticles'], - # self.parameters['nparticles'], - # self.parameters['nparticles'], 3), - # dtype = np.float64) - # particle_initial_condition[:] = \ - # np.random.random((opt.pclouds, 1, 1, 1, 3))*2*np.pi - # particle_initial_condition[..., 0] += onedarray[None, None, None, :] - # particle_initial_condition[..., 1] += onedarray[None, None, :, None] - # particle_initial_condition[..., 2] += onedarray[None, :, None, None] - self.write_par( - particle_ic = None) - if self.dns_type in ['NSVEparticles', 'NSVEcomplex_particles', 'NSVEparticles_no_output', 'NSVEp_extra_sampling']: - self.generate_particle_data(opt = opt) + opt = None): + if not os.path.exists(self.get_data_file_name()): + self.generate_initial_condition(opt = opt) + self.write_par() self.run( nb_processes = opt.nb_processes, nb_threads_per_process = opt.nb_threads_per_process, diff --git a/bfps/PP.py b/bfps/PP.py index 5716a7fe793c71413b823e4aad10dc6886294ef4..77bf9d6c7fe7e56e6a34954100ff512551cf130f 100644 --- a/bfps/PP.py +++ b/bfps/PP.py @@ -33,6 +33,7 @@ import h5py import math import numpy as np import warnings +import glob import bfps from ._code import _code @@ -804,21 +805,15 @@ class PP(_code): with h5py.File(os.path.join(self.work_dir, self.simname + '_fields.h5'), 'a') as ff: ff.require_group('vorticity') ff.require_group('vorticity/complex') - checkpoint = (iter0 // niter_out) // cppf - while True: - cpf_name = os.path.join( - self.work_dir, - self.simname + '_checkpoint_{0}.h5'.format(checkpoint)) - if os.path.exists(cpf_name): - cpf = h5py.File(cpf_name, 'r') - for iter_name in cpf['vorticity/complex'].keys(): - if iter_name not in ff['vorticity/complex'].keys(): - ff['vorticity/complex/' + iter_name] = h5py.ExternalLink( - cpf_name, - 'vorticity/complex/' + iter_name) - checkpoint += 1 - else: - break + checkpoint_file_list = glob.glob(self.simname + '_checkpoint_*.h5') + for cpf_name in checkpoint_file_list: + cpf = h5py.File(cpf_name, 'r') + for iter_name in cpf['vorticity/complex'].keys(): + if iter_name not in ff['vorticity/complex'].keys(): + ff['vorticity/complex/' + iter_name] = h5py.ExternalLink( + cpf_name, + 'vorticity/complex/' + iter_name) + cpf.close() return None def launch_jobs( self, diff --git a/bfps/__main__.py b/bfps/__main__.py index 16a7cf7d099c49a39368a8ff09cb05bf890feb6f..cf269edbaea91acd4b3de595782b92e32e1cbd3b 100644 --- a/bfps/__main__.py +++ b/bfps/__main__.py @@ -33,7 +33,7 @@ from .PP import PP from .TEST import TEST def main(): - parser = argparse.ArgumentParser(prog = 'bfps') + parser = argparse.ArgumentParser(prog = 'bfps', conflict_handler = 'resolve') parser.add_argument( '-v', '--version', action = 'version', diff --git a/bfps/_code.py b/bfps/_code.py index 297749f50417336486088ce34b738d7a9f055b77..f997d651ca19dc9f77fd393fb47cd89323d73f38 100644 --- a/bfps/_code.py +++ b/bfps/_code.py @@ -262,10 +262,16 @@ class _code(_base): current_dir = os.getcwd() os.chdir(self.work_dir) os.chdir(current_dir) + if not 'MPI' in self.host_info.keys(): + self.host_info['MPI'] = 'openmpi' + if self.host_info['MPI'] == 'openmpi': + mpirun_environment_set = 'x' + else: + mpirun_environment_set = 'env' command_atoms = ['mpirun', '-np', '{0}'.format(nb_processes), - '-x', + '-' + mpirun_environment_set, 'OMP_NUM_THREADS={0}'.format(nb_threads_per_process), './' + self.name, self.simname] diff --git a/bfps/cpp/fftw_tools.hpp b/bfps/cpp/fftw_tools.hpp index e32500fd734803a5884877398fc13fff22aa44c4..b41cd2a453c2c0aa34f56febb17f2a650a2a9685 100644 --- a/bfps/cpp/fftw_tools.hpp +++ b/bfps/cpp/fftw_tools.hpp @@ -26,7 +26,7 @@ #include <mpi.h> #include <fftw3-mpi.h> -#include "field_descriptor.hpp" +#include <map> #ifndef FFTW_TOOLS diff --git a/bfps/cpp/field.cpp b/bfps/cpp/field.cpp index 95a44f449c551e4de4b6e4199b8714a74ca80ee5..9113b138110fbbfdfdfd250f2936a5d7e5958fc3 100644 --- a/bfps/cpp/field.cpp +++ b/bfps/cpp/field.cpp @@ -1387,7 +1387,6 @@ void field<rnumber, be, fc>::compute_stats( // what follows gave me a headache until I found this link: // http://stackoverflow.com/questions/8256636/expected-primary-expression-error-on-template-method-using kk->template cospectrum<rnumber, fc>( - (typename fftw_interface<rnumber>::complex*)this->data, (typename fftw_interface<rnumber>::complex*)this->data, group, dset_name + "_" + dset_name, diff --git a/bfps/cpp/full_code/NSVEparticles.cpp b/bfps/cpp/full_code/NSVEparticles.cpp index 37b1b83353baa235f1b01094210e5b9d68233735..c8918996da0e9726136a0ef4b732f9dfc0bccfec 100644 --- a/bfps/cpp/full_code/NSVEparticles.cpp +++ b/bfps/cpp/full_code/NSVEparticles.cpp @@ -1,4 +1,26 @@ - +/********************************************************************** +* * +* Copyright 2019 Max Planck Institute * +* for Dynamics and Self-Organization * +* * +* This file is part of bfps. * +* * +* bfps is free software: you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published * +* by the Free Software Foundation, either version 3 of the License, * +* or (at your option) any later version. * +* * +* bfps is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with bfps. If not, see <http://www.gnu.org/licenses/> * +* * +* Contact: Cristian.Lalescu@ds.mpg.de * +* * +**********************************************************************/ @@ -39,6 +61,7 @@ int NSVEparticles<rnumber>::initialize(void) "tracers0", nparticles, tracers0_integration_steps); + this->particles_output_writer_mpi->setParticleFileLayout(this->ps->getParticleFileLayout()); this->particles_sample_writer_mpi = new particles_output_sampling_hdf5< long long int, double, 3>( MPI_COMM_WORLD, @@ -46,6 +69,7 @@ int NSVEparticles<rnumber>::initialize(void) (this->simname + "_particles.h5"), "tracers0", "position/0"); + this->particles_sample_writer_mpi->setParticleFileLayout(this->ps->getParticleFileLayout()); return EXIT_SUCCESS; } diff --git a/bfps/cpp/kspace.cpp b/bfps/cpp/kspace.cpp index c480d1b0f62d9da881216cefc3cbae7e2e61b089..04d5dcd8148334adcb9040f4aaa17bb979febfb6 100644 --- a/bfps/cpp/kspace.cpp +++ b/bfps/cpp/kspace.cpp @@ -525,7 +525,7 @@ void kspace<be, dt>::cospectrum( const std::string dset_name, const hsize_t toffset) { - TIMEZONE("field::cospectrum"); + TIMEZONE("field::cospectrum2"); shared_array<double> spec_local_thread(this->nshells*ncomp(fc)*ncomp(fc),[&](double* spec_local){ std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0); }); @@ -593,6 +593,84 @@ void kspace<be, dt>::cospectrum( } } +template <field_backend be, + kspace_dealias_type dt> +template <typename rnumber, + field_components fc> +void kspace<be, dt>::cospectrum( + const rnumber(* __restrict a)[2], + const hid_t group, + const std::string dset_name, + const hsize_t toffset) +{ + TIMEZONE("field::cospectrum1"); + shared_array<double> spec_local_thread(this->nshells*ncomp(fc)*ncomp(fc),[&](double* spec_local){ + std::fill_n(spec_local, this->nshells*ncomp(fc)*ncomp(fc), 0); + }); + + this->CLOOP_K2_NXMODES( + [&](ptrdiff_t cindex, + ptrdiff_t xindex, + ptrdiff_t yindex, + ptrdiff_t zindex, + double k2, + int nxmodes){ + if (k2 <= this->kM2) + { + double* spec_local = spec_local_thread.getMine(); + int tmp_int = int(sqrt(k2) / this->dk)*ncomp(fc)*ncomp(fc); + for (hsize_t i=0; i<ncomp(fc); i++) + for (hsize_t j=0; j<ncomp(fc); j++){ + spec_local[tmp_int + i*ncomp(fc)+j] += nxmodes * ( + (a[ncomp(fc)*cindex + i][0] * a[ncomp(fc)*cindex + j][0]) + + (a[ncomp(fc)*cindex + i][1] * a[ncomp(fc)*cindex + j][1])); + } + } + }); + + spec_local_thread.mergeParallel(); + + std::vector<double> spec; + spec.resize(this->nshells*ncomp(fc)*ncomp(fc), 0); + MPI_Allreduce( + spec_local_thread.getMasterData(), + &spec.front(), + spec.size(), + MPI_DOUBLE, MPI_SUM, this->layout->comm); + if (this->layout->myrank == 0) + { + hid_t dset, wspace, mspace; + hsize_t count[(ndim(fc)-2)*2], offset[(ndim(fc)-2)*2], dims[(ndim(fc)-2)*2]; + dset = H5Dopen(group, ("spectra/" + dset_name).c_str(), H5P_DEFAULT); + wspace = H5Dget_space(dset); + H5Sget_simple_extent_dims(wspace, dims, NULL); + switch (fc) + { + case THREExTHREE: + offset[4] = 0; + offset[5] = 0; + count[4] = 3; + count[5] = 3; + case THREE: + offset[2] = 0; + offset[3] = 0; + count[2] = 3; + count[3] = 3; + default: + offset[0] = toffset; + offset[1] = 0; + count[0] = 1; + count[1] = this->nshells; + } + mspace = H5Screate_simple((ndim(fc)-2)*2, count, NULL); + H5Sselect_hyperslab(wspace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(dset, H5T_NATIVE_DOUBLE, mspace, wspace, H5P_DEFAULT, &spec.front()); + H5Sclose(wspace); + H5Sclose(mspace); + H5Dclose(dset); + } +} + template <field_backend be, kspace_dealias_type dt> template <typename rnumber, @@ -837,6 +915,68 @@ template void kspace<FFTW, SMOOTH>::cospectrum<double, THREExTHREE>( const std::string dset_name, const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, ONE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<float, THREExTHREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, ONE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, TWO_THIRDS>::cospectrum<double, THREExTHREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + +template void kspace<FFTW, SMOOTH>::cospectrum<float, ONE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<float, THREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<float, THREExTHREE>( + const typename fftw_interface<float>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, ONE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, THREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); +template void kspace<FFTW, SMOOTH>::cospectrum<double, THREExTHREE>( + const typename fftw_interface<double>::complex *__restrict__ a, + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + template double kspace<FFTW, TWO_THIRDS>::L2norm<float, ONE>( const typename fftw_interface<float>::complex *__restrict__ a); template double kspace<FFTW, TWO_THIRDS>::L2norm<float, THREE>( diff --git a/bfps/cpp/kspace.hpp b/bfps/cpp/kspace.hpp index c0bf2583be98149aa471731dff41218cf08ad705..94582da88efb1d3b4facecf8e3e4ac144710e426 100644 --- a/bfps/cpp/kspace.hpp +++ b/bfps/cpp/kspace.hpp @@ -114,6 +114,14 @@ class kspace const std::string dset_name, const hsize_t toffset); + template <typename rnumber, + field_components fc> + void cospectrum( + const rnumber(* __restrict__ a)[2], + const hid_t group, + const std::string dset_name, + const hsize_t toffset); + template <typename rnumber, field_components fc> double L2norm( diff --git a/bfps/cpp/particles/abstract_particles_system.hpp b/bfps/cpp/particles/abstract_particles_system.hpp index ee864b8cf11301f39ebfe447d2cd7155928c102e..67c46855a59f576c186216fa613c7f88523261df 100644 --- a/bfps/cpp/particles/abstract_particles_system.hpp +++ b/bfps/cpp/particles/abstract_particles_system.hpp @@ -80,7 +80,7 @@ public: void completeLoopWithExtraField( const real_number dt, const field<rnumber, be, fc>& in_field) { - static_assert(fc == THREE || THREExTHREE, "only THREE or THREExTHREE is supported for now"); + static_assert((fc == THREE) || (fc == THREExTHREE), "only THREE or THREExTHREE is supported for now"); if (fc == THREE) { std::unique_ptr<real_number[]> extra_rhs(new real_number[getLocalNbParticles()*3]()); @@ -96,6 +96,9 @@ public: completeLoopWithVelocityGradient(dt, extra_rhs.get()); } } + + virtual int setParticleFileLayout(std::vector<hsize_t>) = 0; + virtual std::vector<hsize_t> getParticleFileLayout() = 0; }; #endif diff --git a/bfps/cpp/particles/particles_input_hdf5.hpp b/bfps/cpp/particles/particles_input_hdf5.hpp index 5231872d2f22dce1e8422cac05a0f7b6b72f9d10..e10377bff97812ebee186ee721aeeb32e91d8fec 100644 --- a/bfps/cpp/particles/particles_input_hdf5.hpp +++ b/bfps/cpp/particles/particles_input_hdf5.hpp @@ -22,16 +22,19 @@ class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_nu int my_rank; int nb_processes; - hsize_t nb_total_particles; + hsize_t total_number_of_particles; hsize_t nb_rhs; partsize_t nb_particles_for_me; + std::vector<hsize_t> particle_file_layout; // to hold the shape of initial condition array std::unique_ptr<real_number[]> my_particles_positions; std::unique_ptr<partsize_t[]> my_particles_indexes; std::vector<std::unique_ptr<real_number[]>> my_particles_rhs; - static std::vector<real_number> BuildLimitsAllProcesses(MPI_Comm mpi_comm, - const real_number my_spatial_low_limit, const real_number my_spatial_up_limit){ + static std::vector<real_number> BuildLimitsAllProcesses( + MPI_Comm mpi_comm, + const real_number my_spatial_low_limit, + const real_number my_spatial_up_limit){ int my_rank; int nb_processes; @@ -41,8 +44,15 @@ class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_nu std::vector<real_number> spatial_limit_per_proc(nb_processes*2); real_number intervalToSend[2] = {my_spatial_low_limit, my_spatial_up_limit}; - AssertMpi(MPI_Allgather(intervalToSend, 2, particles_utils::GetMpiType(real_number()), - spatial_limit_per_proc.data(), 2, particles_utils::GetMpiType(real_number()), mpi_comm)); + AssertMpi( + MPI_Allgather( + intervalToSend, + 2, + particles_utils::GetMpiType(real_number()), + spatial_limit_per_proc.data(), + 2, + particles_utils::GetMpiType(real_number()), + mpi_comm)); for(int idx_proc = 0; idx_proc < nb_processes-1 ; ++idx_proc){ assert(spatial_limit_per_proc[idx_proc*2] <= spatial_limit_per_proc[idx_proc*2+1]); @@ -56,18 +66,35 @@ class particles_input_hdf5 : public abstract_particles_input<partsize_t, real_nu } public: - particles_input_hdf5(const MPI_Comm in_mpi_comm,const std::string& inFilename, - const std::string& inDatanameState, const std::string& inDatanameRhs, - const real_number my_spatial_low_limit, const real_number my_spatial_up_limit) - : particles_input_hdf5(in_mpi_comm, inFilename, inDatanameState, inDatanameRhs, - BuildLimitsAllProcesses(in_mpi_comm, my_spatial_low_limit, my_spatial_up_limit)){ + particles_input_hdf5( + const MPI_Comm in_mpi_comm, + const std::string& inFilename, + const std::string& inDatanameState, + const std::string& inDatanameRhs, + const real_number my_spatial_low_limit, + const real_number my_spatial_up_limit) + : particles_input_hdf5( + in_mpi_comm, + inFilename, + inDatanameState, + inDatanameRhs, + BuildLimitsAllProcesses( + in_mpi_comm, + my_spatial_low_limit, + my_spatial_up_limit)){ } - particles_input_hdf5(const MPI_Comm in_mpi_comm,const std::string& inFilename, - const std::string& inDatanameState, const std::string& inDatanameRhs, - const std::vector<real_number>& in_spatial_limit_per_proc) + particles_input_hdf5( + const MPI_Comm in_mpi_comm, + const std::string& inFilename, + const std::string& inDatanameState, + const std::string& inDatanameRhs, + const std::vector<real_number>& in_spatial_limit_per_proc) : filename(inFilename), - mpi_comm(in_mpi_comm), my_rank(-1), nb_processes(-1), nb_total_particles(0), + mpi_comm(in_mpi_comm), + my_rank(-1), + nb_processes(-1), + total_number_of_particles(0), nb_particles_for_me(0){ TIMEZONE("particles_input_hdf5"); @@ -104,9 +131,12 @@ public: // Last value is the position dim of the particles assert(state_dim_array.back() == size_particle_positions); - nb_total_particles = 1; + // compute total number of particles, store initial condition array shape + total_number_of_particles = 1; + particle_file_layout.resize(state_dim_array.size()-1); for (size_t idx_dim = 0; idx_dim < state_dim_array.size()-1; ++idx_dim){ - nb_total_particles *= state_dim_array[idx_dim]; + total_number_of_particles *= state_dim_array[idx_dim]; + particle_file_layout[idx_dim] = state_dim_array[idx_dim]; } hdfret = H5Sclose(dspace); @@ -141,7 +171,7 @@ public: assert(hdfret >= 0); } - particles_utils::IntervalSplitter<hsize_t> load_splitter(nb_total_particles, nb_processes, my_rank); + particles_utils::IntervalSplitter<hsize_t> load_splitter(total_number_of_particles, nb_processes, my_rank); static_assert(std::is_same<real_number, double>::value || std::is_same<real_number, float>::value, "real_number must be double or float"); @@ -158,7 +188,8 @@ public: hid_t dset = H5Dopen(particle_file, inDatanameState.c_str(), H5P_DEFAULT); assert(dset >= 0); - hid_t rspace = H5Dget_space(dset); + hsize_t file_space_dims[2] = {total_number_of_particles, size_particle_positions}; + hid_t rspace = H5Screate_simple(2, file_space_dims, NULL); assert(rspace >= 0); hsize_t offset[2] = {load_splitter.getMyOffset(), 0}; @@ -184,11 +215,11 @@ public: TIMEZONE("rhs-read"); hid_t dset = H5Dopen(particle_file, inDatanameRhs.c_str(), H5P_DEFAULT); assert(dset >= 0); + hsize_t file_space_dims[3] = {nb_rhs, total_number_of_particles, size_particle_rhs}; + hid_t rspace = H5Screate_simple(3, file_space_dims, NULL); + assert(rspace >= 0); for(hsize_t idx_rhs = 0 ; idx_rhs < nb_rhs ; ++idx_rhs){ - hid_t rspace = H5Dget_space(dset); - assert(rspace >= 0); - if(load_splitter.getMySize()){ split_particles_rhs[idx_rhs].reset(new real_number[load_splitter.getMySize()*size_particle_rhs]); } @@ -208,11 +239,11 @@ public: rethdf = H5Sclose(mspace); assert(rethdf >= 0); - - rethdf = H5Sclose(rspace); - assert(rethdf >= 0); } - int rethdf = H5Dclose(dset); + + int rethdf = H5Sclose(rspace); + assert(rethdf >= 0); + rethdf = H5Dclose(dset); variable_used_only_in_assert(rethdf); assert(rethdf >= 0); } @@ -302,7 +333,7 @@ public: } partsize_t getTotalNbParticles() final{ - return partsize_t(nb_total_particles); + return partsize_t(total_number_of_particles); } partsize_t getLocalNbParticles() final{ @@ -327,6 +358,10 @@ public: assert(my_particles_indexes != nullptr || nb_particles_for_me == 0); return std::move(my_particles_indexes); } + + std::vector<hsize_t> getParticleFileLayout(){ + return std::vector<hsize_t>(this->particle_file_layout); + } }; #endif diff --git a/bfps/cpp/particles/particles_output_hdf5.hpp b/bfps/cpp/particles/particles_output_hdf5.hpp index 0098ba542812d68e462fadf19c94d600f4637af3..d7c987eea444d82d5c1180cd0124601c191cbf84 100644 --- a/bfps/cpp/particles/particles_output_hdf5.hpp +++ b/bfps/cpp/particles/particles_output_hdf5.hpp @@ -22,6 +22,7 @@ class particles_output_hdf5 : public abstract_particles_output<partsize_t, hid_t file_id; const partsize_t total_nb_particles; + std::vector<hsize_t> particle_file_layout; // to hold the shape of initial condition array hid_t dset_id_state; hid_t dset_id_rhs; @@ -204,12 +205,9 @@ public: } { - assert(total_nb_particles >= 0); - assert(size_particle_positions >= 0); - const hsize_t datacount[2] = { - hsize_t(total_nb_particles), - hsize_t(size_particle_positions)}; - hid_t dataspace = H5Screate_simple(2, datacount, NULL); + std::vector<hsize_t> datacount = std::vector<hsize_t>(this->particle_file_layout); + datacount.push_back(size_particle_positions); + hid_t dataspace = H5Screate_simple(datacount.size(), &datacount.front(), NULL); assert(dataspace >= 0); hid_t dataset_id = H5Dcreate( dset_id_state, @@ -228,7 +226,12 @@ public: hid_t memspace = H5Screate_simple(2, count, NULL); assert(memspace >= 0); - hid_t filespace = H5Dget_space(dataset_id); + assert(total_nb_particles >= 0); + assert(size_particle_positions >= 0); + const hsize_t file_count[2] = {hsize_t(total_nb_particles), size_particle_positions}; + hid_t filespace = H5Screate_simple(2, file_count, NULL); + assert(filespace >= 0); + int rethdf = H5Sselect_hyperslab( filespace, H5S_SELECT_SET, @@ -257,10 +260,10 @@ public: } { assert(size_particle_rhs >= 0); - const hsize_t datacount[3] = {hsize_t(Parent::getNbRhs()), - hsize_t(total_nb_particles), - hsize_t(size_particle_rhs)}; - hid_t dataspace = H5Screate_simple(3, datacount, NULL); + std::vector<hsize_t> datacount = std::vector<hsize_t>(this->particle_file_layout); + datacount.insert(datacount.begin(), hsize_t(Parent::getNbRhs())); + datacount.push_back(size_particle_positions); + hid_t dataspace = H5Screate_simple(datacount.size(), &datacount.front(), NULL); assert(dataspace >= 0); hid_t dataset_id = H5Dcreate( dset_id_rhs, @@ -285,8 +288,12 @@ public: hid_t memspace = H5Screate_simple(3, count, NULL); assert(memspace >= 0); - hid_t filespace = H5Dget_space(dataset_id); + assert(total_nb_particles >= 0); + assert(size_particle_positions >= 0); + const hsize_t file_count[3] = {hsize_t(Parent::getNbRhs()), hsize_t(total_nb_particles), size_particle_positions}; + hid_t filespace = H5Screate_simple(3, file_count, NULL); assert(filespace >= 0); + int rethdf = H5Sselect_hyperslab( filespace, H5S_SELECT_SET, @@ -322,6 +329,17 @@ public: assert(rethdf >= 0); } } + + int setParticleFileLayout(std::vector<hsize_t> input_layout){ + this->particle_file_layout.resize(input_layout.size()); + for (unsigned int i=0; i<this->particle_file_layout.size(); i++) + this->particle_file_layout[i] = input_layout[i]; + return EXIT_SUCCESS; + } + + std::vector<hsize_t> getParticleFileLayout(void){ + return std::vector<hsize_t>(this->particle_file_layout); + } }; #endif//PARTICLES_OUTPUT_HDF5_HPP diff --git a/bfps/cpp/particles/particles_output_sampling_hdf5.hpp b/bfps/cpp/particles/particles_output_sampling_hdf5.hpp index 22dafaedcd7095bd5214864f224003ea8d8be602..ff3782b58716481303368a946cf30b5219e74345 100644 --- a/bfps/cpp/particles/particles_output_sampling_hdf5.hpp +++ b/bfps/cpp/particles/particles_output_sampling_hdf5.hpp @@ -19,6 +19,7 @@ class particles_output_sampling_hdf5 : public abstract_particles_output< hid_t file_id, pgroup_id; std::string dataset_name; + std::vector<hsize_t> particle_file_layout; // to hold the shape of initial condition array const bool use_collective_io; public: @@ -191,9 +192,9 @@ public: } { assert(size_particle_rhs >= 0); - const hsize_t datacount[2] = {hsize_t(Parent::getTotalNbParticles()), - hsize_t(size_particle_rhs)}; - hid_t dataspace = H5Screate_simple(2, datacount, NULL); + std::vector<hsize_t> datacount = std::vector<hsize_t>(this->particle_file_layout); + datacount.push_back(size_particle_positions); + hid_t dataspace = H5Screate_simple(datacount.size(), &datacount.front(), NULL); assert(dataspace >= 0); hid_t dataset_id = H5Dcreate( pgroup_id, @@ -215,7 +216,8 @@ public: hid_t memspace = H5Screate_simple(2, count, NULL); assert(memspace >= 0); - hid_t filespace = H5Dget_space(dataset_id); + const hsize_t file_count[2] = {hsize_t(Parent::getTotalNbParticles()), hsize_t(size_particle_rhs)}; + hid_t filespace = H5Screate_simple(2, file_count, NULL); assert(filespace >= 0); int rethdf = H5Sselect_hyperslab( filespace, @@ -250,6 +252,17 @@ public: assert(rethdf >= 0); } } + + int setParticleFileLayout(std::vector<hsize_t> input_layout){ + this->particle_file_layout.resize(input_layout.size()); + for (unsigned int i=0; i<this->particle_file_layout.size(); i++) + this->particle_file_layout[i] = input_layout[i]; + return EXIT_SUCCESS; + } + + std::vector<hsize_t> getParticleFileLayout(void){ + return std::vector<hsize_t>(this->particle_file_layout); + } }; #endif diff --git a/bfps/cpp/particles/particles_system.hpp b/bfps/cpp/particles/particles_system.hpp index ebc7e79a67f5349c0b8c711ed9ae229a1cddc3a4..db651904b7f3e752f73846c494a8b9f6ee41b988 100644 --- a/bfps/cpp/particles/particles_system.hpp +++ b/bfps/cpp/particles/particles_system.hpp @@ -48,6 +48,7 @@ class particles_system : public abstract_particles_system<partsize_t, real_numbe partsize_t my_nb_particles; const partsize_t total_nb_particles; std::vector<std::unique_ptr<real_number[]>> my_particles_rhs; + std::vector<hsize_t> particle_file_layout; int step_idx; @@ -351,6 +352,17 @@ public: return int(my_particles_rhs.size()); } + int setParticleFileLayout(std::vector<hsize_t> input_layout) final{ + this->particle_file_layout.resize(input_layout.size()); + for (unsigned int i=0; i<this->particle_file_layout.size(); i++) + this->particle_file_layout[i] = input_layout[i]; + return EXIT_SUCCESS; + } + + std::vector<hsize_t> getParticleFileLayout(void) final{ + return std::vector<hsize_t>(this->particle_file_layout); + } + void checkNan() const { // TODO remove for(partsize_t idx_part = 0 ; idx_part < my_nb_particles ; ++idx_part){ // TODO remove me assert(std::isnan(my_particles_positions[idx_part*size_particle_positions+IDXC_X]) == false); diff --git a/bfps/cpp/particles/particles_system_builder.hpp b/bfps/cpp/particles/particles_system_builder.hpp index a9a140ac9921905513c1adcf558e071093b5afbb..916ab4bfe0f72f53a9a9b5c356d4255c78877096 100644 --- a/bfps/cpp/particles/particles_system_builder.hpp +++ b/bfps/cpp/particles/particles_system_builder.hpp @@ -243,6 +243,9 @@ struct particles_system_build_container { assert(part_sys->getNbRhs() == nsteps); + // store particle file layout + part_sys->setParticleFileLayout(generator.getParticleFileLayout()); + // Return the created particles system return std::unique_ptr<abstract_particles_system<partsize_t, particles_rnumber>>(part_sys); } diff --git a/bfps/test/test_bfps_NSVEparticles.py b/bfps/test/test_bfps_NSVEparticles.py index f914ad7dbfa7f23466c19cfba64f730b9e4cda45..fe1e7875a651b17dd9180f3cbe6d6bfe1f1b5c27 100644 --- a/bfps/test/test_bfps_NSVEparticles.py +++ b/bfps/test/test_bfps_NSVEparticles.py @@ -1,4 +1,29 @@ #! /usr/bin/env python +####################################################################### +# # +# Copyright 2019 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + import os import numpy as np diff --git a/bfps/test/test_particle_clouds.py b/bfps/test/test_particle_clouds.py new file mode 100644 index 0000000000000000000000000000000000000000..5d2045390f51e7f529f78a3eb7037acb3fcae3b9 --- /dev/null +++ b/bfps/test/test_particle_clouds.py @@ -0,0 +1,93 @@ +#! /usr/bin/env python +####################################################################### +# # +# Copyright 2019 Max Planck Institute # +# for Dynamics and Self-Organization # +# # +# This file is part of bfps. # +# # +# bfps is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published # +# by the Free Software Foundation, either version 3 of the License, # +# or (at your option) any later version. # +# # +# bfps is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with bfps. If not, see <http://www.gnu.org/licenses/> # +# # +# Contact: Cristian.Lalescu@ds.mpg.de # +# # +####################################################################### + + + +import os +import numpy as np +import h5py +import sys + +import bfps +from bfps import DNS + + +def main(): + nclouds = 10 + nparticles_per_cloud = 1000 + nparticles = nclouds*nparticles_per_cloud + niterations = 32 + c = DNS() + c.dns_type = 'NSVEparticles' + c.parameters['nparticles'] = nparticles + c.parameters['tracers1_integration_steps'] = 4 + c.generate_tracer_state(rseed = 2, species = 1) + del c.parameters['nparticles'] + del c.parameters['tracers1_integration_steps'] + ic_file = h5py.File(c.get_checkpoint_0_fname(), 'a') + ic_file['tracers0/state/0'] = ic_file['tracers1/state/0'].value.reshape(nclouds, nparticles_per_cloud, 3) + ic_file['tracers0/rhs/0'] = ic_file['tracers1/rhs/0'].value.reshape(4, nclouds, nparticles_per_cloud, 3) + ic_file.close() + c.launch( + ['NSVEparticles', + '-n', '32', + '--src-simname', 'B32p1e4', + '--forcing_type', 'linear', + '--src-wd', bfps.lib_dir + '/test', + '--src-iteration', '0', + '--np', '4', + '--ntpp', '1', + '--fftw_plan_rigor', 'FFTW_PATIENT', + '--niter_todo', '{0}'.format(niterations), + '--niter_out', '{0}'.format(niterations), + '--niter_stat', '1', + '--nparticles', '{0}'.format(nparticles), + '--njobs', '2', + '--wd', './']) + f0 = h5py.File( + os.path.join( + os.path.join(bfps.lib_dir, 'test'), + 'B32p1e4_checkpoint_0.h5'), + 'r') + f1 = h5py.File(c.get_checkpoint_0_fname(), 'r') + for iteration in [0, 32, 64]: + field0 = f0['vorticity/complex/{0}'.format(iteration)].value + field1 = f1['vorticity/complex/{0}'.format(iteration)].value + field_error = np.max(np.abs(field0 - field1)) + x0 = f0['tracers0/state/{0}'.format(iteration)].value + x1 = f1['tracers0/state/{0}'.format(iteration)].value.reshape(x0.shape) + traj_error = np.max(np.abs(x0 - x1)) + y0 = f0['tracers0/rhs/{0}'.format(iteration)].value + y1 = f1['tracers0/rhs/{0}'.format(iteration)].value.reshape(y0.shape) + rhs_error = np.max(np.abs(y0 - y1)) + assert(field_error < 1e-5) + assert(traj_error < 1e-5) + assert(rhs_error < 1e-5) + print('SUCCESS! Basic test passed.') + return None + +if __name__ == '__main__': + main() + diff --git a/documentation/_static/overview.rst b/documentation/_static/overview.rst index afe7a753666e6ea5911ce1266d0803aa25ea5c45..58af5653cab860961c71d057d92a21c9b99e6ddc 100644 --- a/documentation/_static/overview.rst +++ b/documentation/_static/overview.rst @@ -184,16 +184,17 @@ available, called ``bfps``, that you can execute. Just executing it will run a small test DNS on a real space grid of size :math:`32 \times 32 \times 32`, in the current folder, with the simulation name ``test``. -So, open a console, and type ``bfps NavierStokes``: +So, open a console, and type ``bfps DNS NSVE``: .. code:: bash # depending on how curious you are, you may have a look at the # options first: bfps --help - bfps NavierStokes --help + bfps DNS --help + bfps DNS NSVE --help # or you may just run it: - bfps NavierStokes + bfps DNS NSVE The simulation itself should not take more than a few seconds, since this is just a :math:`32^3` simulation run for 8 iterations. @@ -205,9 +206,9 @@ the following: .. code:: python import numpy as np - from bfps import NavierStokes + from bfps import DNS - c = NavierStokes( + c = DNS( work_dir = '/location/of/simulation/data', simname = 'simulation_name_goes_here') c.compute_statistics() @@ -223,7 +224,7 @@ the following: data_file['iteration'].value*c.parameters['dt'] / c.statistics['Tint'], data_file['iteration'].value*c.parameters['dt'] / c.statistics['tauK'])) -:func:`compute_statistics <bfps.NavierStokes.NavierStokes.compute_statistics>` +:func:`compute_statistics <bfps.DNS.DNS.compute_statistics>` will read the data file generated by the DNS, compute a bunch of basic statistics, for example the Taylor scale Reynolds number :math:`R_\lambda` that we're @@ -233,7 +234,7 @@ What happens is that the DNS will have generated an ``HDF5`` file containing a bunch of specific datasets (spectra, moments of real space representations, etc). The function -:func:`compute_statistics <bfps.NavierStokes.NavierStokes.compute_statistics>` +:func:`compute_statistics <bfps.DNS.DNS.compute_statistics>` performs simple postprocessing that may however be expensive, therefore it also saves some data into a ``<simname>_postprocess.h5`` file, and then it also performs some time averages, yielding the ``statistics`` @@ -242,6 +243,8 @@ dictionary that is used in the above code. Behind the scenes ----------------- +TODO FIXME obsolete documentation + In brief the following takes place: 1. An instance ``c`` of diff --git a/tests/misc/makefile b/tests/misc/makefile index c8f5f78812b5c55c0c34a220b4963a8e8d1c4f45..d44b9f04a10bdcb46fcf88ec9c858832c3c6e4df 100644 --- a/tests/misc/makefile +++ b/tests/misc/makefile @@ -1,5 +1,6 @@ test_fftw: test_fftw.c mpicc \ + -DFFTW_PLAN_RIGOR=FFTW_ESTIMATE \ -I/stuff/ext_installs/include \ -fopenmp \ test_fftw.c \ diff --git a/tests/misc/test_fftw.c b/tests/misc/test_fftw.c index 6da0099cca5de5c2a56e8b1afb9fca79945b31e4..af9fef7b6564bdc4b5b3db0908cda43ec3dd9945 100644 --- a/tests/misc/test_fftw.c +++ b/tests/misc/test_fftw.c @@ -5,6 +5,12 @@ #include <assert.h> #include <math.h> +#ifndef FFTW_PLAN_RIGOR + +#define FFTW_PLAN_RIGOR FFTW_ESTIMATE + +#endif + //#define NO_FFTWOMP #define NX 36 @@ -104,7 +110,7 @@ int main( cdata, data, MPI_COMM_WORLD, - FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_IN); + FFTW_PLAN_RIGOR | FFTW_MPI_TRANSPOSED_IN); r2c_plan = fftwf_mpi_plan_many_dft_r2c( 3, nfftw, 3, @@ -112,7 +118,7 @@ int main( data, cdata, MPI_COMM_WORLD, - FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_OUT); + FFTW_PLAN_RIGOR | FFTW_MPI_TRANSPOSED_OUT); kx = (double*)malloc(sizeof(double)*(nx/2+1)); ky = (double*)malloc(sizeof(double)*local_n1); @@ -124,8 +130,7 @@ int main( if (jy + local_1_start <= ny/2) ky[jy] = dky*(jy + local_1_start); else - ky[jy] = dky*((jy + local_1_start) - ny); - } + ky[jy] = dky*((jy + local_1_start) - ny); } for (jz = 0; jz < nz; jz++) { if (jz <= nz/2) @@ -305,6 +310,7 @@ int main( //L2norm1 = sqrt(L2norm1 / (nx*ny*nz)); //L2norm2 = sqrt(L2norm2 / (nx*ny*nz)); + printf("FFTW_PLAN_RIGOR=%d\n", FFTW_PLAN_RIGOR); printf("L2normk = %g, L2norm1 = %g, relative error = %g\n", L2normk, L2norm1, fabs(L2normk - L2norm1) / (L2normk));